From c887379baf8e2fdee856ba033354fc0f0ddb35ba Mon Sep 17 00:00:00 2001
From: Johannes Berg
Date: Wed, 24 Oct 2007 15:08:48 -0700
Subject: [PATCH] --- yaml ---
r: 77930
b: refs/heads/master
c: 5c98fc0360437327e4034ecfe8b818ad82622100
h: refs/heads/master
v: v3
---
 [refs]                                        |    2 +-
 trunk/Documentation/filesystems/ext4.txt      |   20 +-
 trunk/Documentation/filesystems/proc.txt      |   39 -
 trunk/fs/Kconfig                              |    1 -
 trunk/fs/afs/dir.c                            |    9 +-
 trunk/fs/afs/inode.c                          |    3 +-
 trunk/fs/buffer.c                             |   44 -
 trunk/fs/ext2/super.c                         |   32 +-
 trunk/fs/ext3/super.c                         |   32 +-
 trunk/fs/ext4/Makefile                        |    2 +-
 trunk/fs/ext4/balloc.c                        |  247 +-
 trunk/fs/ext4/dir.c                           |   14 +-
 trunk/fs/ext4/extents.c                       |  481 +-
 trunk/fs/ext4/file.c                          |   23 +-
 trunk/fs/ext4/group.h                         |    8 +-
 trunk/fs/ext4/ialloc.c                        |  141 +-
 trunk/fs/ext4/inode.c                         |  360 +-
 trunk/fs/ext4/ioctl.c                         |    7 +-
 trunk/fs/ext4/mballoc.c                       | 4552 -----------------
 trunk/fs/ext4/migrate.c                       |  560 --
 trunk/fs/ext4/namei.c                         |  135 +-
 trunk/fs/ext4/resize.c                        |   28 +-
 trunk/fs/ext4/super.c                         |  379 +-
 trunk/fs/ext4/xattr.c                         |    4 +-
 trunk/fs/inode.c                              |    5 -
 trunk/fs/jbd2/checkpoint.c                    |   22 +-
 trunk/fs/jbd2/commit.c                        |  255 +-
 trunk/fs/jbd2/journal.c                       |  368 +-
 trunk/fs/jbd2/recovery.c                      |  151 +-
 trunk/fs/jbd2/revoke.c                        |    6 +-
 trunk/fs/jbd2/transaction.c                   |   34 +-
 trunk/fs/ocfs2/cluster/sys.c                  |    2 +-
 trunk/fs/read_write.c                         |    1 -
 trunk/include/asm-arm/bitops.h                |    2 -
 .../asm-generic/bitops/ext2-non-atomic.h      |    2 -
 trunk/include/asm-generic/bitops/le.h         |    4 -
 trunk/include/asm-m68k/bitops.h               |    2 -
 trunk/include/asm-m68knommu/bitops.h          |    2 -
 trunk/include/asm-powerpc/bitops.h            |    4 -
 trunk/include/asm-s390/bitops.h               |    2 -
 trunk/include/linux/buffer_head.h             |    2 -
 trunk/include/linux/ext4_fs.h                 |  198 +-
 trunk/include/linux/ext4_fs_extents.h         |   25 +-
 trunk/include/linux/ext4_fs_i.h               |   25 +-
 trunk/include/linux/ext4_fs_sb.h              |   55 +-
 trunk/include/linux/fs.h                      |   19 +-
 trunk/include/linux/jbd2.h                    |  135 +-
 trunk/include/linux/module.h                  |   22 +-
 trunk/kernel/extable.c                        |    3 +-
 trunk/kernel/kallsyms.c                       |   11 +-
 trunk/kernel/module.c                         |  102 +-
 trunk/kernel/params.c                         |    8 +-
 trunk/lib/find_next_bit.c                     |   43 -
 trunk/scripts/kernel-doc                      |    6 +-
 54 files changed, 865 insertions(+), 7774 deletions(-)
 delete mode 100644 trunk/fs/ext4/mballoc.c
 delete mode 100644 trunk/fs/ext4/migrate.c

diff --git a/[refs] b/[refs]
index e84cc381b570..e52920f751e2 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 03bc26cfefd6db756e6bc7fcda11dc17ada7be16
+refs/heads/master: 5c98fc0360437327e4034ecfe8b818ad82622100
diff --git a/trunk/Documentation/filesystems/ext4.txt b/trunk/Documentation/filesystems/ext4.txt
index 560f88dc7090..6a4adcae9f9a 100644
--- a/trunk/Documentation/filesystems/ext4.txt
+++ b/trunk/Documentation/filesystems/ext4.txt
@@ -86,21 +86,9 @@ Alex is working on a new set of patches right now.
 When mounting an ext4 filesystem, the following option are accepted:
 (*) == default
 
-extents (*)		ext4 will use extents to address file data. The
+extents			ext4 will use extents to address file data. The
 			file system will no longer be mountable by ext3.
-noextents		ext4 will not use extents for newly created files
-
-journal_checksum	Enable checksumming of the journal transactions.
-			This will allow the recovery code in e2fsck and the
-			kernel to detect corruption in the kernel. It is a
-			compatible change and will be ignored by older kernels.
-
-journal_async_commit	Commit block can be written to disk without waiting
-			for descriptor blocks. If enabled older kernels cannot
-			mount the device. This will enable 'journal_checksum'
-			internally.
- journal=update Update the ext4 file system's journal to the current format. @@ -208,12 +196,6 @@ nobh (a) cache disk block mapping information "nobh" option tries to avoid associating buffer heads (supported only for "writeback" mode). -mballoc (*) Use the multiple block allocator for block allocation -nomballoc disabled multiple block allocator for block allocation. -stripe=n Number of filesystem blocks that mballoc will try - to use for allocation size and alignment. For RAID5/6 - systems this should be the number of data - disks * RAID chunk size in file system blocks. Data Mode --------- diff --git a/trunk/Documentation/filesystems/proc.txt b/trunk/Documentation/filesystems/proc.txt index 4413a2d4646f..dec99455321f 100644 --- a/trunk/Documentation/filesystems/proc.txt +++ b/trunk/Documentation/filesystems/proc.txt @@ -857,45 +857,6 @@ CPUs. The "procs_blocked" line gives the number of processes currently blocked, waiting for I/O to complete. -1.9 Ext4 file system parameters ------------------------------- -Ext4 file system have one directory per partition under /proc/fs/ext4/ -# ls /proc/fs/ext4/hdc/ -group_prealloc max_to_scan mb_groups mb_history min_to_scan order2_req -stats stream_req - -mb_groups: -This file gives the details of mutiblock allocator buddy cache of free blocks - -mb_history: -Multiblock allocation history. - -stats: -This file indicate whether the multiblock allocator should start collecting -statistics. The statistics are shown during unmount - -group_prealloc: -The multiblock allocator normalize the block allocation request to -group_prealloc filesystem blocks if we don't have strip value set. -The stripe value can be specified at mount time or during mke2fs. - -max_to_scan: -How long multiblock allocator can look for a best extent (in found extents) - -min_to_scan: -How long multiblock allocator must look for a best extent - -order2_req: -Multiblock allocator use 2^N search using buddies only for requests greater -than or equal to order2_req. The request size is specfied in file system -blocks. A value of 2 indicate only if the requests are greater than or equal -to 4 blocks. - -stream_req: -Files smaller than stream_req are served by the stream allocator, whose -purpose is to pack requests as close each to other as possible to -produce smooth I/O traffic. Avalue of 16 indicate that file smaller than 16 -filesystem block size will use group based preallocation. ------------------------------------------------------------------------------ Summary diff --git a/trunk/fs/Kconfig b/trunk/fs/Kconfig index 219ec06a8c7e..9656139d2e99 100644 --- a/trunk/fs/Kconfig +++ b/trunk/fs/Kconfig @@ -236,7 +236,6 @@ config JBD_DEBUG config JBD2 tristate - select CRC32 help This is a generic journaling layer for block devices that support both 32-bit and 64-bit block numbers. 
It is currently used by diff --git a/trunk/fs/afs/dir.c b/trunk/fs/afs/dir.c index 0cc3597c1197..33fe39ad4e03 100644 --- a/trunk/fs/afs/dir.c +++ b/trunk/fs/afs/dir.c @@ -546,11 +546,11 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, dentry->d_op = &afs_fs_dentry_operations; d_add(dentry, inode); - _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }", + _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%lu }", fid.vnode, fid.unique, dentry->d_inode->i_ino, - (unsigned long long)dentry->d_inode->i_version); + dentry->d_inode->i_version); return NULL; } @@ -630,10 +630,9 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd) * been deleted and replaced, and the original vnode ID has * been reused */ if (fid.unique != vnode->fid.unique) { - _debug("%s: file deleted (uq %u -> %u I:%llu)", + _debug("%s: file deleted (uq %u -> %u I:%lu)", dentry->d_name.name, fid.unique, - vnode->fid.unique, - (unsigned long long)dentry->d_inode->i_version); + vnode->fid.unique, dentry->d_inode->i_version); spin_lock(&vnode->lock); set_bit(AFS_VNODE_DELETED, &vnode->flags); spin_unlock(&vnode->lock); diff --git a/trunk/fs/afs/inode.c b/trunk/fs/afs/inode.c index 84750c8e9f95..d196840127c6 100644 --- a/trunk/fs/afs/inode.c +++ b/trunk/fs/afs/inode.c @@ -301,8 +301,7 @@ int afs_getattr(struct vfsmount *mnt, struct dentry *dentry, inode = dentry->d_inode; - _enter("{ ino=%lu v=%llu }", inode->i_ino, - (unsigned long long)inode->i_version); + _enter("{ ino=%lu v=%lu }", inode->i_ino, inode->i_version); generic_fillattr(inode, stat); return 0; diff --git a/trunk/fs/buffer.c b/trunk/fs/buffer.c index 456c9ab7705b..7249e014819e 100644 --- a/trunk/fs/buffer.c +++ b/trunk/fs/buffer.c @@ -3213,50 +3213,6 @@ static int buffer_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -/** - * bh_uptodate_or_lock: Test whether the buffer is uptodate - * @bh: struct buffer_head - * - * Return true if the buffer is up-to-date and false, - * with the buffer locked, if not. - */ -int bh_uptodate_or_lock(struct buffer_head *bh) -{ - if (!buffer_uptodate(bh)) { - lock_buffer(bh); - if (!buffer_uptodate(bh)) - return 0; - unlock_buffer(bh); - } - return 1; -} -EXPORT_SYMBOL(bh_uptodate_or_lock); - -/** - * bh_submit_read: Submit a locked buffer for reading - * @bh: struct buffer_head - * - * Returns zero on success and -EIO on error. 
- */ -int bh_submit_read(struct buffer_head *bh) -{ - BUG_ON(!buffer_locked(bh)); - - if (buffer_uptodate(bh)) { - unlock_buffer(bh); - return 0; - } - - get_bh(bh); - bh->b_end_io = end_buffer_read_sync; - submit_bh(READ, bh); - wait_on_buffer(bh); - if (buffer_uptodate(bh)) - return 0; - return -EIO; -} -EXPORT_SYMBOL(bh_submit_read); - void __init buffer_init(void) { int nrpages; diff --git a/trunk/fs/ext2/super.c b/trunk/fs/ext2/super.c index 6abaf75163f0..154e25f13d77 100644 --- a/trunk/fs/ext2/super.c +++ b/trunk/fs/ext2/super.c @@ -680,31 +680,11 @@ static int ext2_check_descriptors (struct super_block * sb) static loff_t ext2_max_size(int bits) { loff_t res = EXT2_NDIR_BLOCKS; - int meta_blocks; - loff_t upper_limit; - - /* This is calculated to be the largest file size for a - * dense, file such that the total number of + /* This constant is calculated to be the largest file size for a + * dense, 4k-blocksize file such that the total number of * sectors in the file, including data and all indirect blocks, - * does not exceed 2^32 -1 - * __u32 i_blocks representing the total number of - * 512 bytes blocks of the file - */ - upper_limit = (1LL << 32) - 1; - - /* total blocks in file system block size */ - upper_limit >>= (bits - 9); - - - /* indirect blocks */ - meta_blocks = 1; - /* double indirect blocks */ - meta_blocks += 1 + (1LL << (bits-2)); - /* tripple indirect blocks */ - meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); - - upper_limit -= meta_blocks; - upper_limit <<= bits; + * does not exceed 2^32. */ + const loff_t upper_limit = 0x1ff7fffd000LL; res += 1LL << (bits-2); res += 1LL << (2*(bits-2)); @@ -712,10 +692,6 @@ static loff_t ext2_max_size(int bits) res <<= bits; if (res > upper_limit) res = upper_limit; - - if (res > MAX_LFS_FILESIZE) - res = MAX_LFS_FILESIZE; - return res; } diff --git a/trunk/fs/ext3/super.c b/trunk/fs/ext3/super.c index f3675cc630e9..cb14de1502c3 100644 --- a/trunk/fs/ext3/super.c +++ b/trunk/fs/ext3/super.c @@ -1436,31 +1436,11 @@ static void ext3_orphan_cleanup (struct super_block * sb, static loff_t ext3_max_size(int bits) { loff_t res = EXT3_NDIR_BLOCKS; - int meta_blocks; - loff_t upper_limit; - - /* This is calculated to be the largest file size for a - * dense, file such that the total number of + /* This constant is calculated to be the largest file size for a + * dense, 4k-blocksize file such that the total number of * sectors in the file, including data and all indirect blocks, - * does not exceed 2^32 -1 - * __u32 i_blocks representing the total number of - * 512 bytes blocks of the file - */ - upper_limit = (1LL << 32) - 1; - - /* total blocks in file system block size */ - upper_limit >>= (bits - 9); - - - /* indirect blocks */ - meta_blocks = 1; - /* double indirect blocks */ - meta_blocks += 1 + (1LL << (bits-2)); - /* tripple indirect blocks */ - meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); - - upper_limit -= meta_blocks; - upper_limit <<= bits; + * does not exceed 2^32. 
*/ + const loff_t upper_limit = 0x1ff7fffd000LL; res += 1LL << (bits-2); res += 1LL << (2*(bits-2)); @@ -1468,10 +1448,6 @@ static loff_t ext3_max_size(int bits) res <<= bits; if (res > upper_limit) res = upper_limit; - - if (res > MAX_LFS_FILESIZE) - res = MAX_LFS_FILESIZE; - return res; } diff --git a/trunk/fs/ext4/Makefile b/trunk/fs/ext4/Makefile index ac6fa8ca0a2f..ae6e7e502ac9 100644 --- a/trunk/fs/ext4/Makefile +++ b/trunk/fs/ext4/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ - ext4_jbd2.o migrate.o mballoc.o + ext4_jbd2.o ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o diff --git a/trunk/fs/ext4/balloc.c b/trunk/fs/ext4/balloc.c index ac75ea953d83..71ee95e534fd 100644 --- a/trunk/fs/ext4/balloc.c +++ b/trunk/fs/ext4/balloc.c @@ -29,7 +29,7 @@ * Calculate the block group number and offset, given a block number */ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, - ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp) + unsigned long *blockgrpp, ext4_grpblk_t *offsetp) { struct ext4_super_block *es = EXT4_SB(sb)->s_es; ext4_grpblk_t offset; @@ -46,7 +46,7 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, /* Initializes an uninitialized block bitmap if given, and returns the * number of blocks free in the group. */ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, - ext4_group_t block_group, struct ext4_group_desc *gdp) + int block_group, struct ext4_group_desc *gdp) { unsigned long start; int bit, bit_max; @@ -60,7 +60,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, * essentially implementing a per-group read-only flag. 
*/ if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { ext4_error(sb, __FUNCTION__, - "Checksum bad for group %lu\n", block_group); + "Checksum bad for group %u\n", block_group); gdp->bg_free_blocks_count = 0; gdp->bg_free_inodes_count = 0; gdp->bg_itable_unused = 0; @@ -153,7 +153,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, * group descriptor */ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, - ext4_group_t block_group, + unsigned int block_group, struct buffer_head ** bh) { unsigned long group_desc; @@ -164,7 +164,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, if (block_group >= sbi->s_groups_count) { ext4_error (sb, "ext4_get_group_desc", "block_group >= groups_count - " - "block_group = %lu, groups_count = %lu", + "block_group = %d, groups_count = %lu", block_group, sbi->s_groups_count); return NULL; @@ -176,7 +176,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, if (!sbi->s_group_desc[group_desc]) { ext4_error (sb, "ext4_get_group_desc", "Group descriptor not loaded - " - "block_group = %lu, group_desc = %lu, desc = %lu", + "block_group = %d, group_desc = %lu, desc = %lu", block_group, group_desc, offset); return NULL; } @@ -189,70 +189,18 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, return desc; } -static int ext4_valid_block_bitmap(struct super_block *sb, - struct ext4_group_desc *desc, - unsigned int block_group, - struct buffer_head *bh) -{ - ext4_grpblk_t offset; - ext4_grpblk_t next_zero_bit; - ext4_fsblk_t bitmap_blk; - ext4_fsblk_t group_first_block; - - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { - /* with FLEX_BG, the inode/block bitmaps and itable - * blocks may not be in the group at all - * so the bitmap validation will be skipped for those groups - * or it has to also read the block group where the bitmaps - * are located to verify they are set. - */ - return 1; - } - group_first_block = ext4_group_first_block_no(sb, block_group); - - /* check whether block bitmap block number is set */ - bitmap_blk = ext4_block_bitmap(sb, desc); - offset = bitmap_blk - group_first_block; - if (!ext4_test_bit(offset, bh->b_data)) - /* bad block bitmap */ - goto err_out; - - /* check whether the inode bitmap block number is set */ - bitmap_blk = ext4_inode_bitmap(sb, desc); - offset = bitmap_blk - group_first_block; - if (!ext4_test_bit(offset, bh->b_data)) - /* bad block bitmap */ - goto err_out; - - /* check whether the inode table block number is set */ - bitmap_blk = ext4_inode_table(sb, desc); - offset = bitmap_blk - group_first_block; - next_zero_bit = ext4_find_next_zero_bit(bh->b_data, - offset + EXT4_SB(sb)->s_itb_per_group, - offset); - if (next_zero_bit >= offset + EXT4_SB(sb)->s_itb_per_group) - /* good bitmap for inode tables */ - return 1; - -err_out: - ext4_error(sb, __FUNCTION__, - "Invalid block bitmap - " - "block_group = %d, block = %llu", - block_group, bitmap_blk); - return 0; -} /** * read_block_bitmap() * @sb: super block * @block_group: given block group * - * Read the bitmap for a given block_group,and validate the - * bits for block/inode/inode tables are set in the bitmaps + * Read the bitmap for a given block_group, reading into the specified + * slot in the superblock's bitmap cache. * * Return buffer_head on success or NULL in case of failure. 
*/ struct buffer_head * -read_block_bitmap(struct super_block *sb, ext4_group_t block_group) +read_block_bitmap(struct super_block *sb, unsigned int block_group) { struct ext4_group_desc * desc; struct buffer_head * bh = NULL; @@ -262,36 +210,25 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group) if (!desc) return NULL; bitmap_blk = ext4_block_bitmap(sb, desc); - bh = sb_getblk(sb, bitmap_blk); - if (unlikely(!bh)) { - ext4_error(sb, __FUNCTION__, - "Cannot read block bitmap - " - "block_group = %d, block_bitmap = %llu", - (int)block_group, (unsigned long long)bitmap_blk); - return NULL; - } - if (bh_uptodate_or_lock(bh)) - return bh; - if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { - ext4_init_block_bitmap(sb, bh, block_group, desc); - set_buffer_uptodate(bh); - unlock_buffer(bh); - return bh; + bh = sb_getblk(sb, bitmap_blk); + if (!buffer_uptodate(bh)) { + lock_buffer(bh); + if (!buffer_uptodate(bh)) { + ext4_init_block_bitmap(sb, bh, block_group, + desc); + set_buffer_uptodate(bh); + } + unlock_buffer(bh); + } + } else { + bh = sb_bread(sb, bitmap_blk); } - if (bh_submit_read(bh) < 0) { - put_bh(bh); - ext4_error(sb, __FUNCTION__, + if (!bh) + ext4_error (sb, __FUNCTION__, "Cannot read block bitmap - " "block_group = %d, block_bitmap = %llu", - (int)block_group, (unsigned long long)bitmap_blk); - return NULL; - } - if (!ext4_valid_block_bitmap(sb, desc, block_group, bh)) { - put_bh(bh); - return NULL; - } - + block_group, bitmap_blk); return bh; } /* @@ -383,7 +320,7 @@ static void __rsv_window_dump(struct rb_root *root, int verbose, */ static int goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal, - ext4_group_t group, struct super_block *sb) + unsigned int group, struct super_block * sb) { ext4_fsblk_t group_first_block, group_last_block; @@ -526,7 +463,7 @@ static inline int rsv_is_empty(struct ext4_reserve_window *rsv) * when setting the reservation window size through ioctl before the file * is open for write (needs block allocation). * - * Needs down_write(i_data_sem) protection prior to call this function. + * Needs truncate_mutex protection prior to call this function. 
*/ void ext4_init_block_alloc_info(struct inode *inode) { @@ -577,8 +514,6 @@ void ext4_discard_reservation(struct inode *inode) struct ext4_reserve_window_node *rsv; spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock; - ext4_mb_discard_inode_preallocations(inode); - if (!block_i) return; @@ -605,7 +540,7 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, { struct buffer_head *bitmap_bh = NULL; struct buffer_head *gd_bh; - ext4_group_t block_group; + unsigned long block_group; ext4_grpblk_t bit; unsigned long i; unsigned long overflow; @@ -652,13 +587,11 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, in_range(ext4_inode_bitmap(sb, desc), block, count) || in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || in_range(block + count - 1, ext4_inode_table(sb, desc), - sbi->s_itb_per_group)) { + sbi->s_itb_per_group)) ext4_error (sb, "ext4_free_blocks", "Freeing blocks in system zones - " "Block = %llu, count = %lu", block, count); - goto error_return; - } /* * We are about to start releasing blocks in the bitmap, @@ -787,29 +720,19 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, * @inode: inode * @block: start physical block to free * @count: number of blocks to count - * @metadata: Are these metadata blocks */ void ext4_free_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t block, unsigned long count, - int metadata) + ext4_fsblk_t block, unsigned long count) { struct super_block * sb; unsigned long dquot_freed_blocks; - /* this isn't the right place to decide whether block is metadata - * inode.c/extents.c knows better, but for safety ... */ - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) || - ext4_should_journal_data(inode)) - metadata = 1; - sb = inode->i_sb; - - if (!test_opt(sb, MBALLOC) || !EXT4_SB(sb)->s_group_info) - ext4_free_blocks_sb(handle, sb, block, count, - &dquot_freed_blocks); - else - ext4_mb_free_blocks(handle, inode, block, count, - metadata, &dquot_freed_blocks); + if (!sb) { + printk ("ext4_free_blocks: nonexistent device"); + return; + } + ext4_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); if (dquot_freed_blocks) DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); return; @@ -997,10 +920,9 @@ claim_block(spinlock_t *lock, ext4_grpblk_t block, struct buffer_head *bh) * ext4_journal_release_buffer(), else we'll run out of credits. 
*/ static ext4_grpblk_t -ext4_try_to_allocate(struct super_block *sb, handle_t *handle, - ext4_group_t group, struct buffer_head *bitmap_bh, - ext4_grpblk_t grp_goal, unsigned long *count, - struct ext4_reserve_window *my_rsv) +ext4_try_to_allocate(struct super_block *sb, handle_t *handle, int group, + struct buffer_head *bitmap_bh, ext4_grpblk_t grp_goal, + unsigned long *count, struct ext4_reserve_window *my_rsv) { ext4_fsblk_t group_first_block; ext4_grpblk_t start, end; @@ -1234,7 +1156,7 @@ static int find_next_reservable_window( */ static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv, ext4_grpblk_t grp_goal, struct super_block *sb, - ext4_group_t group, struct buffer_head *bitmap_bh) + unsigned int group, struct buffer_head *bitmap_bh) { struct ext4_reserve_window_node *search_head; ext4_fsblk_t group_first_block, group_end_block, start_block; @@ -1432,7 +1354,7 @@ static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv, */ static ext4_grpblk_t ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, - ext4_group_t group, struct buffer_head *bitmap_bh, + unsigned int group, struct buffer_head *bitmap_bh, ext4_grpblk_t grp_goal, struct ext4_reserve_window_node * my_rsv, unsigned long *count, int *errp) @@ -1588,7 +1510,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries) } /** - * ext4_new_blocks_old() -- core block(s) allocation function + * ext4_new_blocks() -- core block(s) allocation function * @handle: handle to this transaction * @inode: file inode * @goal: given target block(filesystem wide) @@ -1601,17 +1523,17 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries) * any specific goal block. * */ -ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, +ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, ext4_fsblk_t goal, unsigned long *count, int *errp) { struct buffer_head *bitmap_bh = NULL; struct buffer_head *gdp_bh; - ext4_group_t group_no; - ext4_group_t goal_group; + unsigned long group_no; + int goal_group; ext4_grpblk_t grp_target_blk; /* blockgroup relative goal block */ ext4_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/ ext4_fsblk_t ret_block; /* filesyetem-wide allocated block */ - ext4_group_t bgi; /* blockgroup iteration index */ + int bgi; /* blockgroup iteration index */ int fatal = 0, err; int performed_allocation = 0; ext4_grpblk_t free_blocks; /* number of free blocks in a group */ @@ -1622,7 +1544,10 @@ ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, struct ext4_reserve_window_node *my_rsv = NULL; struct ext4_block_alloc_info *block_i; unsigned short windowsz = 0; - ext4_group_t ngroups; +#ifdef EXT4FS_DEBUG + static int goal_hits, goal_attempts; +#endif + unsigned long ngroups; unsigned long num = *count; *errp = -ENOSPC; @@ -1642,7 +1567,7 @@ ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, sbi = EXT4_SB(sb); es = EXT4_SB(sb)->s_es; - ext4_debug("goal=%llu.\n", goal); + ext4_debug("goal=%lu.\n", goal); /* * Allocate a block from reservation only when * filesystem is mounted with reservation(default,-o reservation), and @@ -1752,7 +1677,7 @@ ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, allocated: - ext4_debug("using block group %lu(%d)\n", + ext4_debug("using block group %d(%d)\n", group_no, gdp->bg_free_blocks_count); BUFFER_TRACE(gdp_bh, "get_write_access"); @@ -1767,13 +1692,11 @@ ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct 
inode *inode, in_range(ret_block, ext4_inode_table(sb, gdp), EXT4_SB(sb)->s_itb_per_group) || in_range(ret_block + num - 1, ext4_inode_table(sb, gdp), - EXT4_SB(sb)->s_itb_per_group)) { + EXT4_SB(sb)->s_itb_per_group)) ext4_error(sb, "ext4_new_block", "Allocating block in system zone - " "blocks from %llu, length %lu", ret_block, num); - goto out; - } performed_allocation = 1; @@ -1820,6 +1743,9 @@ ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, * list of some description. We don't know in advance whether * the caller wants to use it as metadata or data. */ + ext4_debug("allocating block %lu. Goal hits %d of %d.\n", + ret_block, goal_hits, goal_attempts); + spin_lock(sb_bgl_lock(sbi, group_no)); if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); @@ -1861,46 +1787,13 @@ ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, } ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, int *errp) + ext4_fsblk_t goal, int *errp) { - struct ext4_allocation_request ar; - ext4_fsblk_t ret; + unsigned long count = 1; - if (!test_opt(inode->i_sb, MBALLOC)) { - unsigned long count = 1; - ret = ext4_new_blocks_old(handle, inode, goal, &count, errp); - return ret; - } - - memset(&ar, 0, sizeof(ar)); - ar.inode = inode; - ar.goal = goal; - ar.len = 1; - ret = ext4_mb_new_blocks(handle, &ar, errp); - return ret; -} - -ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, unsigned long *count, int *errp) -{ - struct ext4_allocation_request ar; - ext4_fsblk_t ret; - - if (!test_opt(inode->i_sb, MBALLOC)) { - ret = ext4_new_blocks_old(handle, inode, goal, count, errp); - return ret; - } - - memset(&ar, 0, sizeof(ar)); - ar.inode = inode; - ar.goal = goal; - ar.len = *count; - ret = ext4_mb_new_blocks(handle, &ar, errp); - *count = ar.len; - return ret; + return ext4_new_blocks(handle, inode, goal, &count, errp); } - /** * ext4_count_free_blocks() -- count filesystem free blocks * @sb: superblock @@ -1911,8 +1804,8 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) { ext4_fsblk_t desc_count; struct ext4_group_desc *gdp; - ext4_group_t i; - ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; + int i; + unsigned long ngroups = EXT4_SB(sb)->s_groups_count; #ifdef EXT4FS_DEBUG struct ext4_super_block *es; ext4_fsblk_t bitmap_count; @@ -1936,14 +1829,14 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) continue; x = ext4_count_free(bitmap_bh, sb->s_blocksize); - printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n", + printk("group %d: stored = %d, counted = %lu\n", i, le16_to_cpu(gdp->bg_free_blocks_count), x); bitmap_count += x; } brelse(bitmap_bh); printk("ext4_count_free_blocks: stored = %llu" ", computed = %llu, %llu\n", - ext4_free_blocks_count(es), + EXT4_FREE_BLOCKS_COUNT(es), desc_count, bitmap_count); return bitmap_count; #else @@ -1960,7 +1853,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) #endif } -static inline int test_root(ext4_group_t a, int b) +static inline int test_root(int a, int b) { int num = b; @@ -1969,7 +1862,7 @@ static inline int test_root(ext4_group_t a, int b) return num == a; } -static int ext4_group_sparse(ext4_group_t group) +static int ext4_group_sparse(int group) { if (group <= 1) return 1; @@ -1987,7 +1880,7 @@ static int ext4_group_sparse(ext4_group_t group) * Return the number of blocks used by the superblock (primary or backup) * in this group. 
Currently this will be only 0 or 1. */ -int ext4_bg_has_super(struct super_block *sb, ext4_group_t group) +int ext4_bg_has_super(struct super_block *sb, int group) { if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) && @@ -1996,20 +1889,18 @@ int ext4_bg_has_super(struct super_block *sb, ext4_group_t group) return 1; } -static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb, - ext4_group_t group) +static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb, int group) { unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb); - ext4_group_t first = metagroup * EXT4_DESC_PER_BLOCK(sb); - ext4_group_t last = first + EXT4_DESC_PER_BLOCK(sb) - 1; + unsigned long first = metagroup * EXT4_DESC_PER_BLOCK(sb); + unsigned long last = first + EXT4_DESC_PER_BLOCK(sb) - 1; if (group == first || group == first + 1 || group == last) return 1; return 0; } -static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, - ext4_group_t group) +static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, int group) { if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) && @@ -2027,7 +1918,7 @@ static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, * (primary or backup) in this group. In the future there may be a * different number of descriptor blocks in each group. */ -unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group) +unsigned long ext4_bg_num_gdb(struct super_block *sb, int group) { unsigned long first_meta_bg = le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg); diff --git a/trunk/fs/ext4/dir.c b/trunk/fs/ext4/dir.c index 33888bb58144..f612bef98315 100644 --- a/trunk/fs/ext4/dir.c +++ b/trunk/fs/ext4/dir.c @@ -67,7 +67,7 @@ int ext4_check_dir_entry (const char * function, struct inode * dir, unsigned long offset) { const char * error_msg = NULL; - const int rlen = ext4_rec_len_from_disk(de->rec_len); + const int rlen = le16_to_cpu(de->rec_len); if (rlen < EXT4_DIR_REC_LEN(1)) error_msg = "rec_len is smaller than minimal"; @@ -124,7 +124,7 @@ static int ext4_readdir(struct file * filp, offset = filp->f_pos & (sb->s_blocksize - 1); while (!error && !stored && filp->f_pos < inode->i_size) { - ext4_lblk_t blk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb); + unsigned long blk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb); struct buffer_head map_bh; struct buffer_head *bh = NULL; @@ -172,10 +172,10 @@ static int ext4_readdir(struct file * filp, * least that it is non-zero. A * failure will be detected in the * dirent test below. 
*/ - if (ext4_rec_len_from_disk(de->rec_len) - < EXT4_DIR_REC_LEN(1)) + if (le16_to_cpu(de->rec_len) < + EXT4_DIR_REC_LEN(1)) break; - i += ext4_rec_len_from_disk(de->rec_len); + i += le16_to_cpu(de->rec_len); } offset = i; filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) @@ -197,7 +197,7 @@ static int ext4_readdir(struct file * filp, ret = stored; goto out; } - offset += ext4_rec_len_from_disk(de->rec_len); + offset += le16_to_cpu(de->rec_len); if (le32_to_cpu(de->inode)) { /* We might block in the next section * if the data destination is @@ -219,7 +219,7 @@ static int ext4_readdir(struct file * filp, goto revalidate; stored ++; } - filp->f_pos += ext4_rec_len_from_disk(de->rec_len); + filp->f_pos += le16_to_cpu(de->rec_len); } offset = 0; brelse (bh); diff --git a/trunk/fs/ext4/extents.c b/trunk/fs/ext4/extents.c index bc7081f1fbe8..85287742f2ae 100644 --- a/trunk/fs/ext4/extents.c +++ b/trunk/fs/ext4/extents.c @@ -61,7 +61,7 @@ static ext4_fsblk_t ext_pblock(struct ext4_extent *ex) * idx_pblock: * combine low and high parts of a leaf physical block number into ext4_fsblk_t */ -ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix) +static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix) { ext4_fsblk_t block; @@ -75,7 +75,7 @@ ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix) * stores a large physical block number into an extent struct, * breaking it into parts */ -void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) +static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) { ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); @@ -144,7 +144,7 @@ static int ext4_ext_dirty(handle_t *handle, struct inode *inode, static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, struct ext4_ext_path *path, - ext4_lblk_t block) + ext4_fsblk_t block) { struct ext4_inode_info *ei = EXT4_I(inode); ext4_fsblk_t bg_start; @@ -367,14 +367,13 @@ static void ext4_ext_drop_refs(struct ext4_ext_path *path) * the header must be checked before calling this */ static void -ext4_ext_binsearch_idx(struct inode *inode, - struct ext4_ext_path *path, ext4_lblk_t block) +ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int block) { struct ext4_extent_header *eh = path->p_hdr; struct ext4_extent_idx *r, *l, *m; - ext_debug("binsearch for %u(idx): ", block); + ext_debug("binsearch for %d(idx): ", block); l = EXT_FIRST_INDEX(eh) + 1; r = EXT_LAST_INDEX(eh); @@ -426,8 +425,7 @@ ext4_ext_binsearch_idx(struct inode *inode, * the header must be checked before calling this */ static void -ext4_ext_binsearch(struct inode *inode, - struct ext4_ext_path *path, ext4_lblk_t block) +ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block) { struct ext4_extent_header *eh = path->p_hdr; struct ext4_extent *r, *l, *m; @@ -440,7 +438,7 @@ ext4_ext_binsearch(struct inode *inode, return; } - ext_debug("binsearch for %u: ", block); + ext_debug("binsearch for %d: ", block); l = EXT_FIRST_EXTENT(eh) + 1; r = EXT_LAST_EXTENT(eh); @@ -496,8 +494,7 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode) } struct ext4_ext_path * -ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, - struct ext4_ext_path *path) +ext4_ext_find_extent(struct inode *inode, int block, struct ext4_ext_path *path) { struct ext4_extent_header *eh; struct buffer_head *bh; @@ -766,7 +763,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, while (k--) { oldblock = 
newblock; newblock = ablocks[--a]; - bh = sb_getblk(inode->i_sb, newblock); + bh = sb_getblk(inode->i_sb, (ext4_fsblk_t)newblock); if (!bh) { err = -EIO; goto cleanup; @@ -786,8 +783,9 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, fidx->ei_block = border; ext4_idx_store_pblock(fidx, oldblock); - ext_debug("int.index at %d (block %llu): %u -> %llu\n", - i, newblock, le32_to_cpu(border), oldblock); + ext_debug("int.index at %d (block %llu): %lu -> %llu\n", i, + newblock, (unsigned long) le32_to_cpu(border), + oldblock); /* copy indexes */ m = 0; path[i].p_idx++; @@ -853,7 +851,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, for (i = 0; i < depth; i++) { if (!ablocks[i]) continue; - ext4_free_blocks(handle, inode, ablocks[i], 1, 1); + ext4_free_blocks(handle, inode, ablocks[i], 1); } } kfree(ablocks); @@ -981,8 +979,8 @@ static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, /* refill path */ ext4_ext_drop_refs(path); path = ext4_ext_find_extent(inode, - (ext4_lblk_t)le32_to_cpu(newext->ee_block), - path); + le32_to_cpu(newext->ee_block), + path); if (IS_ERR(path)) err = PTR_ERR(path); } else { @@ -994,8 +992,8 @@ static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, /* refill path */ ext4_ext_drop_refs(path); path = ext4_ext_find_extent(inode, - (ext4_lblk_t)le32_to_cpu(newext->ee_block), - path); + le32_to_cpu(newext->ee_block), + path); if (IS_ERR(path)) { err = PTR_ERR(path); goto out; @@ -1016,150 +1014,6 @@ static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, return err; } -/* - * search the closest allocated block to the left for *logical - * and returns it at @logical + it's physical address at @phys - * if *logical is the smallest allocated block, the function - * returns 0 at @phys - * return value contains 0 (success) or error code - */ -int -ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path, - ext4_lblk_t *logical, ext4_fsblk_t *phys) -{ - struct ext4_extent_idx *ix; - struct ext4_extent *ex; - int depth, ee_len; - - BUG_ON(path == NULL); - depth = path->p_depth; - *phys = 0; - - if (depth == 0 && path->p_ext == NULL) - return 0; - - /* usually extent in the path covers blocks smaller - * then *logical, but it can be that extent is the - * first one in the file */ - - ex = path[depth].p_ext; - ee_len = ext4_ext_get_actual_len(ex); - if (*logical < le32_to_cpu(ex->ee_block)) { - BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex); - while (--depth >= 0) { - ix = path[depth].p_idx; - BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr)); - } - return 0; - } - - BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len)); - - *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; - *phys = ext_pblock(ex) + ee_len - 1; - return 0; -} - -/* - * search the closest allocated block to the right for *logical - * and returns it at @logical + it's physical address at @phys - * if *logical is the smallest allocated block, the function - * returns 0 at @phys - * return value contains 0 (success) or error code - */ -int -ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, - ext4_lblk_t *logical, ext4_fsblk_t *phys) -{ - struct buffer_head *bh = NULL; - struct ext4_extent_header *eh; - struct ext4_extent_idx *ix; - struct ext4_extent *ex; - ext4_fsblk_t block; - int depth, ee_len; - - BUG_ON(path == NULL); - depth = path->p_depth; - *phys = 0; - - if (depth == 0 && path->p_ext == NULL) - return 0; - - /* usually extent in the path covers blocks smaller - * then 
*logical, but it can be that extent is the - * first one in the file */ - - ex = path[depth].p_ext; - ee_len = ext4_ext_get_actual_len(ex); - if (*logical < le32_to_cpu(ex->ee_block)) { - BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex); - while (--depth >= 0) { - ix = path[depth].p_idx; - BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr)); - } - *logical = le32_to_cpu(ex->ee_block); - *phys = ext_pblock(ex); - return 0; - } - - BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len)); - - if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { - /* next allocated block in this leaf */ - ex++; - *logical = le32_to_cpu(ex->ee_block); - *phys = ext_pblock(ex); - return 0; - } - - /* go up and search for index to the right */ - while (--depth >= 0) { - ix = path[depth].p_idx; - if (ix != EXT_LAST_INDEX(path[depth].p_hdr)) - break; - } - - if (depth < 0) { - /* we've gone up to the root and - * found no index to the right */ - return 0; - } - - /* we've found index to the right, let's - * follow it and find the closest allocated - * block to the right */ - ix++; - block = idx_pblock(ix); - while (++depth < path->p_depth) { - bh = sb_bread(inode->i_sb, block); - if (bh == NULL) - return -EIO; - eh = ext_block_hdr(bh); - if (ext4_ext_check_header(inode, eh, depth)) { - put_bh(bh); - return -EIO; - } - ix = EXT_FIRST_INDEX(eh); - block = idx_pblock(ix); - put_bh(bh); - } - - bh = sb_bread(inode->i_sb, block); - if (bh == NULL) - return -EIO; - eh = ext_block_hdr(bh); - if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) { - put_bh(bh); - return -EIO; - } - ex = EXT_FIRST_EXTENT(eh); - *logical = le32_to_cpu(ex->ee_block); - *phys = ext_pblock(ex); - put_bh(bh); - return 0; - -} - /* * ext4_ext_next_allocated_block: * returns allocated block in subsequent extent or EXT_MAX_BLOCK. @@ -1167,7 +1021,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, * allocated block. Thus, index entries have to be consistent * with leaves. */ -static ext4_lblk_t +static unsigned long ext4_ext_next_allocated_block(struct ext4_ext_path *path) { int depth; @@ -1200,7 +1054,7 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path) * ext4_ext_next_leaf_block: * returns first allocated block from next leaf or EXT_MAX_BLOCK */ -static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, +static unsigned ext4_ext_next_leaf_block(struct inode *inode, struct ext4_ext_path *path) { int depth; @@ -1218,8 +1072,7 @@ static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, while (depth >= 0) { if (path[depth].p_idx != EXT_LAST_INDEX(path[depth].p_hdr)) - return (ext4_lblk_t) - le32_to_cpu(path[depth].p_idx[1].ei_block); + return le32_to_cpu(path[depth].p_idx[1].ei_block); depth--; } @@ -1232,7 +1085,7 @@ static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, * then we have to correct all indexes above. * TODO: do we need to correct tree in all cases? 
*/ -static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, +int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, struct ext4_ext_path *path) { struct ext4_extent_header *eh; @@ -1318,7 +1171,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, if (ext1_ee_len + ext2_ee_len > max_len) return 0; #ifdef AGGRESSIVE_TEST - if (ext1_ee_len >= 4) + if (le16_to_cpu(ex1->ee_len) >= 4) return 0; #endif @@ -1386,7 +1239,7 @@ unsigned int ext4_ext_check_overlap(struct inode *inode, struct ext4_extent *newext, struct ext4_ext_path *path) { - ext4_lblk_t b1, b2; + unsigned long b1, b2; unsigned int depth, len1; unsigned int ret = 0; @@ -1407,7 +1260,7 @@ unsigned int ext4_ext_check_overlap(struct inode *inode, goto out; } - /* check for wrap through zero on extent logical start block*/ + /* check for wrap through zero */ if (b1 + len1 < b1) { len1 = EXT_MAX_BLOCK - b1; newext->ee_len = cpu_to_le16(len1); @@ -1437,8 +1290,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, struct ext4_extent *ex, *fex; struct ext4_extent *nearex; /* nearest extent */ struct ext4_ext_path *npath = NULL; - int depth, len, err; - ext4_lblk_t next; + int depth, len, err, next; unsigned uninitialized = 0; BUG_ON(ext4_ext_get_actual_len(newext) == 0); @@ -1583,8 +1435,114 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, return err; } +int ext4_ext_walk_space(struct inode *inode, unsigned long block, + unsigned long num, ext_prepare_callback func, + void *cbdata) +{ + struct ext4_ext_path *path = NULL; + struct ext4_ext_cache cbex; + struct ext4_extent *ex; + unsigned long next, start = 0, end = 0; + unsigned long last = block + num; + int depth, exists, err = 0; + + BUG_ON(func == NULL); + BUG_ON(inode == NULL); + + while (block < last && block != EXT_MAX_BLOCK) { + num = last - block; + /* find extent for this block */ + path = ext4_ext_find_extent(inode, block, path); + if (IS_ERR(path)) { + err = PTR_ERR(path); + path = NULL; + break; + } + + depth = ext_depth(inode); + BUG_ON(path[depth].p_hdr == NULL); + ex = path[depth].p_ext; + next = ext4_ext_next_allocated_block(path); + + exists = 0; + if (!ex) { + /* there is no extent yet, so try to allocate + * all requested space */ + start = block; + end = block + num; + } else if (le32_to_cpu(ex->ee_block) > block) { + /* need to allocate space before found extent */ + start = block; + end = le32_to_cpu(ex->ee_block); + if (block + num < end) + end = block + num; + } else if (block >= le32_to_cpu(ex->ee_block) + + ext4_ext_get_actual_len(ex)) { + /* need to allocate space after found extent */ + start = block; + end = block + num; + if (end >= next) + end = next; + } else if (block >= le32_to_cpu(ex->ee_block)) { + /* + * some part of requested space is covered + * by found extent + */ + start = block; + end = le32_to_cpu(ex->ee_block) + + ext4_ext_get_actual_len(ex); + if (block + num < end) + end = block + num; + exists = 1; + } else { + BUG(); + } + BUG_ON(end <= start); + + if (!exists) { + cbex.ec_block = start; + cbex.ec_len = end - start; + cbex.ec_start = 0; + cbex.ec_type = EXT4_EXT_CACHE_GAP; + } else { + cbex.ec_block = le32_to_cpu(ex->ee_block); + cbex.ec_len = ext4_ext_get_actual_len(ex); + cbex.ec_start = ext_pblock(ex); + cbex.ec_type = EXT4_EXT_CACHE_EXTENT; + } + + BUG_ON(cbex.ec_len == 0); + err = func(inode, path, &cbex, cbdata); + ext4_ext_drop_refs(path); + + if (err < 0) + break; + if (err == EXT_REPEAT) + continue; + else if (err == EXT_BREAK) { + err = 0; + 
break; + } + + if (ext_depth(inode) != depth) { + /* depth was changed. we have to realloc path */ + kfree(path); + path = NULL; + } + + block = cbex.ec_block + cbex.ec_len; + } + + if (path) { + ext4_ext_drop_refs(path); + kfree(path); + } + + return err; +} + static void -ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, +ext4_ext_put_in_cache(struct inode *inode, __u32 block, __u32 len, ext4_fsblk_t start, int type) { struct ext4_ext_cache *cex; @@ -1603,11 +1561,10 @@ ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, */ static void ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, - ext4_lblk_t block) + unsigned long block) { int depth = ext_depth(inode); - unsigned long len; - ext4_lblk_t lblock; + unsigned long lblock, len; struct ext4_extent *ex; ex = path[depth].p_ext; @@ -1619,34 +1576,32 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, } else if (block < le32_to_cpu(ex->ee_block)) { lblock = block; len = le32_to_cpu(ex->ee_block) - block; - ext_debug("cache gap(before): %u [%u:%u]", - block, - le32_to_cpu(ex->ee_block), - ext4_ext_get_actual_len(ex)); + ext_debug("cache gap(before): %lu [%lu:%lu]", + (unsigned long) block, + (unsigned long) le32_to_cpu(ex->ee_block), + (unsigned long) ext4_ext_get_actual_len(ex)); } else if (block >= le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex)) { - ext4_lblk_t next; lblock = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex); - - next = ext4_ext_next_allocated_block(path); - ext_debug("cache gap(after): [%u:%u] %u", - le32_to_cpu(ex->ee_block), - ext4_ext_get_actual_len(ex), - block); - BUG_ON(next == lblock); - len = next - lblock; + len = ext4_ext_next_allocated_block(path); + ext_debug("cache gap(after): [%lu:%lu] %lu", + (unsigned long) le32_to_cpu(ex->ee_block), + (unsigned long) ext4_ext_get_actual_len(ex), + (unsigned long) block); + BUG_ON(len == lblock); + len = len - lblock; } else { lblock = len = 0; BUG(); } - ext_debug(" -> %u:%lu\n", lblock, len); + ext_debug(" -> %lu:%lu\n", (unsigned long) lblock, len); ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP); } static int -ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, +ext4_ext_in_cache(struct inode *inode, unsigned long block, struct ext4_extent *ex) { struct ext4_ext_cache *cex; @@ -1663,9 +1618,11 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, ex->ee_block = cpu_to_le32(cex->ec_block); ext4_ext_store_pblock(ex, cex->ec_start); ex->ee_len = cpu_to_le16(cex->ec_len); - ext_debug("%u cached by %u:%u:%llu\n", - block, - cex->ec_block, cex->ec_len, cex->ec_start); + ext_debug("%lu cached by %lu:%lu:%llu\n", + (unsigned long) block, + (unsigned long) cex->ec_block, + (unsigned long) cex->ec_len, + cex->ec_start); return cex->ec_type; } @@ -1679,7 +1636,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, * It's used in truncate case only, thus all requests are for * last index in the block only. 
*/ -static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, +int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, struct ext4_ext_path *path) { struct buffer_head *bh; @@ -1700,7 +1657,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, ext_debug("index is empty, remove it, free block %llu\n", leaf); bh = sb_find_get_block(inode->i_sb, leaf); ext4_forget(handle, 1, inode, bh, leaf); - ext4_free_blocks(handle, inode, leaf, 1, 1); + ext4_free_blocks(handle, inode, leaf, 1); return err; } @@ -1709,7 +1666,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, * This routine returns max. credits that the extent tree can consume. * It should be OK for low-performance paths like ->writepage() * To allow many writing processes to fit into a single transaction, - * the caller should calculate credits under i_data_sem and + * the caller should calculate credits under truncate_mutex and * pass the actual path. */ int ext4_ext_calc_credits_for_insert(struct inode *inode, @@ -1757,14 +1714,12 @@ int ext4_ext_calc_credits_for_insert(struct inode *inode, static int ext4_remove_blocks(handle_t *handle, struct inode *inode, struct ext4_extent *ex, - ext4_lblk_t from, ext4_lblk_t to) + unsigned long from, unsigned long to) { struct buffer_head *bh; unsigned short ee_len = ext4_ext_get_actual_len(ex); - int i, metadata = 0; + int i; - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - metadata = 1; #ifdef EXTENTS_STATS { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); @@ -1783,45 +1738,42 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, if (from >= le32_to_cpu(ex->ee_block) && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { /* tail removal */ - ext4_lblk_t num; + unsigned long num; ext4_fsblk_t start; - num = le32_to_cpu(ex->ee_block) + ee_len - from; start = ext_pblock(ex) + ee_len - num; - ext_debug("free last %u blocks starting %llu\n", num, start); + ext_debug("free last %lu blocks starting %llu\n", num, start); for (i = 0; i < num; i++) { bh = sb_find_get_block(inode->i_sb, start + i); ext4_forget(handle, 0, inode, bh, start + i); } - ext4_free_blocks(handle, inode, start, num, metadata); + ext4_free_blocks(handle, inode, start, num); } else if (from == le32_to_cpu(ex->ee_block) && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { - printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", + printk("strange request: removal %lu-%lu from %u:%u\n", from, to, le32_to_cpu(ex->ee_block), ee_len); } else { - printk(KERN_INFO "strange request: removal(2) " - "%u-%u from %u:%u\n", - from, to, le32_to_cpu(ex->ee_block), ee_len); + printk("strange request: removal(2) %lu-%lu from %u:%u\n", + from, to, le32_to_cpu(ex->ee_block), ee_len); } return 0; } static int ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, ext4_lblk_t start) + struct ext4_ext_path *path, unsigned long start) { int err = 0, correct_index = 0; int depth = ext_depth(inode), credits; struct ext4_extent_header *eh; - ext4_lblk_t a, b, block; - unsigned num; - ext4_lblk_t ex_ee_block; + unsigned a, b, block, num; + unsigned long ex_ee_block; unsigned short ex_ee_len; unsigned uninitialized = 0; struct ext4_extent *ex; /* the header must be checked already in ext4_ext_remove_space() */ - ext_debug("truncate since %u in leaf\n", start); + ext_debug("truncate since %lu in leaf\n", start); if (!path[depth].p_hdr) path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); eh = path[depth].p_hdr; @@ -1952,7 +1904,7 @@ 
ext4_ext_more_to_rm(struct ext4_ext_path *path) return 1; } -static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) +int ext4_ext_remove_space(struct inode *inode, unsigned long start) { struct super_block *sb = inode->i_sb; int depth = ext_depth(inode); @@ -1960,7 +1912,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) handle_t *handle; int i = 0, err = 0; - ext_debug("truncate since %u\n", start); + ext_debug("truncate since %lu\n", start); /* probably first extent we're gonna free will be last in block */ handle = ext4_journal_start(inode, depth + 1); @@ -2142,19 +2094,17 @@ void ext4_ext_release(struct super_block *sb) * b> Splits in two extents: Write is happening at either end of the extent * c> Splits in three extents: Somone is writing in middle of the extent */ -static int ext4_ext_convert_to_initialized(handle_t *handle, - struct inode *inode, - struct ext4_ext_path *path, - ext4_lblk_t iblock, - unsigned long max_blocks) +int ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path, + ext4_fsblk_t iblock, + unsigned long max_blocks) { struct ext4_extent *ex, newex; struct ext4_extent *ex1 = NULL; struct ext4_extent *ex2 = NULL; struct ext4_extent *ex3 = NULL; struct ext4_extent_header *eh; - ext4_lblk_t ee_block; - unsigned int allocated, ee_len, depth; + unsigned int allocated, ee_block, ee_len, depth; ext4_fsblk_t newblock; int err = 0; int ret = 0; @@ -2275,13 +2225,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, return err ? err : allocated; } -/* - * Need to be called with - * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block - * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem) - */ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, - ext4_lblk_t iblock, + ext4_fsblk_t iblock, unsigned long max_blocks, struct buffer_head *bh_result, int create, int extend_disksize) { @@ -2291,11 +2236,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ext4_fsblk_t goal, newblock; int err = 0, depth, ret; unsigned long allocated = 0; - struct ext4_allocation_request ar; __clear_bit(BH_New, &bh_result->b_state); - ext_debug("blocks %u/%lu requested for inode %u\n", - iblock, max_blocks, inode->i_ino); + ext_debug("blocks %d/%lu requested for inode %u\n", (int) iblock, + max_blocks, (unsigned) inode->i_ino); + mutex_lock(&EXT4_I(inode)->truncate_mutex); /* check in cache */ goal = ext4_ext_in_cache(inode, iblock, &newex); @@ -2315,7 +2260,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, - le32_to_cpu(newex.ee_block) + ext_pblock(&newex); /* number of remaining blocks in the extent */ - allocated = ext4_ext_get_actual_len(&newex) - + allocated = le16_to_cpu(newex.ee_len) - (iblock - le32_to_cpu(newex.ee_block)); goto out; } else { @@ -2343,7 +2288,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ex = path[depth].p_ext; if (ex) { - ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); + unsigned long ee_block = le32_to_cpu(ex->ee_block); ext4_fsblk_t ee_start = ext_pblock(ex); unsigned short ee_len; @@ -2357,7 +2302,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, newblock = iblock - ee_block + ee_start; /* number of remaining blocks in the extent */ allocated = ee_len - (iblock - ee_block); - ext_debug("%u fit into %lu:%d -> %llu\n", iblock, + ext_debug("%d fit into %lu:%d -> %llu\n", (int) iblock, ee_block, ee_len, newblock); /* Do not put uninitialized extent 
in the cache */ @@ -2375,10 +2320,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ret = ext4_ext_convert_to_initialized(handle, inode, path, iblock, max_blocks); - if (ret <= 0) { - err = ret; + if (ret <= 0) goto out2; - } else + else allocated = ret; goto outnew; } @@ -2403,15 +2347,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info)) ext4_init_block_alloc_info(inode); - /* find neighbour allocated blocks */ - ar.lleft = iblock; - err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft); - if (err) - goto out2; - ar.lright = iblock; - err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright); - if (err) - goto out2; + /* allocate new block */ + goal = ext4_ext_find_goal(inode, path, iblock); /* * See if request is beyond maximum number of blocks we can have in @@ -2431,21 +2368,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, newex.ee_len = cpu_to_le16(max_blocks); err = ext4_ext_check_overlap(inode, &newex, path); if (err) - allocated = ext4_ext_get_actual_len(&newex); + allocated = le16_to_cpu(newex.ee_len); else allocated = max_blocks; - - /* allocate new block */ - ar.inode = inode; - ar.goal = ext4_ext_find_goal(inode, path, iblock); - ar.logical = iblock; - ar.len = allocated; - if (S_ISREG(inode->i_mode)) - ar.flags = EXT4_MB_HINT_DATA; - else - /* disable in-core preallocation for non-regular files */ - ar.flags = 0; - newblock = ext4_mb_new_blocks(handle, &ar, &err); + newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err); if (!newblock) goto out2; ext_debug("allocate new block: goal %llu, found %llu/%lu\n", @@ -2453,17 +2379,14 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* try to insert new extent into found leaf and return */ ext4_ext_store_pblock(&newex, newblock); - newex.ee_len = cpu_to_le16(ar.len); + newex.ee_len = cpu_to_le16(allocated); if (create == EXT4_CREATE_UNINITIALIZED_EXT) /* Mark uninitialized */ ext4_ext_mark_uninitialized(&newex); err = ext4_ext_insert_extent(handle, inode, path, &newex); if (err) { /* free data blocks we just allocated */ - /* not a good idea to call discard here directly, - * but otherwise we'd need to call it every free() */ - ext4_mb_discard_inode_preallocations(inode); ext4_free_blocks(handle, inode, ext_pblock(&newex), - ext4_ext_get_actual_len(&newex), 0); + le16_to_cpu(newex.ee_len)); goto out2; } @@ -2472,7 +2395,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* previous routine could use block we allocated */ newblock = ext_pblock(&newex); - allocated = ext4_ext_get_actual_len(&newex); outnew: __set_bit(BH_New, &bh_result->b_state); @@ -2492,6 +2414,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ext4_ext_drop_refs(path); kfree(path); } + mutex_unlock(&EXT4_I(inode)->truncate_mutex); + return err ? 
err : allocated; } @@ -2499,7 +2423,7 @@ void ext4_ext_truncate(struct inode * inode, struct page *page) { struct address_space *mapping = inode->i_mapping; struct super_block *sb = inode->i_sb; - ext4_lblk_t last_block; + unsigned long last_block; handle_t *handle; int err = 0; @@ -2521,11 +2445,9 @@ void ext4_ext_truncate(struct inode * inode, struct page *page) if (page) ext4_block_truncate_page(handle, page, mapping, inode->i_size); - down_write(&EXT4_I(inode)->i_data_sem); + mutex_lock(&EXT4_I(inode)->truncate_mutex); ext4_ext_invalidate_cache(inode); - ext4_mb_discard_inode_preallocations(inode); - /* * TODO: optimization is possible here. * Probably we need not scan at all, @@ -2559,7 +2481,7 @@ void ext4_ext_truncate(struct inode * inode, struct page *page) if (inode->i_nlink) ext4_orphan_del(handle, inode); - up_write(&EXT4_I(inode)->i_data_sem); + mutex_unlock(&EXT4_I(inode)->truncate_mutex); ext4_journal_stop(handle); } @@ -2594,8 +2516,7 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num) long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) { handle_t *handle; - ext4_lblk_t block; - unsigned long max_blocks; + ext4_fsblk_t block, max_blocks; ext4_fsblk_t nblocks = 0; int ret = 0; int ret2 = 0; @@ -2623,7 +2544,6 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) * modify 1 super block, 1 block bitmap and 1 group descriptor. */ credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3; - down_write((&EXT4_I(inode)->i_data_sem)); retry: while (ret >= 0 && ret < max_blocks) { block = block + ret; @@ -2637,12 +2557,12 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) ret = ext4_ext_get_blocks(handle, inode, block, max_blocks, &map_bh, EXT4_CREATE_UNINITIALIZED_EXT, 0); - WARN_ON(ret <= 0); - if (ret <= 0) { + WARN_ON(!ret); + if (!ret) { ext4_error(inode->i_sb, "ext4_fallocate", - "ext4_ext_get_blocks returned error: " - "inode#%lu, block=%u, max_blocks=%lu", - inode->i_ino, block, max_blocks); + "ext4_ext_get_blocks returned 0! inode#%lu" + ", block=%llu, max_blocks=%llu", + inode->i_ino, block, max_blocks); ret = -EIO; ext4_mark_inode_dirty(handle, inode); ret2 = ext4_journal_stop(handle); @@ -2680,7 +2600,6 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; - up_write((&EXT4_I(inode)->i_data_sem)); /* * Time to update the file size. * Update only when preallocation was requested beyond the file size. diff --git a/trunk/fs/ext4/file.c b/trunk/fs/ext4/file.c index ac35ec58db55..1a81cd66d63b 100644 --- a/trunk/fs/ext4/file.c +++ b/trunk/fs/ext4/file.c @@ -37,9 +37,9 @@ static int ext4_release_file (struct inode * inode, struct file * filp) if ((filp->f_mode & FMODE_WRITE) && (atomic_read(&inode->i_writecount) == 1)) { - down_write(&EXT4_I(inode)->i_data_sem); + mutex_lock(&EXT4_I(inode)->truncate_mutex); ext4_discard_reservation(inode); - up_write(&EXT4_I(inode)->i_data_sem); + mutex_unlock(&EXT4_I(inode)->truncate_mutex); } if (is_dx(inode) && filp->private_data) ext4_htree_free_dir_info(filp->private_data); @@ -56,25 +56,8 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, ssize_t ret; int err; - /* - * If we have encountered a bitmap-format file, the size limit - * is smaller than s_maxbytes, which is for extent-mapped files. 
- */ - - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) { - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - size_t length = iov_length(iov, nr_segs); - - if (pos > sbi->s_bitmap_maxbytes) - return -EFBIG; - - if (pos + length > sbi->s_bitmap_maxbytes) { - nr_segs = iov_shorten((struct iovec *)iov, nr_segs, - sbi->s_bitmap_maxbytes - pos); - } - } - ret = generic_file_aio_write(iocb, iov, nr_segs, pos); + /* * Skip flushing if there was an error, or if nothing was written. */ diff --git a/trunk/fs/ext4/group.h b/trunk/fs/ext4/group.h index 7eb0604e7eea..1577910bb58b 100644 --- a/trunk/fs/ext4/group.h +++ b/trunk/fs/ext4/group.h @@ -14,16 +14,14 @@ extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group, extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group, struct ext4_group_desc *gdp); struct buffer_head *read_block_bitmap(struct super_block *sb, - ext4_group_t block_group); + unsigned int block_group); extern unsigned ext4_init_block_bitmap(struct super_block *sb, - struct buffer_head *bh, - ext4_group_t group, + struct buffer_head *bh, int group, struct ext4_group_desc *desc); #define ext4_free_blocks_after_init(sb, group, desc) \ ext4_init_block_bitmap(sb, NULL, group, desc) extern unsigned ext4_init_inode_bitmap(struct super_block *sb, - struct buffer_head *bh, - ext4_group_t group, + struct buffer_head *bh, int group, struct ext4_group_desc *desc); extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap); #endif /* _LINUX_EXT4_GROUP_H */ diff --git a/trunk/fs/ext4/ialloc.c b/trunk/fs/ext4/ialloc.c index 575b5215c808..c61f37fd3f05 100644 --- a/trunk/fs/ext4/ialloc.c +++ b/trunk/fs/ext4/ialloc.c @@ -64,8 +64,8 @@ void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) } /* Initializes an uninitialized inode bitmap */ -unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, - ext4_group_t block_group, +unsigned ext4_init_inode_bitmap(struct super_block *sb, + struct buffer_head *bh, int block_group, struct ext4_group_desc *gdp) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -75,7 +75,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, /* If checksum is bad mark all blocks and inodes use to prevent * allocation, essentially implementing a per-group read-only flag. */ if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { - ext4_error(sb, __FUNCTION__, "Checksum bad for group %lu\n", + ext4_error(sb, __FUNCTION__, "Checksum bad for group %u\n", block_group); gdp->bg_free_blocks_count = 0; gdp->bg_free_inodes_count = 0; @@ -98,7 +98,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, * Return buffer_head of bitmap on success or NULL. */ static struct buffer_head * -read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) +read_inode_bitmap(struct super_block * sb, unsigned long block_group) { struct ext4_group_desc *desc; struct buffer_head *bh = NULL; @@ -152,7 +152,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) unsigned long ino; struct buffer_head *bitmap_bh = NULL; struct buffer_head *bh2; - ext4_group_t block_group; + unsigned long block_group; unsigned long bit; struct ext4_group_desc * gdp; struct ext4_super_block * es; @@ -260,14 +260,12 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) * For other inodes, search forward from the parent directory\'s block * group to find a free inode. 
*/ -static int find_group_dir(struct super_block *sb, struct inode *parent, - ext4_group_t *best_group) +static int find_group_dir(struct super_block *sb, struct inode *parent) { - ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; + int ngroups = EXT4_SB(sb)->s_groups_count; unsigned int freei, avefreei; struct ext4_group_desc *desc, *best_desc = NULL; - ext4_group_t group; - int ret = -1; + int group, best_group = -1; freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter); avefreei = freei / ngroups; @@ -281,12 +279,11 @@ static int find_group_dir(struct super_block *sb, struct inode *parent, if (!best_desc || (le16_to_cpu(desc->bg_free_blocks_count) > le16_to_cpu(best_desc->bg_free_blocks_count))) { - *best_group = group; + best_group = group; best_desc = desc; - ret = 0; } } - return ret; + return best_group; } /* @@ -317,13 +314,12 @@ static int find_group_dir(struct super_block *sb, struct inode *parent, #define INODE_COST 64 #define BLOCK_COST 256 -static int find_group_orlov(struct super_block *sb, struct inode *parent, - ext4_group_t *group) +static int find_group_orlov(struct super_block *sb, struct inode *parent) { - ext4_group_t parent_group = EXT4_I(parent)->i_block_group; + int parent_group = EXT4_I(parent)->i_block_group; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; - ext4_group_t ngroups = sbi->s_groups_count; + int ngroups = sbi->s_groups_count; int inodes_per_group = EXT4_INODES_PER_GROUP(sb); unsigned int freei, avefreei; ext4_fsblk_t freeb, avefreeb; @@ -331,7 +327,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, unsigned int ndirs; int max_debt, max_dirs, min_inodes; ext4_grpblk_t min_blocks; - ext4_group_t i; + int group = -1, i; struct ext4_group_desc *desc; freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); @@ -344,14 +340,13 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, if ((parent == sb->s_root->d_inode) || (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL)) { int best_ndir = inodes_per_group; - ext4_group_t grp; - int ret = -1; + int best_group = -1; - get_random_bytes(&grp, sizeof(grp)); - parent_group = (unsigned)grp % ngroups; + get_random_bytes(&group, sizeof(group)); + parent_group = (unsigned)group % ngroups; for (i = 0; i < ngroups; i++) { - grp = (parent_group + i) % ngroups; - desc = ext4_get_group_desc(sb, grp, NULL); + group = (parent_group + i) % ngroups; + desc = ext4_get_group_desc (sb, group, NULL); if (!desc || !desc->bg_free_inodes_count) continue; if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir) @@ -360,12 +355,11 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, continue; if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb) continue; - *group = grp; - ret = 0; + best_group = group; best_ndir = le16_to_cpu(desc->bg_used_dirs_count); } - if (ret == 0) - return ret; + if (best_group >= 0) + return best_group; goto fallback; } @@ -386,8 +380,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, max_debt = 1; for (i = 0; i < ngroups; i++) { - *group = (parent_group + i) % ngroups; - desc = ext4_get_group_desc(sb, *group, NULL); + group = (parent_group + i) % ngroups; + desc = ext4_get_group_desc (sb, group, NULL); if (!desc || !desc->bg_free_inodes_count) continue; if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs) @@ -396,16 +390,17 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, continue; if (le16_to_cpu(desc->bg_free_blocks_count) < 
min_blocks) continue; - return 0; + return group; } fallback: for (i = 0; i < ngroups; i++) { - *group = (parent_group + i) % ngroups; - desc = ext4_get_group_desc(sb, *group, NULL); - if (desc && desc->bg_free_inodes_count && - le16_to_cpu(desc->bg_free_inodes_count) >= avefreei) - return 0; + group = (parent_group + i) % ngroups; + desc = ext4_get_group_desc (sb, group, NULL); + if (!desc || !desc->bg_free_inodes_count) + continue; + if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei) + return group; } if (avefreei) { @@ -420,22 +415,21 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, return -1; } -static int find_group_other(struct super_block *sb, struct inode *parent, - ext4_group_t *group) +static int find_group_other(struct super_block *sb, struct inode *parent) { - ext4_group_t parent_group = EXT4_I(parent)->i_block_group; - ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; + int parent_group = EXT4_I(parent)->i_block_group; + int ngroups = EXT4_SB(sb)->s_groups_count; struct ext4_group_desc *desc; - ext4_group_t i; + int group, i; /* * Try to place the inode in its parent directory */ - *group = parent_group; - desc = ext4_get_group_desc(sb, *group, NULL); + group = parent_group; + desc = ext4_get_group_desc (sb, group, NULL); if (desc && le16_to_cpu(desc->bg_free_inodes_count) && le16_to_cpu(desc->bg_free_blocks_count)) - return 0; + return group; /* * We're going to place this inode in a different blockgroup from its @@ -446,33 +440,33 @@ static int find_group_other(struct super_block *sb, struct inode *parent, * * So add our directory's i_ino into the starting point for the hash. */ - *group = (*group + parent->i_ino) % ngroups; + group = (group + parent->i_ino) % ngroups; /* * Use a quadratic hash to find a group with a free inode and some free * blocks. */ for (i = 1; i < ngroups; i <<= 1) { - *group += i; - if (*group >= ngroups) - *group -= ngroups; - desc = ext4_get_group_desc(sb, *group, NULL); + group += i; + if (group >= ngroups) + group -= ngroups; + desc = ext4_get_group_desc (sb, group, NULL); if (desc && le16_to_cpu(desc->bg_free_inodes_count) && le16_to_cpu(desc->bg_free_blocks_count)) - return 0; + return group; } /* * That failed: try linear search for a free inode, even if that group * has no free blocks. 
*/ - *group = parent_group; + group = parent_group; for (i = 0; i < ngroups; i++) { - if (++*group >= ngroups) - *group = 0; - desc = ext4_get_group_desc(sb, *group, NULL); + if (++group >= ngroups) + group = 0; + desc = ext4_get_group_desc (sb, group, NULL); if (desc && le16_to_cpu(desc->bg_free_inodes_count)) - return 0; + return group; } return -1; @@ -493,17 +487,16 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) struct super_block *sb; struct buffer_head *bitmap_bh = NULL; struct buffer_head *bh2; - ext4_group_t group = 0; + int group; unsigned long ino = 0; struct inode * inode; struct ext4_group_desc * gdp = NULL; struct ext4_super_block * es; struct ext4_inode_info *ei; struct ext4_sb_info *sbi; - int ret2, err = 0; + int err = 0; struct inode *ret; - ext4_group_t i; - int free = 0; + int i, free = 0; /* Cannot create files in a deleted directory */ if (!dir || !dir->i_nlink) @@ -519,14 +512,14 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) es = sbi->s_es; if (S_ISDIR(mode)) { if (test_opt (sb, OLDALLOC)) - ret2 = find_group_dir(sb, dir, &group); + group = find_group_dir(sb, dir); else - ret2 = find_group_orlov(sb, dir, &group); + group = find_group_orlov(sb, dir); } else - ret2 = find_group_other(sb, dir, &group); + group = find_group_other(sb, dir); err = -ENOSPC; - if (ret2 == -1) + if (group == -1) goto out; for (i = 0; i < sbi->s_groups_count; i++) { @@ -590,7 +583,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) ino > EXT4_INODES_PER_GROUP(sb)) { ext4_error(sb, __FUNCTION__, "reserved inode or inode > inodes count - " - "block_group = %lu, inode=%lu", group, + "block_group = %d, inode=%lu", group, ino + group * EXT4_INODES_PER_GROUP(sb)); err = -EIO; goto fail; @@ -709,6 +702,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) if (!S_ISDIR(mode)) ei->i_flags &= ~EXT4_DIRSYNC_FL; ei->i_file_acl = 0; + ei->i_dir_acl = 0; ei->i_dtime = 0; ei->i_block_alloc_info = NULL; ei->i_block_group = group; @@ -747,10 +741,13 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) if (test_opt(sb, EXTENTS)) { EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; ext4_ext_tree_init(handle, inode); - err = ext4_update_incompat_feature(handle, sb, - EXT4_FEATURE_INCOMPAT_EXTENTS); - if (err) - goto fail; + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { + err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); + if (err) goto fail; + EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS); + BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "call ext4_journal_dirty_metadata"); + err = ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); + } } ext4_debug("allocating inode %lu\n", inode->i_ino); @@ -780,7 +777,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) { unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count); - ext4_group_t block_group; + unsigned long block_group; int bit; struct buffer_head *bitmap_bh = NULL; struct inode *inode = NULL; @@ -836,7 +833,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) { unsigned long desc_count; struct ext4_group_desc *gdp; - ext4_group_t i; + int i; #ifdef EXT4FS_DEBUG struct ext4_super_block *es; unsigned long bitmap_count, x; @@ -857,7 +854,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) continue; x = ext4_count_free(bitmap_bh, 
EXT4_INODES_PER_GROUP(sb) / 8); - printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n", + printk("group %d: stored = %d, counted = %lu\n", i, le16_to_cpu(gdp->bg_free_inodes_count), x); bitmap_count += x; } @@ -882,7 +879,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) unsigned long ext4_count_dirs (struct super_block * sb) { unsigned long count = 0; - ext4_group_t i; + int i; for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { struct ext4_group_desc *gdp = ext4_get_group_desc (sb, i, NULL); diff --git a/trunk/fs/ext4/inode.c b/trunk/fs/ext4/inode.c index bb717cbb749c..5489703d9573 100644 --- a/trunk/fs/ext4/inode.c +++ b/trunk/fs/ext4/inode.c @@ -105,7 +105,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, */ static unsigned long blocks_for_truncate(struct inode *inode) { - ext4_lblk_t needed; + unsigned long needed; needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); @@ -243,6 +243,13 @@ static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) p->bh = bh; } +static int verify_chain(Indirect *from, Indirect *to) +{ + while (from <= to && from->key == *from->p) + from++; + return (from > to); +} + /** * ext4_block_to_path - parse the block number into array of offsets * @inode: inode in question (we are only interested in its superblock) @@ -275,8 +282,7 @@ static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) */ static int ext4_block_to_path(struct inode *inode, - ext4_lblk_t i_block, - ext4_lblk_t offsets[4], int *boundary) + long i_block, int offsets[4], int *boundary) { int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb); int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb); @@ -307,10 +313,7 @@ static int ext4_block_to_path(struct inode *inode, offsets[n++] = i_block & (ptrs - 1); final = ptrs; } else { - ext4_warning(inode->i_sb, "ext4_block_to_path", - "block %lu > max", - i_block + direct_blocks + - indirect_blocks + double_blocks); + ext4_warning(inode->i_sb, "ext4_block_to_path", "block > big"); } if (boundary) *boundary = final - 1 - (i_block & (ptrs - 1)); @@ -341,14 +344,12 @@ static int ext4_block_to_path(struct inode *inode, * (pointer to last triple returned, *@err == 0) * or when it gets an IO error reading an indirect block * (ditto, *@err == -EIO) + * or when it notices that chain had been changed while it was reading + * (ditto, *@err == -EAGAIN) * or when it reads all @depth-1 indirect blocks successfully and finds * the whole chain, all way to the data (returns %NULL, *err == 0). - * - * Need to be called with - * down_read(&EXT4_I(inode)->i_data_sem) */ -static Indirect *ext4_get_branch(struct inode *inode, int depth, - ext4_lblk_t *offsets, +static Indirect *ext4_get_branch(struct inode *inode, int depth, int *offsets, Indirect chain[4], int *err) { struct super_block *sb = inode->i_sb; @@ -364,6 +365,9 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, bh = sb_bread(sb, le32_to_cpu(p->key)); if (!bh) goto failure; + /* Reader: pointers */ + if (!verify_chain(chain, p)) + goto changed; add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); /* Reader: end */ if (!p->key) @@ -371,6 +375,10 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, } return NULL; +changed: + brelse(bh); + *err = -EAGAIN; + goto no_block; failure: *err = -EIO; no_block: @@ -437,7 +445,7 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) * stores it in *@goal and returns zero. 
*/ -static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, +static ext4_fsblk_t ext4_find_goal(struct inode *inode, long block, Indirect chain[4], Indirect *partial) { struct ext4_block_alloc_info *block_i; @@ -551,7 +559,7 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, return ret; failed_out: for (i = 0; i i_sb->s_blocksize; int i, n = 0; @@ -650,9 +658,9 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, ext4_journal_forget(handle, branch[i].bh); } for (i = 0; i 0, # of blocks mapped or allocated. * return = 0, if plain lookup failed. * return < 0, error case. - * - * - * Need to be called with - * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block - * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem) */ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, - ext4_lblk_t iblock, unsigned long maxblocks, + sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result, int create, int extend_disksize) { int err = -EIO; - ext4_lblk_t offsets[4]; + int offsets[4]; Indirect chain[4]; Indirect *partial; ext4_fsblk_t goal; @@ -801,8 +803,7 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); J_ASSERT(handle != NULL || create == 0); - depth = ext4_block_to_path(inode, iblock, offsets, - &blocks_to_boundary); + depth = ext4_block_to_path(inode,iblock,offsets,&blocks_to_boundary); if (depth == 0) goto out; @@ -818,6 +819,18 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, while (count < maxblocks && count <= blocks_to_boundary) { ext4_fsblk_t blk; + if (!verify_chain(chain, partial)) { + /* + * Indirect block might be removed by + * truncate while we were reading it. + * Handling of that case: forget what we've + * got now. Flag the err as EAGAIN, so it + * will reread. + */ + err = -EAGAIN; + count = 0; + break; + } blk = le32_to_cpu(*(chain[depth-1].p + count)); if (blk == first_block + count) @@ -825,13 +838,44 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, else break; } - goto got_it; + if (err != -EAGAIN) + goto got_it; } /* Next simple case - plain lookup or failed read of indirect block */ if (!create || err == -EIO) goto cleanup; + mutex_lock(&ei->truncate_mutex); + + /* + * If the indirect block is missing while we are reading + * the chain(ext4_get_branch() returns -EAGAIN err), or + * if the chain has been changed after we grab the semaphore, + * (either because another process truncated this branch, or + * another get_block allocated this branch) re-grab the chain to see if + * the request block has been allocated or not. + * + * Since we already block the truncate/other get_block + * at this point, we will have the current copy of the chain when we + * splice the branch into the tree. + */ + if (err == -EAGAIN || !verify_chain(chain, partial)) { + while (partial > chain) { + brelse(partial->bh); + partial--; + } + partial = ext4_get_branch(inode, depth, offsets, chain, &err); + if (!partial) { + count++; + mutex_unlock(&ei->truncate_mutex); + if (err) + goto cleanup; + clear_buffer_new(bh_result); + goto got_it; + } + } + /* * Okay, we need to do block allocation. Lazily initialize the block * allocation info here if necessary @@ -867,12 +911,13 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, err = ext4_splice_branch(handle, inode, iblock, partial, indirect_blks, count); /* - * i_disksize growing is protected by i_data_sem. 
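
A minimal user-space sketch of the chain re-validation the hunks above reintroduce: ext4_get_branch() snapshots each on-disk pointer into Indirect.key, and verify_chain() later confirms nothing changed underneath a lockless reader. The Indirect type here is stripped down for illustration (the real one also carries a buffer_head):

#include <stdio.h>
#include <stdint.h>

struct indirect {
    uint32_t *p;   /* points into the shared block of pointers */
    uint32_t key;  /* snapshot of *p taken while walking the chain */
};

/* Nonzero iff no pointer on the walked chain changed since it was
 * read -- the same test the restored verify_chain() performs. */
static int verify_chain(struct indirect *from, struct indirect *to)
{
    while (from <= to && from->key == *from->p)
        from++;
    return from > to;
}

int main(void)
{
    uint32_t blocks[2] = { 100, 200 };
    struct indirect chain[2] = {
        { &blocks[0], 100 },
        { &blocks[1], 200 },
    };

    printf("intact: %d\n", verify_chain(chain, chain + 1)); /* 1 */
    blocks[1] = 0; /* simulate a concurrent truncate zeroing a pointer */
    printf("intact: %d\n", verify_chain(chain, chain + 1)); /* 0 */
    return 0;
}

On a mismatch the restored kernel code drops what it read and retries with -EAGAIN instead of blocking readers, which is exactly the behaviour the i_data_sem scheme had replaced.
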
Don't forget to + * i_disksize growing is protected by truncate_mutex. Don't forget to * protect it if you're about to implement concurrent * ext4_get_block() -bzzz */ if (!err && extend_disksize && inode->i_size > ei->i_disksize) ei->i_disksize = inode->i_size; + mutex_unlock(&ei->truncate_mutex); if (err) goto cleanup; @@ -897,47 +942,6 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, #define DIO_CREDITS (EXT4_RESERVE_TRANS_BLOCKS + 32) -int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, - unsigned long max_blocks, struct buffer_head *bh, - int create, int extend_disksize) -{ - int retval; - /* - * Try to see if we can get the block without requesting - * for new file system block. - */ - down_read((&EXT4_I(inode)->i_data_sem)); - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { - retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, - bh, 0, 0); - } else { - retval = ext4_get_blocks_handle(handle, - inode, block, max_blocks, bh, 0, 0); - } - up_read((&EXT4_I(inode)->i_data_sem)); - if (!create || (retval > 0)) - return retval; - - /* - * We need to allocate new blocks which will result - * in i_data update - */ - down_write((&EXT4_I(inode)->i_data_sem)); - /* - * We need to check for EXT4 here because migrate - * could have changed the inode type in between - */ - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { - retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, - bh, create, extend_disksize); - } else { - retval = ext4_get_blocks_handle(handle, inode, block, - max_blocks, bh, create, extend_disksize); - } - up_write((&EXT4_I(inode)->i_data_sem)); - return retval; -} - static int ext4_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { @@ -992,7 +996,7 @@ static int ext4_get_block(struct inode *inode, sector_t iblock, * `handle' can be NULL if create is zero */ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, - ext4_lblk_t block, int create, int *errp) + long block, int create, int *errp) { struct buffer_head dummy; int fatal = 0, err; @@ -1059,7 +1063,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, } struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, - ext4_lblk_t block, int create, int *err) + int block, int create, int *err) { struct buffer_head * bh; @@ -1442,7 +1446,7 @@ static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) * ext4_file_write() -> generic_file_write() -> __alloc_pages() -> ... * * Same applies to ext4_get_block(). We will deadlock on various things like - * lock_journal and i_data_sem + * lock_journal and i_truncate_mutex. * * Setting PF_MEMALLOC here doesn't work - too many internal memory * allocations fail. @@ -1824,8 +1828,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page, { ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); - unsigned blocksize, length, pos; - ext4_lblk_t iblock; + unsigned blocksize, iblock, length, pos; struct inode *inode = mapping->host; struct buffer_head *bh; int err = 0; @@ -1961,7 +1964,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q) * (no partially truncated stuff there). 
*/ static Indirect *ext4_find_shared(struct inode *inode, int depth, - ext4_lblk_t offsets[4], Indirect chain[4], __le32 *top) + int offsets[4], Indirect chain[4], __le32 *top) { Indirect *partial, *p; int k, err; @@ -2045,15 +2048,15 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, for (p = first; p < last; p++) { u32 nr = le32_to_cpu(*p); if (nr) { - struct buffer_head *tbh; + struct buffer_head *bh; *p = 0; - tbh = sb_find_get_block(inode->i_sb, nr); - ext4_forget(handle, 0, inode, tbh, nr); + bh = sb_find_get_block(inode->i_sb, nr); + ext4_forget(handle, 0, inode, bh, nr); } } - ext4_free_blocks(handle, inode, block_to_free, count, 0); + ext4_free_blocks(handle, inode, block_to_free, count); } /** @@ -2226,7 +2229,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, ext4_journal_test_restart(handle, inode); } - ext4_free_blocks(handle, inode, nr, 1, 1); + ext4_free_blocks(handle, inode, nr, 1); if (parent_bh) { /* @@ -2286,12 +2289,12 @@ void ext4_truncate(struct inode *inode) __le32 *i_data = ei->i_data; int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); struct address_space *mapping = inode->i_mapping; - ext4_lblk_t offsets[4]; + int offsets[4]; Indirect chain[4]; Indirect *partial; __le32 nr = 0; int n; - ext4_lblk_t last_block; + long last_block; unsigned blocksize = inode->i_sb->s_blocksize; struct page *page; @@ -2317,10 +2320,8 @@ void ext4_truncate(struct inode *inode) return; } - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { - ext4_ext_truncate(inode, page); - return; - } + if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) + return ext4_ext_truncate(inode, page); handle = start_transaction(inode); if (IS_ERR(handle)) { @@ -2368,7 +2369,7 @@ void ext4_truncate(struct inode *inode) * From here we block out all ext4_get_block() callers who want to * modify the block allocation tree. 
*/ - down_write(&ei->i_data_sem); + mutex_lock(&ei->truncate_mutex); if (n == 1) { /* direct blocks */ ext4_free_data(handle, inode, NULL, i_data+offsets[0], @@ -2432,7 +2433,7 @@ void ext4_truncate(struct inode *inode) ext4_discard_reservation(inode); - up_write(&ei->i_data_sem); + mutex_unlock(&ei->truncate_mutex); inode->i_mtime = inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); @@ -2459,8 +2460,7 @@ void ext4_truncate(struct inode *inode) static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, unsigned long ino, struct ext4_iloc *iloc) { - unsigned long desc, group_desc; - ext4_group_t block_group; + unsigned long desc, group_desc, block_group; unsigned long offset; ext4_fsblk_t block; struct buffer_head *bh; @@ -2547,7 +2547,7 @@ static int __ext4_get_inode_loc(struct inode *inode, struct ext4_group_desc *desc; int inodes_per_buffer; int inode_offset, i; - ext4_group_t block_group; + int block_group; int start; block_group = (inode->i_ino - 1) / @@ -2660,28 +2660,6 @@ void ext4_get_inode_flags(struct ext4_inode_info *ei) if (flags & S_DIRSYNC) ei->i_flags |= EXT4_DIRSYNC_FL; } -static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, - struct ext4_inode_info *ei) -{ - blkcnt_t i_blocks ; - struct inode *inode = &(ei->vfs_inode); - struct super_block *sb = inode->i_sb; - - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { - /* we are using combined 48 bit field */ - i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 | - le32_to_cpu(raw_inode->i_blocks_lo); - if (ei->i_flags & EXT4_HUGE_FILE_FL) { - /* i_blocks represent file system block size */ - return i_blocks << (inode->i_blkbits - 9); - } else { - return i_blocks; - } - } else { - return le32_to_cpu(raw_inode->i_blocks_lo); - } -} void ext4_read_inode(struct inode * inode) { @@ -2709,6 +2687,7 @@ void ext4_read_inode(struct inode * inode) inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; } inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); + inode->i_size = le32_to_cpu(raw_inode->i_size); ei->i_state = 0; ei->i_dir_start_lookup = 0; @@ -2730,15 +2709,19 @@ void ext4_read_inode(struct inode * inode) * recovery code: that's fine, we're about to complete * the process of deleting those. 
*/ } + inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); ei->i_flags = le32_to_cpu(raw_inode->i_flags); - inode->i_blocks = ext4_inode_blocks(raw_inode, ei); - ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo); + ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != - cpu_to_le32(EXT4_OS_HURD)) { + cpu_to_le32(EXT4_OS_HURD)) ei->i_file_acl |= ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; + if (!S_ISREG(inode->i_mode)) { + ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); + } else { + inode->i_size |= + ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32; } - inode->i_size = ext4_isize(raw_inode); ei->i_disksize = inode->i_size; inode->i_generation = le32_to_cpu(raw_inode->i_generation); ei->i_block_group = iloc.block_group; @@ -2782,13 +2765,6 @@ void ext4_read_inode(struct inode * inode) EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode); EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode); - inode->i_version = le32_to_cpu(raw_inode->i_disk_version); - if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { - if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) - inode->i_version |= - (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; - } - if (S_ISREG(inode->i_mode)) { inode->i_op = &ext4_file_inode_operations; inode->i_fop = &ext4_file_operations; @@ -2821,55 +2797,6 @@ void ext4_read_inode(struct inode * inode) return; } -static int ext4_inode_blocks_set(handle_t *handle, - struct ext4_inode *raw_inode, - struct ext4_inode_info *ei) -{ - struct inode *inode = &(ei->vfs_inode); - u64 i_blocks = inode->i_blocks; - struct super_block *sb = inode->i_sb; - int err = 0; - - if (i_blocks <= ~0U) { - /* - * i_blocks can be represnted in a 32 bit variable - * as multiple of 512 bytes - */ - raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); - raw_inode->i_blocks_high = 0; - ei->i_flags &= ~EXT4_HUGE_FILE_FL; - } else if (i_blocks <= 0xffffffffffffULL) { - /* - * i_blocks can be represented in a 48 bit variable - * as multiple of 512 bytes - */ - err = ext4_update_rocompat_feature(handle, sb, - EXT4_FEATURE_RO_COMPAT_HUGE_FILE); - if (err) - goto err_out; - /* i_block is stored in the split 48 bit fields */ - raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); - raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); - ei->i_flags &= ~EXT4_HUGE_FILE_FL; - } else { - /* - * i_blocks should be represented in a 48 bit variable - * as multiple of file system block size - */ - err = ext4_update_rocompat_feature(handle, sb, - EXT4_FEATURE_RO_COMPAT_HUGE_FILE); - if (err) - goto err_out; - ei->i_flags |= EXT4_HUGE_FILE_FL; - /* i_block is stored in file system block size */ - i_blocks = i_blocks >> (inode->i_blkbits - 9); - raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); - raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); - } -err_out: - return err; -} - /* * Post the struct inode info into an on-disk inode location in the * buffer-cache. 
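
The read side above reassembles a regular file's 64-bit size from the two on-disk words this revert goes back to, and the write side in a later hunk splits it again. A short sketch of that round trip, using host-endian fields for brevity where the kernel goes through le32_to_cpu()/cpu_to_le32():

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t size = 0x1C0000000ULL; /* 7 GiB: needs more than 32 bits */

    /* write side, as in ext4_do_update_inode() for S_ISREG inodes */
    uint32_t i_size      = (uint32_t)size;
    uint32_t i_size_high = (uint32_t)(size >> 32);

    /* read side, as in ext4_read_inode() */
    uint64_t assembled = (uint64_t)i_size |
                         ((uint64_t)i_size_high << 32);

    printf("low=%#x high=%#x assembled=%#llx\n",
           (unsigned)i_size, (unsigned)i_size_high,
           (unsigned long long)assembled);

    /* anything past 0x7fffffff is what makes the write path set the
     * LARGE_FILE RO_COMPAT feature in the hunk that follows */
    printf("needs LARGE_FILE: %d\n", size > 0x7fffffffULL);
    return 0;
}
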
This gobbles the caller's reference to the @@ -2918,42 +2845,47 @@ static int ext4_do_update_inode(handle_t *handle, raw_inode->i_gid_high = 0; } raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); + raw_inode->i_size = cpu_to_le32(ei->i_disksize); EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode); EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode); EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); - if (ext4_inode_blocks_set(handle, raw_inode, ei)) - goto out_brelse; + raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); raw_inode->i_flags = cpu_to_le32(ei->i_flags); if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != cpu_to_le32(EXT4_OS_HURD)) raw_inode->i_file_acl_high = cpu_to_le16(ei->i_file_acl >> 32); - raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); - ext4_isize_set(raw_inode, ei->i_disksize); - if (ei->i_disksize > 0x7fffffffULL) { - struct super_block *sb = inode->i_sb; - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_LARGE_FILE) || - EXT4_SB(sb)->s_es->s_rev_level == - cpu_to_le32(EXT4_GOOD_OLD_REV)) { - /* If this is the first large file - * created, add a flag to the superblock. - */ - err = ext4_journal_get_write_access(handle, - EXT4_SB(sb)->s_sbh); - if (err) - goto out_brelse; - ext4_update_dynamic_rev(sb); - EXT4_SET_RO_COMPAT_FEATURE(sb, + raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl); + if (!S_ISREG(inode->i_mode)) { + raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); + } else { + raw_inode->i_size_high = + cpu_to_le32(ei->i_disksize >> 32); + if (ei->i_disksize > 0x7fffffffULL) { + struct super_block *sb = inode->i_sb; + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_LARGE_FILE) || + EXT4_SB(sb)->s_es->s_rev_level == + cpu_to_le32(EXT4_GOOD_OLD_REV)) { + /* If this is the first large file + * created, add a flag to the superblock. 
+ */ + err = ext4_journal_get_write_access(handle, + EXT4_SB(sb)->s_sbh); + if (err) + goto out_brelse; + ext4_update_dynamic_rev(sb); + EXT4_SET_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_LARGE_FILE); - sb->s_dirt = 1; - handle->h_sync = 1; - err = ext4_journal_dirty_metadata(handle, - EXT4_SB(sb)->s_sbh); + sb->s_dirt = 1; + handle->h_sync = 1; + err = ext4_journal_dirty_metadata(handle, + EXT4_SB(sb)->s_sbh); + } } } raw_inode->i_generation = cpu_to_le32(inode->i_generation); @@ -2971,14 +2903,8 @@ static int ext4_do_update_inode(handle_t *handle, } else for (block = 0; block < EXT4_N_BLOCKS; block++) raw_inode->i_block[block] = ei->i_data[block]; - raw_inode->i_disk_version = cpu_to_le32(inode->i_version); - if (ei->i_extra_isize) { - if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) - raw_inode->i_version_hi = - cpu_to_le32(inode->i_version >> 32); + if (ei->i_extra_isize) raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); - } - BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); rc = ext4_journal_dirty_metadata(handle, bh); @@ -3098,17 +3024,6 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) ext4_journal_stop(handle); } - if (attr->ia_valid & ATTR_SIZE) { - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) { - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - - if (attr->ia_size > sbi->s_bitmap_maxbytes) { - error = -EFBIG; - goto err_out; - } - } - } - if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { handle_t *handle; @@ -3205,9 +3120,6 @@ int ext4_mark_iloc_dirty(handle_t *handle, { int err = 0; - if (test_opt(inode->i_sb, I_VERSION)) - inode_inc_iversion(inode); - /* the do_update_inode consumes one bh->b_count */ get_bh(iloc->bh); @@ -3246,10 +3158,8 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode, * Expand an inode by new_extra_isize bytes. * Returns 0 on success or negative error number on failure. 
*/ -static int ext4_expand_extra_isize(struct inode *inode, - unsigned int new_extra_isize, - struct ext4_iloc iloc, - handle_t *handle) +int ext4_expand_extra_isize(struct inode *inode, unsigned int new_extra_isize, + struct ext4_iloc iloc, handle_t *handle) { struct ext4_inode *raw_inode; struct ext4_xattr_ibody_header *header; diff --git a/trunk/fs/ext4/ioctl.c b/trunk/fs/ext4/ioctl.c index 2ed7c37f897e..e7f894bdb420 100644 --- a/trunk/fs/ext4/ioctl.c +++ b/trunk/fs/ext4/ioctl.c @@ -199,7 +199,7 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, * need to allocate reservation structure for this inode * before set the window size */ - down_write(&ei->i_data_sem); + mutex_lock(&ei->truncate_mutex); if (!ei->i_block_alloc_info) ext4_init_block_alloc_info(inode); @@ -207,7 +207,7 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; rsv->rsv_goal_size = rsv_window_size; } - up_write(&ei->i_data_sem); + mutex_unlock(&ei->truncate_mutex); return 0; } case EXT4_IOC_GROUP_EXTEND: { @@ -254,9 +254,6 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, return err; } - case EXT4_IOC_MIGRATE: - return ext4_ext_migrate(inode, filp, cmd, arg); - default: return -ENOTTY; } diff --git a/trunk/fs/ext4/mballoc.c b/trunk/fs/ext4/mballoc.c deleted file mode 100644 index 76e5fedc0a0b..000000000000 --- a/trunk/fs/ext4/mballoc.c +++ /dev/null @@ -1,4552 +0,0 @@ -/* - * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com - * Written by Alex Tomas - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public Licens - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- - */ - - -/* - * mballoc.c contains the multiblocks allocation routines - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "group.h" - -/* - * MUSTDO: - * - test ext4_ext_search_left() and ext4_ext_search_right() - * - search for metadata in few groups - * - * TODO v4: - * - normalization should take into account whether file is still open - * - discard preallocations if no free space left (policy?) - * - don't normalize tails - * - quota - * - reservation for superuser - * - * TODO v3: - * - bitmap read-ahead (proposed by Oleg Drokin aka green) - * - track min/max extents in each group for better group selection - * - mb_mark_used() may allocate chunk right after splitting buddy - * - tree of groups sorted by number of free blocks - * - error handling - */ - -/* - * The allocation request involve request for multiple number of blocks - * near to the goal(block) value specified. - * - * During initialization phase of the allocator we decide to use the group - * preallocation or inode preallocation depending on the size file. 
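
The design notes being deleted here open with the allocator's first decision: files smaller than s_mb_stream_request go to the shared locality-group preallocation so they end up packed together on disk, larger files get per-inode preallocation. A minimal sketch of that choice, using the 16-block default quoted just below; the helper itself is illustrative:

#include <stdio.h>

#define MB_DEFAULT_STREAM_THRESHOLD 16 /* blocks; the stream_req tunable */

static const char *prealloc_pool(unsigned long size_in_blocks)
{
    if (size_in_blocks < MB_DEFAULT_STREAM_THRESHOLD)
        return "locality-group preallocation";
    return "per-inode preallocation";
}

int main(void)
{
    printf("%2u blocks -> %s\n", 8,  prealloc_pool(8));
    printf("%2u blocks -> %s\n", 64, prealloc_pool(64));
    return 0;
}
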
The - * size of the file could be the resulting file size we would have after - * allocation or the current file size which ever is larger. If the size is - * less that sbi->s_mb_stream_request we select the group - * preallocation. The default value of s_mb_stream_request is 16 - * blocks. This can also be tuned via - * /proc/fs/ext4//stream_req. The value is represented in terms - * of number of blocks. - * - * The main motivation for having small file use group preallocation is to - * ensure that we have small file closer in the disk. - * - * First stage the allocator looks at the inode prealloc list - * ext4_inode_info->i_prealloc_list contain list of prealloc spaces for - * this particular inode. The inode prealloc space is represented as: - * - * pa_lstart -> the logical start block for this prealloc space - * pa_pstart -> the physical start block for this prealloc space - * pa_len -> lenght for this prealloc space - * pa_free -> free space available in this prealloc space - * - * The inode preallocation space is used looking at the _logical_ start - * block. If only the logical file block falls within the range of prealloc - * space we will consume the particular prealloc space. This make sure that - * that the we have contiguous physical blocks representing the file blocks - * - * The important thing to be noted in case of inode prealloc space is that - * we don't modify the values associated to inode prealloc space except - * pa_free. - * - * If we are not able to find blocks in the inode prealloc space and if we - * have the group allocation flag set then we look at the locality group - * prealloc space. These are per CPU prealloc list repreasented as - * - * ext4_sb_info.s_locality_groups[smp_processor_id()] - * - * The reason for having a per cpu locality group is to reduce the contention - * between CPUs. It is possible to get scheduled at this point. - * - * The locality group prealloc space is used looking at whether we have - * enough free space (pa_free) withing the prealloc space. - * - * If we can't allocate blocks via inode prealloc or/and locality group - * prealloc then we look at the buddy cache. The buddy cache is represented - * by ext4_sb_info.s_buddy_cache (struct inode) whose file offset gets - * mapped to the buddy and bitmap information regarding different - * groups. The buddy information is attached to buddy cache inode so that - * we can access them through the page cache. The information regarding - * each group is loaded via ext4_mb_load_buddy. The information involve - * block bitmap and buddy information. The information are stored in the - * inode as: - * - * { page } - * [ group 0 buddy][ group 0 bitmap] [group 1][ group 1]... - * - * - * one block each for bitmap and buddy information. So for each group we - * take up 2 blocks. A page can contain blocks_per_page (PAGE_CACHE_SIZE / - * blocksize) blocks. So it can have information regarding groups_per_page - * which is blocks_per_page/2 - * - * The buddy cache inode is not stored on disk. The inode is thrown - * away when the filesystem is unmounted. - * - * We look for count number of blocks in the buddy cache. If we were able - * to locate that many free blocks we return with additional information - * regarding rest of the contiguous physical block available - * - * Before allocating blocks via buddy cache we normalize the request - * blocks. This ensure we ask for more blocks that we needed. The extra - * blocks that we get after allocation is added to the respective prealloc - * list. 
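
The buddy-cache layout sketched above is plain index arithmetic: each group owns two consecutive blocks in the buddy-cache inode (bitmap first, then buddy), and a page holds PAGE_CACHE_SIZE / blocksize such blocks. A small model of locating a group's blocks under that scheme; the real lookup lived in the deleted ext4_mb_load_buddy(), so the helper here is only illustrative:

#include <stdio.h>

#define PAGE_CACHE_SIZE 4096u

static void locate(unsigned group, unsigned blocksize)
{
    unsigned blocks_per_page = PAGE_CACHE_SIZE / blocksize;
    unsigned bitmap_blk = group * 2;      /* two blocks per group */
    unsigned buddy_blk  = bitmap_blk + 1;

    printf("group %u: bitmap page %u slot %u, buddy page %u slot %u\n",
           group,
           bitmap_blk / blocks_per_page, bitmap_blk % blocks_per_page,
           buddy_blk / blocks_per_page, buddy_blk % blocks_per_page);
}

int main(void)
{
    locate(0, 1024); /* 4 blocks per page -> 2 groups per page */
    locate(5, 1024);
    locate(3, 4096); /* 1 block per page -> bitmap and buddy on adjacent pages */
    return 0;
}

With a 1k block size two groups share a page, which is why the deleted comment derives groups_per_page as blocks_per_page / 2.
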
In case of inode preallocation we follow a list of heuristics - * based on file size. This can be found in ext4_mb_normalize_request. If - * we are doing a group prealloc we try to normalize the request to - * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is set to - * 512 blocks. This can be tuned via - * /proc/fs/ext4/ option the group prealloc request is normalized to the - * stripe value (sbi->s_stripe) - * - * The regular allocator(using the buddy cache) support few tunables. - * - * /proc/fs/ext4//min_to_scan - * /proc/fs/ext4//max_to_scan - * /proc/fs/ext4//order2_req - * - * The regular allocator use buddy scan only if the request len is power of - * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The - * value of s_mb_order2_reqs can be tuned via - * /proc/fs/ext4//order2_req. If the request len is equal to - * stripe size (sbi->s_stripe), we try to search for contigous block in - * stripe size. This should result in better allocation on RAID setup. If - * not we search in the specific group using bitmap for best extents. The - * tunable min_to_scan and max_to_scan controll the behaviour here. - * min_to_scan indicate how long the mballoc __must__ look for a best - * extent and max_to_scanindicate how long the mballoc __can__ look for a - * best extent in the found extents. Searching for the blocks starts with - * the group specified as the goal value in allocation context via - * ac_g_ex. Each group is first checked based on the criteria whether it - * can used for allocation. ext4_mb_good_group explains how the groups are - * checked. - * - * Both the prealloc space are getting populated as above. So for the first - * request we will hit the buddy cache which will result in this prealloc - * space getting filled. The prealloc space is then later used for the - * subsequent request. - */ - -/* - * mballoc operates on the following data: - * - on-disk bitmap - * - in-core buddy (actually includes buddy and bitmap) - * - preallocation descriptors (PAs) - * - * there are two types of preallocations: - * - inode - * assiged to specific inode and can be used for this inode only. - * it describes part of inode's space preallocated to specific - * physical blocks. any block from that preallocated can be used - * independent. the descriptor just tracks number of blocks left - * unused. so, before taking some block from descriptor, one must - * make sure corresponded logical block isn't allocated yet. this - * also means that freeing any block within descriptor's range - * must discard all preallocated blocks. - * - locality group - * assigned to specific locality group which does not translate to - * permanent set of inodes: inode can join and leave group. space - * from this type of preallocation can be used for any inode. thus - * it's consumed from the beginning to the end. - * - * relation between them can be expressed as: - * in-core buddy = on-disk bitmap + preallocation descriptors - * - * this mean blocks mballoc considers used are: - * - allocated blocks (persistent) - * - preallocated blocks (non-persistent) - * - * consistency in mballoc world means that at any time a block is either - * free or used in ALL structures. notice: "any time" should not be read - * literally -- time is discrete and delimited by locks. - * - * to keep it simple, we don't use block numbers, instead we count number of - * blocks: how many blocks marked used/free in on-disk bitmap, buddy and PA. 
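
Two of the tunables above gate the fast path: the 2^N buddy scan only runs when the request length is a power of two whose order is at least s_mb_order2_reqs. A sketch of that check with the default of 2, under which requests of four or more power-of-two blocks qualify:

#include <stdio.h>

#define MB_DEFAULT_ORDER2_REQS 2

static int use_buddy_scan(unsigned len, unsigned order2_reqs)
{
    unsigned order = 0;

    if (len == 0 || (len & (len - 1)))
        return 0; /* not a power of two */
    while ((1u << order) < len)
        order++;
    return order >= order2_reqs;
}

int main(void)
{
    static const unsigned lens[] = { 1, 2, 3, 4, 6, 8 };

    for (unsigned i = 0; i < sizeof(lens) / sizeof(lens[0]); i++)
        printf("len %u -> buddy scan: %d\n",
               lens[i], use_buddy_scan(lens[i], MB_DEFAULT_ORDER2_REQS));
    return 0;
}
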
- * - * all operations can be expressed as: - * - init buddy: buddy = on-disk + PAs - * - new PA: buddy += N; PA = N - * - use inode PA: on-disk += N; PA -= N - * - discard inode PA buddy -= on-disk - PA; PA = 0 - * - use locality group PA on-disk += N; PA -= N - * - discard locality group PA buddy -= PA; PA = 0 - * note: 'buddy -= on-disk - PA' is used to show that on-disk bitmap - * is used in real operation because we can't know actual used - * bits from PA, only from on-disk bitmap - * - * if we follow this strict logic, then all operations above should be atomic. - * given some of them can block, we'd have to use something like semaphores - * killing performance on high-end SMP hardware. let's try to relax it using - * the following knowledge: - * 1) if buddy is referenced, it's already initialized - * 2) while block is used in buddy and the buddy is referenced, - * nobody can re-allocate that block - * 3) we work on bitmaps and '+' actually means 'set bits'. if on-disk has - * bit set and PA claims same block, it's OK. IOW, one can set bit in - * on-disk bitmap if buddy has same bit set or/and PA covers corresponded - * block - * - * so, now we're building a concurrency table: - * - init buddy vs. - * - new PA - * blocks for PA are allocated in the buddy, buddy must be referenced - * until PA is linked to allocation group to avoid concurrent buddy init - * - use inode PA - * we need to make sure that either on-disk bitmap or PA has uptodate data - * given (3) we care that PA-=N operation doesn't interfere with init - * - discard inode PA - * the simplest way would be to have buddy initialized by the discard - * - use locality group PA - * again PA-=N must be serialized with init - * - discard locality group PA - * the simplest way would be to have buddy initialized by the discard - * - new PA vs. - * - use inode PA - * i_data_sem serializes them - * - discard inode PA - * discard process must wait until PA isn't used by another process - * - use locality group PA - * some mutex should serialize them - * - discard locality group PA - * discard process must wait until PA isn't used by another process - * - use inode PA - * - use inode PA - * i_data_sem or another mutex should serializes them - * - discard inode PA - * discard process must wait until PA isn't used by another process - * - use locality group PA - * nothing wrong here -- they're different PAs covering different blocks - * - discard locality group PA - * discard process must wait until PA isn't used by another process - * - * now we're ready to make few consequences: - * - PA is referenced and while it is no discard is possible - * - PA is referenced until block isn't marked in on-disk bitmap - * - PA changes only after on-disk bitmap - * - discard must not compete with init. either init is done before - * any discard or they're serialized somehow - * - buddy init as sum of on-disk bitmap and PAs is done atomically - * - * a special case when we've used PA to emptiness. 
no need to modify buddy - * in this case, but we should care about concurrent init - * - */ - - /* - * Logic in few words: - * - * - allocation: - * load group - * find blocks - * mark bits in on-disk bitmap - * release group - * - * - use preallocation: - * find proper PA (per-inode or group) - * load group - * mark bits in on-disk bitmap - * release group - * release PA - * - * - free: - * load group - * mark bits in on-disk bitmap - * release group - * - * - discard preallocations in group: - * mark PAs deleted - * move them onto local list - * load on-disk bitmap - * load group - * remove PA from object (inode or locality group) - * mark free blocks in-core - * - * - discard inode's preallocations: - */ - -/* - * Locking rules - * - * Locks: - * - bitlock on a group (group) - * - object (inode/locality) (object) - * - per-pa lock (pa) - * - * Paths: - * - new pa - * object - * group - * - * - find and use pa: - * pa - * - * - release consumed pa: - * pa - * group - * object - * - * - generate in-core bitmap: - * group - * pa - * - * - discard all for given object (inode, locality group): - * object - * pa - * group - * - * - discard all for given group: - * group - * pa - * group - * object - * - */ - -/* - * with AGGRESSIVE_CHECK allocator runs consistency checks over - * structures. these checks slow things down a lot - */ -#define AGGRESSIVE_CHECK__ - -/* - * with DOUBLE_CHECK defined mballoc creates persistent in-core - * bitmaps, maintains and uses them to check for double allocations - */ -#define DOUBLE_CHECK__ - -/* - */ -#define MB_DEBUG__ -#ifdef MB_DEBUG -#define mb_debug(fmt, a...) printk(fmt, ##a) -#else -#define mb_debug(fmt, a...) -#endif - -/* - * with EXT4_MB_HISTORY mballoc stores last N allocations in memory - * and you can monitor it in /proc/fs/ext4//mb_history - */ -#define EXT4_MB_HISTORY -#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */ -#define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */ -#define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */ -#define EXT4_MB_HISTORY_FREE 8 /* free */ - -#define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \ - EXT4_MB_HISTORY_PREALLOC) - -/* - * How long mballoc can look for a best extent (in found extents) - */ -#define MB_DEFAULT_MAX_TO_SCAN 200 - -/* - * How long mballoc must look for a best extent - */ -#define MB_DEFAULT_MIN_TO_SCAN 10 - -/* - * How many groups mballoc will scan looking for the best chunk - */ -#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5 - -/* - * with 'ext4_mb_stats' allocator will collect stats that will be - * shown at umount. The collecting costs though! - */ -#define MB_DEFAULT_STATS 1 - -/* - * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served - * by the stream allocator, which purpose is to pack requests - * as close each to other as possible to produce smooth I/O traffic - * We use locality group prealloc space for stream request. 
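
MB_DEFAULT_MIN_TO_SCAN and MB_DEFAULT_MAX_TO_SCAN above bound the best-extent search described earlier: the scanner must weigh at least min_to_scan found extents before it may stop on a good hit, and never looks at more than max_to_scan of them. An illustrative model of those bounds only -- the scoring is invented, where the real allocator compared each found free extent against the goal request:

#include <stdio.h>

#define MB_DEFAULT_MIN_TO_SCAN 10
#define MB_DEFAULT_MAX_TO_SCAN 200

static int pick_best(const int *score, int n, int perfect)
{
    int best = -1, found = 0;

    for (int i = 0; i < n; i++) {
        found++;
        if (best < 0 || score[i] > score[best])
            best = i;
        /* a perfect fit may stop the scan, but only once the
         * mandatory minimum has been examined */
        if (score[best] == perfect && found >= MB_DEFAULT_MIN_TO_SCAN)
            break;
        /* hard upper bound on how many extents are ever weighed */
        if (found >= MB_DEFAULT_MAX_TO_SCAN)
            break;
    }
    return best;
}

int main(void)
{
    int score[32];

    for (int i = 0; i < 32; i++)
        score[i] = (i * 7) % 13;

    int best = pick_best(score, 32, 12);
    printf("best candidate: index %d (score %d)\n", best, score[best]);
    return 0;
}
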
- * We can tune the same via /proc/fs/ext4//stream_req - */ -#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */ - -/* - * for which requests use 2^N search using buddies - */ -#define MB_DEFAULT_ORDER2_REQS 2 - -/* - * default group prealloc size 512 blocks - */ -#define MB_DEFAULT_GROUP_PREALLOC 512 - -static struct kmem_cache *ext4_pspace_cachep; - -#ifdef EXT4_BB_MAX_BLOCKS -#undef EXT4_BB_MAX_BLOCKS -#endif -#define EXT4_BB_MAX_BLOCKS 30 - -struct ext4_free_metadata { - ext4_group_t group; - unsigned short num; - ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS]; - struct list_head list; -}; - -struct ext4_group_info { - unsigned long bb_state; - unsigned long bb_tid; - struct ext4_free_metadata *bb_md_cur; - unsigned short bb_first_free; - unsigned short bb_free; - unsigned short bb_fragments; - struct list_head bb_prealloc_list; -#ifdef DOUBLE_CHECK - void *bb_bitmap; -#endif - unsigned short bb_counters[]; -}; - -#define EXT4_GROUP_INFO_NEED_INIT_BIT 0 -#define EXT4_GROUP_INFO_LOCKED_BIT 1 - -#define EXT4_MB_GRP_NEED_INIT(grp) \ - (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) - - -struct ext4_prealloc_space { - struct list_head pa_inode_list; - struct list_head pa_group_list; - union { - struct list_head pa_tmp_list; - struct rcu_head pa_rcu; - } u; - spinlock_t pa_lock; - atomic_t pa_count; - unsigned pa_deleted; - ext4_fsblk_t pa_pstart; /* phys. block */ - ext4_lblk_t pa_lstart; /* log. block */ - unsigned short pa_len; /* len of preallocated chunk */ - unsigned short pa_free; /* how many blocks are free */ - unsigned short pa_linear; /* consumed in one direction - * strictly, for grp prealloc */ - spinlock_t *pa_obj_lock; - struct inode *pa_inode; /* hack, for history only */ -}; - - -struct ext4_free_extent { - ext4_lblk_t fe_logical; - ext4_grpblk_t fe_start; - ext4_group_t fe_group; - int fe_len; -}; - -/* - * Locality group: - * we try to group all related changes together - * so that writeback can flush/allocate them together as well - */ -struct ext4_locality_group { - /* for allocator */ - struct mutex lg_mutex; /* to serialize allocates */ - struct list_head lg_prealloc_list;/* list of preallocations */ - spinlock_t lg_prealloc_lock; -}; - -struct ext4_allocation_context { - struct inode *ac_inode; - struct super_block *ac_sb; - - /* original request */ - struct ext4_free_extent ac_o_ex; - - /* goal request (after normalization) */ - struct ext4_free_extent ac_g_ex; - - /* the best found extent */ - struct ext4_free_extent ac_b_ex; - - /* copy of the bext found extent taken before preallocation efforts */ - struct ext4_free_extent ac_f_ex; - - /* number of iterations done. 
we have to track to limit searching */ - unsigned long ac_ex_scanned; - __u16 ac_groups_scanned; - __u16 ac_found; - __u16 ac_tail; - __u16 ac_buddy; - __u16 ac_flags; /* allocation hints */ - __u8 ac_status; - __u8 ac_criteria; - __u8 ac_repeats; - __u8 ac_2order; /* if request is to allocate 2^N blocks and - * N > 0, the field stores N, otherwise 0 */ - __u8 ac_op; /* operation, for history only */ - struct page *ac_bitmap_page; - struct page *ac_buddy_page; - struct ext4_prealloc_space *ac_pa; - struct ext4_locality_group *ac_lg; -}; - -#define AC_STATUS_CONTINUE 1 -#define AC_STATUS_FOUND 2 -#define AC_STATUS_BREAK 3 - -struct ext4_mb_history { - struct ext4_free_extent orig; /* orig allocation */ - struct ext4_free_extent goal; /* goal allocation */ - struct ext4_free_extent result; /* result allocation */ - unsigned pid; - unsigned ino; - __u16 found; /* how many extents have been found */ - __u16 groups; /* how many groups have been scanned */ - __u16 tail; /* what tail broke some buddy */ - __u16 buddy; /* buddy the tail ^^^ broke */ - __u16 flags; - __u8 cr:3; /* which phase the result extent was found at */ - __u8 op:4; - __u8 merged:1; -}; - -struct ext4_buddy { - struct page *bd_buddy_page; - void *bd_buddy; - struct page *bd_bitmap_page; - void *bd_bitmap; - struct ext4_group_info *bd_info; - struct super_block *bd_sb; - __u16 bd_blkbits; - ext4_group_t bd_group; -}; -#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) -#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) - -#ifndef EXT4_MB_HISTORY -static inline void ext4_mb_store_history(struct ext4_allocation_context *ac) -{ - return; -} -#else -static void ext4_mb_store_history(struct ext4_allocation_context *ac); -#endif - -#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) - -static struct proc_dir_entry *proc_root_ext4; -struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); -ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, unsigned long *count, int *errp); - -static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, - ext4_group_t group); -static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *); -static void ext4_mb_free_committed_blocks(struct super_block *); -static void ext4_mb_return_to_preallocation(struct inode *inode, - struct ext4_buddy *e4b, sector_t block, - int count); -static void ext4_mb_put_pa(struct ext4_allocation_context *, - struct super_block *, struct ext4_prealloc_space *pa); -static int ext4_mb_init_per_dev_proc(struct super_block *sb); -static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); - - -static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) -{ - struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); - - bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); -} - -static inline void ext4_unlock_group(struct super_block *sb, - ext4_group_t group) -{ - struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); - - bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); -} - -static inline int ext4_is_group_locked(struct super_block *sb, - ext4_group_t group) -{ - struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); - - return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT, - &(grinfo->bb_state)); -} - -static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, - struct ext4_free_extent *fex) -{ - ext4_fsblk_t block; - - block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb) - + 
fex->fe_start - + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); - return block; -} - -#if BITS_PER_LONG == 64 -#define mb_correct_addr_and_bit(bit, addr) \ -{ \ - bit += ((unsigned long) addr & 7UL) << 3; \ - addr = (void *) ((unsigned long) addr & ~7UL); \ -} -#elif BITS_PER_LONG == 32 -#define mb_correct_addr_and_bit(bit, addr) \ -{ \ - bit += ((unsigned long) addr & 3UL) << 3; \ - addr = (void *) ((unsigned long) addr & ~3UL); \ -} -#else -#error "how many bits you are?!" -#endif - -static inline int mb_test_bit(int bit, void *addr) -{ - /* - * ext4_test_bit on architecture like powerpc - * needs unsigned long aligned address - */ - mb_correct_addr_and_bit(bit, addr); - return ext4_test_bit(bit, addr); -} - -static inline void mb_set_bit(int bit, void *addr) -{ - mb_correct_addr_and_bit(bit, addr); - ext4_set_bit(bit, addr); -} - -static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr) -{ - mb_correct_addr_and_bit(bit, addr); - ext4_set_bit_atomic(lock, bit, addr); -} - -static inline void mb_clear_bit(int bit, void *addr) -{ - mb_correct_addr_and_bit(bit, addr); - ext4_clear_bit(bit, addr); -} - -static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr) -{ - mb_correct_addr_and_bit(bit, addr); - ext4_clear_bit_atomic(lock, bit, addr); -} - -static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max) -{ - char *bb; - - /* FIXME!! is this needed */ - BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b)); - BUG_ON(max == NULL); - - if (order > e4b->bd_blkbits + 1) { - *max = 0; - return NULL; - } - - /* at order 0 we see each particular block */ - *max = 1 << (e4b->bd_blkbits + 3); - if (order == 0) - return EXT4_MB_BITMAP(e4b); - - bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; - *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order]; - - return bb; -} - -#ifdef DOUBLE_CHECK -static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, - int first, int count) -{ - int i; - struct super_block *sb = e4b->bd_sb; - - if (unlikely(e4b->bd_info->bb_bitmap == NULL)) - return; - BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group)); - for (i = 0; i < count; i++) { - if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) { - ext4_fsblk_t blocknr; - blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb); - blocknr += first + i; - blocknr += - le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); - - ext4_error(sb, __FUNCTION__, "double-free of inode" - " %lu's block %llu(bit %u in group %lu)\n", - inode ? 
inode->i_ino : 0, blocknr, - first + i, e4b->bd_group); - } - mb_clear_bit(first + i, e4b->bd_info->bb_bitmap); - } -} - -static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count) -{ - int i; - - if (unlikely(e4b->bd_info->bb_bitmap == NULL)) - return; - BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); - for (i = 0; i < count; i++) { - BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap)); - mb_set_bit(first + i, e4b->bd_info->bb_bitmap); - } -} - -static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap) -{ - if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) { - unsigned char *b1, *b2; - int i; - b1 = (unsigned char *) e4b->bd_info->bb_bitmap; - b2 = (unsigned char *) bitmap; - for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { - if (b1[i] != b2[i]) { - printk("corruption in group %lu at byte %u(%u):" - " %x in copy != %x on disk/prealloc\n", - e4b->bd_group, i, i * 8, b1[i], b2[i]); - BUG(); - } - } - } -} - -#else -static inline void mb_free_blocks_double(struct inode *inode, - struct ext4_buddy *e4b, int first, int count) -{ - return; -} -static inline void mb_mark_used_double(struct ext4_buddy *e4b, - int first, int count) -{ - return; -} -static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap) -{ - return; -} -#endif - -#ifdef AGGRESSIVE_CHECK - -#define MB_CHECK_ASSERT(assert) \ -do { \ - if (!(assert)) { \ - printk(KERN_EMERG \ - "Assertion failure in %s() at %s:%d: \"%s\"\n", \ - function, file, line, # assert); \ - BUG(); \ - } \ -} while (0) - -static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, - const char *function, int line) -{ - struct super_block *sb = e4b->bd_sb; - int order = e4b->bd_blkbits + 1; - int max; - int max2; - int i; - int j; - int k; - int count; - struct ext4_group_info *grp; - int fragments = 0; - int fstart; - struct list_head *cur; - void *buddy; - void *buddy2; - - if (!test_opt(sb, MBALLOC)) - return 0; - - { - static int mb_check_counter; - if (mb_check_counter++ % 100 != 0) - return 0; - } - - while (order > 1) { - buddy = mb_find_buddy(e4b, order, &max); - MB_CHECK_ASSERT(buddy); - buddy2 = mb_find_buddy(e4b, order - 1, &max2); - MB_CHECK_ASSERT(buddy2); - MB_CHECK_ASSERT(buddy != buddy2); - MB_CHECK_ASSERT(max * 2 == max2); - - count = 0; - for (i = 0; i < max; i++) { - - if (mb_test_bit(i, buddy)) { - /* only single bit in buddy2 may be 1 */ - if (!mb_test_bit(i << 1, buddy2)) { - MB_CHECK_ASSERT( - mb_test_bit((i<<1)+1, buddy2)); - } else if (!mb_test_bit((i << 1) + 1, buddy2)) { - MB_CHECK_ASSERT( - mb_test_bit(i << 1, buddy2)); - } - continue; - } - - /* both bits in buddy2 must be 0 */ - MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2)); - MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2)); - - for (j = 0; j < (1 << order); j++) { - k = (i * (1 << order)) + j; - MB_CHECK_ASSERT( - !mb_test_bit(k, EXT4_MB_BITMAP(e4b))); - } - count++; - } - MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count); - order--; - } - - fstart = -1; - buddy = mb_find_buddy(e4b, 0, &max); - for (i = 0; i < max; i++) { - if (!mb_test_bit(i, buddy)) { - MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free); - if (fstart == -1) { - fragments++; - fstart = i; - } - continue; - } - fstart = -1; - /* check used bits only */ - for (j = 0; j < e4b->bd_blkbits + 1; j++) { - buddy2 = mb_find_buddy(e4b, j, &max2); - k = i >> j; - MB_CHECK_ASSERT(k < max2); - MB_CHECK_ASSERT(mb_test_bit(k, buddy2)); - } - } - MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info)); - 
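The order-by-order walk above encodes the core buddy invariant: a cleared bit at order k means the chunk is tracked as free only at that order, so both of its halves must read busy at order k - 1, while every block it covers reads free in the order-0 block bitmap. A minimal userspace sketch of that parent/child rule between two adjacent orders (the toy arrays and sizes here are illustrative, not ext4 structures):

#include <assert.h>
#include <stdio.h>

/* 0 = free chunk tracked at this order, 1 = busy or not tracked here */
static int order1[4];	/* four pairs of blocks */
static int order2[2];	/* two quads of blocks  */

static void check_buddy_invariant(void)
{
	for (int i = 0; i < 2; i++) {
		if (order2[i] == 0) {
			/* free at order 2: neither half may also be
			 * tracked as free one order below */
			assert(order1[2 * i] == 1);
			assert(order1[2 * i + 1] == 1);
		}
	}
}

int main(void)
{
	for (int i = 0; i < 4; i++)
		order1[i] = 1;
	order2[0] = 0;		/* blocks 0-3 free as one order-2 buddy */
	order2[1] = 1;
	order1[2] = 0;		/* blocks 4-5 free as an order-1 buddy  */
	check_buddy_invariant();
	printf("buddy invariant holds\n");
	return 0;
}
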
MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);
-
- grp = ext4_get_group_info(sb, e4b->bd_group);
- buddy = mb_find_buddy(e4b, 0, &max);
- list_for_each(cur, &grp->bb_prealloc_list) {
- ext4_group_t groupnr;
- struct ext4_prealloc_space *pa;
- pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
- ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
- MB_CHECK_ASSERT(groupnr == e4b->bd_group);
- for (i = 0; i < pa->pa_len; i++)
- MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
- }
- return 0;
-}
-#undef MB_CHECK_ASSERT
-#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \
- __FILE__, __FUNCTION__, __LINE__)
-#else
-#define mb_check_buddy(e4b)
-#endif
-
-/* FIXME!! need more doc */
-static void ext4_mb_mark_free_simple(struct super_block *sb,
- void *buddy, unsigned first, int len,
- struct ext4_group_info *grp)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- unsigned short min;
- unsigned short max;
- unsigned short chunk;
- unsigned short border;
-
- BUG_ON(len >= EXT4_BLOCKS_PER_GROUP(sb));
-
- border = 2 << sb->s_blocksize_bits;
-
- while (len > 0) {
- /* find how many blocks can be covered since this position */
- max = ffs(first | border) - 1;
-
- /* find how many blocks of power 2 we need to mark */
- min = fls(len) - 1;
-
- if (max < min)
- min = max;
- chunk = 1 << min;
-
- /* mark multiblock chunks only */
- grp->bb_counters[min]++;
- if (min > 0)
- mb_clear_bit(first >> min,
- buddy + sbi->s_mb_offsets[min]);
-
- len -= chunk;
- first += chunk;
- }
-}
-
-static void ext4_mb_generate_buddy(struct super_block *sb,
- void *buddy, void *bitmap, ext4_group_t group)
-{
- struct ext4_group_info *grp = ext4_get_group_info(sb, group);
- unsigned short max = EXT4_BLOCKS_PER_GROUP(sb);
- unsigned short i = 0;
- unsigned short first;
- unsigned short len;
- unsigned free = 0;
- unsigned fragments = 0;
- unsigned long long period = get_cycles();
-
- /* initialize buddy from bitmap which is aggregation
- * of on-disk bitmap and preallocations */
- i = ext4_find_next_zero_bit(bitmap, max, 0);
- grp->bb_first_free = i;
- while (i < max) {
- fragments++;
- first = i;
- i = ext4_find_next_bit(bitmap, max, i);
- len = i - first;
- free += len;
- if (len > 1)
- ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
- else
- grp->bb_counters[0]++;
- if (i < max)
- i = ext4_find_next_zero_bit(bitmap, max, i);
- }
- grp->bb_fragments = fragments;
-
- if (free != grp->bb_free) {
- printk(KERN_DEBUG
- "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n",
- group, free, grp->bb_free);
- grp->bb_free = free;
- }
-
- clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
-
- period = get_cycles() - period;
- spin_lock(&EXT4_SB(sb)->s_bal_lock);
- EXT4_SB(sb)->s_mb_buddies_generated++;
- EXT4_SB(sb)->s_mb_generation_time += period;
- spin_unlock(&EXT4_SB(sb)->s_bal_lock);
-}
-
-/* The buddy information is attached to the buddy cache inode
- * for convenience. The information regarding each group
- * is loaded via ext4_mb_load_buddy and consists of the
- * block bitmap plus the buddy data, stored in the inode as
- *
- * { page }
- * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
- *
- *
- * one block each for the bitmap and the buddy information.
- * So each group takes up 2 blocks. A page can
- * contain blocks_per_page (PAGE_CACHE_SIZE / blocksize) blocks. 
- * So it can have information regarding groups_per_page which - * is blocks_per_page/2 - */ - -static int ext4_mb_init_cache(struct page *page, char *incore) -{ - int blocksize; - int blocks_per_page; - int groups_per_page; - int err = 0; - int i; - ext4_group_t first_group; - int first_block; - struct super_block *sb; - struct buffer_head *bhs; - struct buffer_head **bh; - struct inode *inode; - char *data; - char *bitmap; - - mb_debug("init page %lu\n", page->index); - - inode = page->mapping->host; - sb = inode->i_sb; - blocksize = 1 << inode->i_blkbits; - blocks_per_page = PAGE_CACHE_SIZE / blocksize; - - groups_per_page = blocks_per_page >> 1; - if (groups_per_page == 0) - groups_per_page = 1; - - /* allocate buffer_heads to read bitmaps */ - if (groups_per_page > 1) { - err = -ENOMEM; - i = sizeof(struct buffer_head *) * groups_per_page; - bh = kzalloc(i, GFP_NOFS); - if (bh == NULL) - goto out; - } else - bh = &bhs; - - first_group = page->index * blocks_per_page / 2; - - /* read all groups the page covers into the cache */ - for (i = 0; i < groups_per_page; i++) { - struct ext4_group_desc *desc; - - if (first_group + i >= EXT4_SB(sb)->s_groups_count) - break; - - err = -EIO; - desc = ext4_get_group_desc(sb, first_group + i, NULL); - if (desc == NULL) - goto out; - - err = -ENOMEM; - bh[i] = sb_getblk(sb, ext4_block_bitmap(sb, desc)); - if (bh[i] == NULL) - goto out; - - if (bh_uptodate_or_lock(bh[i])) - continue; - - if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { - ext4_init_block_bitmap(sb, bh[i], - first_group + i, desc); - set_buffer_uptodate(bh[i]); - unlock_buffer(bh[i]); - continue; - } - get_bh(bh[i]); - bh[i]->b_end_io = end_buffer_read_sync; - submit_bh(READ, bh[i]); - mb_debug("read bitmap for group %lu\n", first_group + i); - } - - /* wait for I/O completion */ - for (i = 0; i < groups_per_page && bh[i]; i++) - wait_on_buffer(bh[i]); - - err = -EIO; - for (i = 0; i < groups_per_page && bh[i]; i++) - if (!buffer_uptodate(bh[i])) - goto out; - - first_block = page->index * blocks_per_page; - for (i = 0; i < blocks_per_page; i++) { - int group; - struct ext4_group_info *grinfo; - - group = (first_block + i) >> 1; - if (group >= EXT4_SB(sb)->s_groups_count) - break; - - /* - * data carry information regarding this - * particular group in the format specified - * above - * - */ - data = page_address(page) + (i * blocksize); - bitmap = bh[group - first_group]->b_data; - - /* - * We place the buddy block and bitmap block - * close together - */ - if ((first_block + i) & 1) { - /* this is block of buddy */ - BUG_ON(incore == NULL); - mb_debug("put buddy for group %u in page %lu/%x\n", - group, page->index, i * blocksize); - memset(data, 0xff, blocksize); - grinfo = ext4_get_group_info(sb, group); - grinfo->bb_fragments = 0; - memset(grinfo->bb_counters, 0, - sizeof(unsigned short)*(sb->s_blocksize_bits+2)); - /* - * incore got set to the group block bitmap below - */ - ext4_mb_generate_buddy(sb, data, incore, group); - incore = NULL; - } else { - /* this is block of bitmap */ - BUG_ON(incore != NULL); - mb_debug("put bitmap for group %u in page %lu/%x\n", - group, page->index, i * blocksize); - - /* see comments in ext4_mb_put_pa() */ - ext4_lock_group(sb, group); - memcpy(data, bitmap, blocksize); - - /* mark all preallocated blks used in in-core bitmap */ - ext4_mb_generate_from_pa(sb, data, group); - ext4_unlock_group(sb, group); - - /* set incore so that the buddy information can be - * generated using this - */ - incore = data; - } - } - SetPageUptodate(page); - 
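The interleaving set up above makes the cache lookup pure arithmetic. A small standalone sketch, using a hypothetical locate() helper rather than any ext4 function, of how a group number maps to the page and byte offset of its bitmap and buddy blocks under the block = group * 2 scheme (the same arithmetic reappears in ext4_mb_load_buddy() below):

#include <stdio.h>

#define PAGE_SIZE 4096u

/* locate() is a hypothetical helper: group g owns logical blocks
 * 2g (bitmap) and 2g + 1 (buddy) of the buddy-cache inode */
static void locate(unsigned group, unsigned blocksize)
{
	unsigned blocks_per_page = PAGE_SIZE / blocksize;
	unsigned block = group * 2;

	printf("group %u: bitmap page %u off %u, buddy page %u off %u\n",
	       group,
	       block / blocks_per_page,
	       (block % blocks_per_page) * blocksize,
	       (block + 1) / blocks_per_page,
	       ((block + 1) % blocks_per_page) * blocksize);
}

int main(void)
{
	locate(0, 4096);	/* 4K blocks: bitmap and buddy on pages 0, 1 */
	locate(5, 1024);	/* 1K blocks: blocks 10 and 11 share page 2  */
	return 0;
}
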
-out: - if (bh) { - for (i = 0; i < groups_per_page && bh[i]; i++) - brelse(bh[i]); - if (bh != &bhs) - kfree(bh); - } - return err; -} - -static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, - struct ext4_buddy *e4b) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - struct inode *inode = sbi->s_buddy_cache; - int blocks_per_page; - int block; - int pnum; - int poff; - struct page *page; - - mb_debug("load group %lu\n", group); - - blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; - - e4b->bd_blkbits = sb->s_blocksize_bits; - e4b->bd_info = ext4_get_group_info(sb, group); - e4b->bd_sb = sb; - e4b->bd_group = group; - e4b->bd_buddy_page = NULL; - e4b->bd_bitmap_page = NULL; - - /* - * the buddy cache inode stores the block bitmap - * and buddy information in consecutive blocks. - * So for each group we need two blocks. - */ - block = group * 2; - pnum = block / blocks_per_page; - poff = block % blocks_per_page; - - /* we could use find_or_create_page(), but it locks page - * what we'd like to avoid in fast path ... */ - page = find_get_page(inode->i_mapping, pnum); - if (page == NULL || !PageUptodate(page)) { - if (page) - page_cache_release(page); - page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); - if (page) { - BUG_ON(page->mapping != inode->i_mapping); - if (!PageUptodate(page)) { - ext4_mb_init_cache(page, NULL); - mb_cmp_bitmaps(e4b, page_address(page) + - (poff * sb->s_blocksize)); - } - unlock_page(page); - } - } - if (page == NULL || !PageUptodate(page)) - goto err; - e4b->bd_bitmap_page = page; - e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); - mark_page_accessed(page); - - block++; - pnum = block / blocks_per_page; - poff = block % blocks_per_page; - - page = find_get_page(inode->i_mapping, pnum); - if (page == NULL || !PageUptodate(page)) { - if (page) - page_cache_release(page); - page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); - if (page) { - BUG_ON(page->mapping != inode->i_mapping); - if (!PageUptodate(page)) - ext4_mb_init_cache(page, e4b->bd_bitmap); - - unlock_page(page); - } - } - if (page == NULL || !PageUptodate(page)) - goto err; - e4b->bd_buddy_page = page; - e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize); - mark_page_accessed(page); - - BUG_ON(e4b->bd_bitmap_page == NULL); - BUG_ON(e4b->bd_buddy_page == NULL); - - return 0; - -err: - if (e4b->bd_bitmap_page) - page_cache_release(e4b->bd_bitmap_page); - if (e4b->bd_buddy_page) - page_cache_release(e4b->bd_buddy_page); - e4b->bd_buddy = NULL; - e4b->bd_bitmap = NULL; - return -EIO; -} - -static void ext4_mb_release_desc(struct ext4_buddy *e4b) -{ - if (e4b->bd_bitmap_page) - page_cache_release(e4b->bd_bitmap_page); - if (e4b->bd_buddy_page) - page_cache_release(e4b->bd_buddy_page); -} - - -static int mb_find_order_for_block(struct ext4_buddy *e4b, int block) -{ - int order = 1; - void *bb; - - BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b)); - BUG_ON(block >= (1 << (e4b->bd_blkbits + 3))); - - bb = EXT4_MB_BUDDY(e4b); - while (order <= e4b->bd_blkbits + 1) { - block = block >> 1; - if (!mb_test_bit(block, bb)) { - /* this block is part of buddy of order 'order' */ - return order; - } - bb += 1 << (e4b->bd_blkbits - order); - order++; - } - return 0; -} - -static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len) -{ - __u32 *addr; - - len = cur + len; - while (cur < len) { - if ((cur & 31) == 0 && (len - cur) >= 32) { - /* fast path: clear whole word at once */ - addr = bm + (cur >> 3); - *addr = 0; - cur += 32; - continue; 
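The word-at-a-time fast path above is self-contained enough to demonstrate outside the kernel. A rough userspace equivalent, assuming a little-endian host so that the byte-wise bit numbering agrees with the 32-bit word stores (mb_set_bits() below is the mirror image, storing 0xffffffff instead of 0):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* clear bits [cur, cur + len) of a bitmap: single-bit updates until
 * cur is 32-bit aligned, then whole words at once while possible */
static void clear_bits(void *bm, int cur, int len)
{
	len = cur + len;
	while (cur < len) {
		if ((cur & 31) == 0 && (len - cur) >= 32) {
			uint32_t *addr = (uint32_t *)((char *)bm + (cur >> 3));
			*addr = 0;	/* fast path: whole word at once */
			cur += 32;
			continue;
		}
		((unsigned char *)bm)[cur >> 3] &= ~(1u << (cur & 7));
		cur++;
	}
}

int main(void)
{
	uint32_t words[4];	/* word array keeps the casts aligned */

	memset(words, 0xff, sizeof(words));
	clear_bits(words, 5, 70);	/* leading bits, one full word, tail */
	for (int i = 0; i < 16; i++)
		printf("%02x ", ((unsigned char *)words)[i]);
	printf("\n");
	return 0;
}
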
- } - mb_clear_bit_atomic(lock, cur, bm); - cur++; - } -} - -static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len) -{ - __u32 *addr; - - len = cur + len; - while (cur < len) { - if ((cur & 31) == 0 && (len - cur) >= 32) { - /* fast path: set whole word at once */ - addr = bm + (cur >> 3); - *addr = 0xffffffff; - cur += 32; - continue; - } - mb_set_bit_atomic(lock, cur, bm); - cur++; - } -} - -static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, - int first, int count) -{ - int block = 0; - int max = 0; - int order; - void *buddy; - void *buddy2; - struct super_block *sb = e4b->bd_sb; - - BUG_ON(first + count > (sb->s_blocksize << 3)); - BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group)); - mb_check_buddy(e4b); - mb_free_blocks_double(inode, e4b, first, count); - - e4b->bd_info->bb_free += count; - if (first < e4b->bd_info->bb_first_free) - e4b->bd_info->bb_first_free = first; - - /* let's maintain fragments counter */ - if (first != 0) - block = !mb_test_bit(first - 1, EXT4_MB_BITMAP(e4b)); - if (first + count < EXT4_SB(sb)->s_mb_maxs[0]) - max = !mb_test_bit(first + count, EXT4_MB_BITMAP(e4b)); - if (block && max) - e4b->bd_info->bb_fragments--; - else if (!block && !max) - e4b->bd_info->bb_fragments++; - - /* let's maintain buddy itself */ - while (count-- > 0) { - block = first++; - order = 0; - - if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) { - ext4_fsblk_t blocknr; - blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb); - blocknr += block; - blocknr += - le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); - - ext4_error(sb, __FUNCTION__, "double-free of inode" - " %lu's block %llu(bit %u in group %lu)\n", - inode ? inode->i_ino : 0, blocknr, block, - e4b->bd_group); - } - mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); - e4b->bd_info->bb_counters[order]++; - - /* start of the buddy */ - buddy = mb_find_buddy(e4b, order, &max); - - do { - block &= ~1UL; - if (mb_test_bit(block, buddy) || - mb_test_bit(block + 1, buddy)) - break; - - /* both the buddies are free, try to coalesce them */ - buddy2 = mb_find_buddy(e4b, order + 1, &max); - - if (!buddy2) - break; - - if (order > 0) { - /* for special purposes, we don't set - * free bits in bitmap */ - mb_set_bit(block, buddy); - mb_set_bit(block + 1, buddy); - } - e4b->bd_info->bb_counters[order]--; - e4b->bd_info->bb_counters[order]--; - - block = block >> 1; - order++; - e4b->bd_info->bb_counters[order]++; - - mb_clear_bit(block, buddy2); - buddy = buddy2; - } while (1); - } - mb_check_buddy(e4b); - - return 0; -} - -static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, - int needed, struct ext4_free_extent *ex) -{ - int next = block; - int max; - int ord; - void *buddy; - - BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); - BUG_ON(ex == NULL); - - buddy = mb_find_buddy(e4b, order, &max); - BUG_ON(buddy == NULL); - BUG_ON(block >= max); - if (mb_test_bit(block, buddy)) { - ex->fe_len = 0; - ex->fe_start = 0; - ex->fe_group = 0; - return 0; - } - - /* FIXME dorp order completely ? 
*/ - if (likely(order == 0)) { - /* find actual order */ - order = mb_find_order_for_block(e4b, block); - block = block >> order; - } - - ex->fe_len = 1 << order; - ex->fe_start = block << order; - ex->fe_group = e4b->bd_group; - - /* calc difference from given start */ - next = next - ex->fe_start; - ex->fe_len -= next; - ex->fe_start += next; - - while (needed > ex->fe_len && - (buddy = mb_find_buddy(e4b, order, &max))) { - - if (block + 1 >= max) - break; - - next = (block + 1) * (1 << order); - if (mb_test_bit(next, EXT4_MB_BITMAP(e4b))) - break; - - ord = mb_find_order_for_block(e4b, next); - - order = ord; - block = next >> order; - ex->fe_len += 1 << order; - } - - BUG_ON(ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3))); - return ex->fe_len; -} - -static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) -{ - int ord; - int mlen = 0; - int max = 0; - int cur; - int start = ex->fe_start; - int len = ex->fe_len; - unsigned ret = 0; - int len0 = len; - void *buddy; - - BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3)); - BUG_ON(e4b->bd_group != ex->fe_group); - BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); - mb_check_buddy(e4b); - mb_mark_used_double(e4b, start, len); - - e4b->bd_info->bb_free -= len; - if (e4b->bd_info->bb_first_free == start) - e4b->bd_info->bb_first_free += len; - - /* let's maintain fragments counter */ - if (start != 0) - mlen = !mb_test_bit(start - 1, EXT4_MB_BITMAP(e4b)); - if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0]) - max = !mb_test_bit(start + len, EXT4_MB_BITMAP(e4b)); - if (mlen && max) - e4b->bd_info->bb_fragments++; - else if (!mlen && !max) - e4b->bd_info->bb_fragments--; - - /* let's maintain buddy itself */ - while (len) { - ord = mb_find_order_for_block(e4b, start); - - if (((start >> ord) << ord) == start && len >= (1 << ord)) { - /* the whole chunk may be allocated at once! */ - mlen = 1 << ord; - buddy = mb_find_buddy(e4b, ord, &max); - BUG_ON((start >> ord) >= max); - mb_set_bit(start >> ord, buddy); - e4b->bd_info->bb_counters[ord]--; - start += mlen; - len -= mlen; - BUG_ON(len < 0); - continue; - } - - /* store for history */ - if (ret == 0) - ret = len | (ord << 16); - - /* we have to split large buddy */ - BUG_ON(ord <= 0); - buddy = mb_find_buddy(e4b, ord, &max); - mb_set_bit(start >> ord, buddy); - e4b->bd_info->bb_counters[ord]--; - - ord--; - cur = (start >> ord) & ~1U; - buddy = mb_find_buddy(e4b, ord, &max); - mb_clear_bit(cur, buddy); - mb_clear_bit(cur + 1, buddy); - e4b->bd_info->bb_counters[ord]++; - e4b->bd_info->bb_counters[ord]++; - } - - mb_set_bits(sb_bgl_lock(EXT4_SB(e4b->bd_sb), ex->fe_group), - EXT4_MB_BITMAP(e4b), ex->fe_start, len0); - mb_check_buddy(e4b); - - return ret; -} - -/* - * Must be called under group lock! - */ -static void ext4_mb_use_best_found(struct ext4_allocation_context *ac, - struct ext4_buddy *e4b) -{ - struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); - int ret; - - BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group); - BUG_ON(ac->ac_status == AC_STATUS_FOUND); - - ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len); - ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical; - ret = mb_mark_used(e4b, &ac->ac_b_ex); - - /* preallocation can change ac_b_ex, thus we store actually - * allocated blocks for history */ - ac->ac_f_ex = ac->ac_b_ex; - - ac->ac_status = AC_STATUS_FOUND; - ac->ac_tail = ret & 0xffff; - ac->ac_buddy = ret >> 16; - - /* XXXXXXX: SUCH A HORRIBLE **CK */ - /*FIXME!! Why ? 
*/ - ac->ac_bitmap_page = e4b->bd_bitmap_page; - get_page(ac->ac_bitmap_page); - ac->ac_buddy_page = e4b->bd_buddy_page; - get_page(ac->ac_buddy_page); - - /* store last allocated for subsequent stream allocation */ - if ((ac->ac_flags & EXT4_MB_HINT_DATA)) { - spin_lock(&sbi->s_md_lock); - sbi->s_mb_last_group = ac->ac_f_ex.fe_group; - sbi->s_mb_last_start = ac->ac_f_ex.fe_start; - spin_unlock(&sbi->s_md_lock); - } -} - -/* - * regular allocator, for general purposes allocation - */ - -static void ext4_mb_check_limits(struct ext4_allocation_context *ac, - struct ext4_buddy *e4b, - int finish_group) -{ - struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); - struct ext4_free_extent *bex = &ac->ac_b_ex; - struct ext4_free_extent *gex = &ac->ac_g_ex; - struct ext4_free_extent ex; - int max; - - /* - * We don't want to scan for a whole year - */ - if (ac->ac_found > sbi->s_mb_max_to_scan && - !(ac->ac_flags & EXT4_MB_HINT_FIRST)) { - ac->ac_status = AC_STATUS_BREAK; - return; - } - - /* - * Haven't found good chunk so far, let's continue - */ - if (bex->fe_len < gex->fe_len) - return; - - if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan) - && bex->fe_group == e4b->bd_group) { - /* recheck chunk's availability - we don't know - * when it was found (within this lock-unlock - * period or not) */ - max = mb_find_extent(e4b, 0, bex->fe_start, gex->fe_len, &ex); - if (max >= gex->fe_len) { - ext4_mb_use_best_found(ac, e4b); - return; - } - } -} - -/* - * The routine checks whether found extent is good enough. If it is, - * then the extent gets marked used and flag is set to the context - * to stop scanning. Otherwise, the extent is compared with the - * previous found extent and if new one is better, then it's stored - * in the context. Later, the best found extent will be used, if - * mballoc can't find good enough extent. - * - * FIXME: real allocation policy is to be designed yet! 
- */
-static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
- struct ext4_free_extent *ex,
- struct ext4_buddy *e4b)
-{
- struct ext4_free_extent *bex = &ac->ac_b_ex;
- struct ext4_free_extent *gex = &ac->ac_g_ex;
-
- BUG_ON(ex->fe_len <= 0);
- BUG_ON(ex->fe_len >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
- BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
- BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
-
- ac->ac_found++;
-
- /*
- * The special case - take what you catch first
- */
- if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
- *bex = *ex;
- ext4_mb_use_best_found(ac, e4b);
- return;
- }
-
- /*
- * Let's check whether the chunk is good enough
- */
- if (ex->fe_len == gex->fe_len) {
- *bex = *ex;
- ext4_mb_use_best_found(ac, e4b);
- return;
- }
-
- /*
- * If this is the first found extent, just store it in the context
- */
- if (bex->fe_len == 0) {
- *bex = *ex;
- return;
- }
-
- /*
- * If the newly found extent is better, store it in the context
- */
- if (bex->fe_len < gex->fe_len) {
- /* if the request isn't satisfied, any found extent
- * larger than the previous best one is better */
- if (ex->fe_len > bex->fe_len)
- *bex = *ex;
- } else if (ex->fe_len > gex->fe_len) {
- /* if the request is satisfied, then we try to find
- * an extent that still satisfies the request, but is
- * smaller than the previous one */
- if (ex->fe_len < bex->fe_len)
- *bex = *ex;
- }
-
- ext4_mb_check_limits(ac, e4b, 0);
-}
-
-static int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
- struct ext4_buddy *e4b)
-{
- struct ext4_free_extent ex = ac->ac_b_ex;
- ext4_group_t group = ex.fe_group;
- int max;
- int err;
-
- BUG_ON(ex.fe_len <= 0);
- err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
- if (err)
- return err;
-
- ext4_lock_group(ac->ac_sb, group);
- max = mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex);
-
- if (max > 0) {
- ac->ac_b_ex = ex;
- ext4_mb_use_best_found(ac, e4b);
- }
-
- ext4_unlock_group(ac->ac_sb, group);
- ext4_mb_release_desc(e4b);
-
- return 0;
-}
-
-static int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
- struct ext4_buddy *e4b)
-{
- ext4_group_t group = ac->ac_g_ex.fe_group;
- int max;
- int err;
- struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
- struct ext4_super_block *es = sbi->s_es;
- struct ext4_free_extent ex;
-
- if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
- return 0;
-
- err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
- if (err)
- return err;
-
- ext4_lock_group(ac->ac_sb, group);
- max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start,
- ac->ac_g_ex.fe_len, &ex);
-
- if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
- ext4_fsblk_t start;
-
- start = (e4b->bd_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb)) +
- ex.fe_start + le32_to_cpu(es->s_first_data_block);
- /* use do_div to get remainder (would be 64-bit modulo) */
- if (do_div(start, sbi->s_stripe) == 0) {
- ac->ac_found++;
- ac->ac_b_ex = ex;
- ext4_mb_use_best_found(ac, e4b);
- }
- } else if (max >= ac->ac_g_ex.fe_len) {
- BUG_ON(ex.fe_len <= 0);
- BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
- BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
- ac->ac_found++;
- ac->ac_b_ex = ex;
- ext4_mb_use_best_found(ac, e4b);
- } else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
- /* Sometimes, the caller may want to merge even a small
- * number of blocks into an existing extent */
- BUG_ON(ex.fe_len <= 0);
- BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
- BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
- ac->ac_found++;
- ac->ac_b_ex = ex;
- ext4_mb_use_best_found(ac, e4b);
- }
- 
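Stripped of locking and context bookkeeping, the selection rule implemented by ext4_mb_measure_extent() above distills to a small predicate: exact fits win immediately; while the goal is unsatisfied, bigger is better; once satisfied, the smallest extent still covering the goal is preferred. A rough sketch (better() and struct fex are illustrative stand-ins, not ext4 definitions):

#include <stdio.h>

struct fex { int len; };	/* stand-in for a found free extent */

static int better(struct fex best, struct fex found, int goal)
{
	if (best.len == 0)
		return 1;			/* first candidate       */
	if (found.len == goal)
		return 1;			/* exact fit always wins */
	if (best.len < goal)
		return found.len > best.len;	/* unsatisfied: bigger   */
	return found.len >= goal && found.len < best.len;
						/* satisfied: smallest
						 * still covering goal  */
}

int main(void)
{
	struct fex best = { 0 };
	struct fex found[] = { {3}, {12}, {9}, {5}, {8} };
	int goal = 8;

	for (unsigned i = 0; i < sizeof(found) / sizeof(found[0]); i++) {
		if (better(best, found[i], goal))
			best = found[i];
		printf("found %2d -> best %2d\n", found[i].len, best.len);
	}
	return 0;
}
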
ext4_unlock_group(ac->ac_sb, group); - ext4_mb_release_desc(e4b); - - return 0; -} - -/* - * The routine scans buddy structures (not bitmap!) from given order - * to max order and tries to find big enough chunk to satisfy the req - */ -static void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac, - struct ext4_buddy *e4b) -{ - struct super_block *sb = ac->ac_sb; - struct ext4_group_info *grp = e4b->bd_info; - void *buddy; - int i; - int k; - int max; - - BUG_ON(ac->ac_2order <= 0); - for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) { - if (grp->bb_counters[i] == 0) - continue; - - buddy = mb_find_buddy(e4b, i, &max); - BUG_ON(buddy == NULL); - - k = ext4_find_next_zero_bit(buddy, max, 0); - BUG_ON(k >= max); - - ac->ac_found++; - - ac->ac_b_ex.fe_len = 1 << i; - ac->ac_b_ex.fe_start = k << i; - ac->ac_b_ex.fe_group = e4b->bd_group; - - ext4_mb_use_best_found(ac, e4b); - - BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len); - - if (EXT4_SB(sb)->s_mb_stats) - atomic_inc(&EXT4_SB(sb)->s_bal_2orders); - - break; - } -} - -/* - * The routine scans the group and measures all found extents. - * In order to optimize scanning, caller must pass number of - * free blocks in the group, so the routine can know upper limit. - */ -static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, - struct ext4_buddy *e4b) -{ - struct super_block *sb = ac->ac_sb; - void *bitmap = EXT4_MB_BITMAP(e4b); - struct ext4_free_extent ex; - int i; - int free; - - free = e4b->bd_info->bb_free; - BUG_ON(free <= 0); - - i = e4b->bd_info->bb_first_free; - - while (free && ac->ac_status == AC_STATUS_CONTINUE) { - i = ext4_find_next_zero_bit(bitmap, - EXT4_BLOCKS_PER_GROUP(sb), i); - if (i >= EXT4_BLOCKS_PER_GROUP(sb)) { - BUG_ON(free != 0); - break; - } - - mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); - BUG_ON(ex.fe_len <= 0); - BUG_ON(free < ex.fe_len); - - ext4_mb_measure_extent(ac, &ex, e4b); - - i += ex.fe_len; - free -= ex.fe_len; - } - - ext4_mb_check_limits(ac, e4b, 1); -} - -/* - * This is a special case for storages like raid5 - * we try to find stripe-aligned chunks for stripe-size requests - * XXX should do so at least for multiples of stripe size as well - */ -static void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, - struct ext4_buddy *e4b) -{ - struct super_block *sb = ac->ac_sb; - struct ext4_sb_info *sbi = EXT4_SB(sb); - void *bitmap = EXT4_MB_BITMAP(e4b); - struct ext4_free_extent ex; - ext4_fsblk_t first_group_block; - ext4_fsblk_t a; - ext4_grpblk_t i; - int max; - - BUG_ON(sbi->s_stripe == 0); - - /* find first stripe-aligned block in group */ - first_group_block = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb) - + le32_to_cpu(sbi->s_es->s_first_data_block); - a = first_group_block + sbi->s_stripe - 1; - do_div(a, sbi->s_stripe); - i = (a * sbi->s_stripe) - first_group_block; - - while (i < EXT4_BLOCKS_PER_GROUP(sb)) { - if (!mb_test_bit(i, bitmap)) { - max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex); - if (max >= sbi->s_stripe) { - ac->ac_found++; - ac->ac_b_ex = ex; - ext4_mb_use_best_found(ac, e4b); - break; - } - } - i += sbi->s_stripe; - } -} - -static int ext4_mb_good_group(struct ext4_allocation_context *ac, - ext4_group_t group, int cr) -{ - unsigned free, fragments; - unsigned i, bits; - struct ext4_group_desc *desc; - struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); - - BUG_ON(cr < 0 || cr >= 4); - BUG_ON(EXT4_MB_GRP_NEED_INIT(grp)); - - free = grp->bb_free; - fragments = grp->bb_fragments; - if (free == 0) - return 0; - if 
(fragments == 0)
- return 0;
-
- switch (cr) {
- case 0:
- BUG_ON(ac->ac_2order == 0);
- /* If this group is uninitialized, skip it initially */
- desc = ext4_get_group_desc(ac->ac_sb, group, NULL);
- if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
- return 0;
-
- bits = ac->ac_sb->s_blocksize_bits + 1;
- for (i = ac->ac_2order; i <= bits; i++)
- if (grp->bb_counters[i] > 0)
- return 1;
- break;
- case 1:
- if ((free / fragments) >= ac->ac_g_ex.fe_len)
- return 1;
- break;
- case 2:
- if (free >= ac->ac_g_ex.fe_len)
- return 1;
- break;
- case 3:
- return 1;
- default:
- BUG();
- }
-
- return 0;
-}
-
-static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
-{
- ext4_group_t group;
- ext4_group_t i;
- int cr;
- int err = 0;
- int bsbits;
- struct ext4_sb_info *sbi;
- struct super_block *sb;
- struct ext4_buddy e4b;
- loff_t size, isize;
-
- sb = ac->ac_sb;
- sbi = EXT4_SB(sb);
- BUG_ON(ac->ac_status == AC_STATUS_FOUND);
-
- /* first, try the goal */
- err = ext4_mb_find_by_goal(ac, &e4b);
- if (err || ac->ac_status == AC_STATUS_FOUND)
- goto out;
-
- if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
- goto out;
-
- /*
- * ac->ac_2order is set only if the fe_len is a power of 2;
- * if ac_2order is set we also set criteria to 0 so that we
- * try exact allocation using buddy.
- */
- i = fls(ac->ac_g_ex.fe_len);
- ac->ac_2order = 0;
- /*
- * We search using buddy data only if the order of the request
- * is greater than or equal to sbi->s_mb_order2_reqs.
- * You can tune it via /proc/fs/ext4//order2_req
- */
- if (i >= sbi->s_mb_order2_reqs) {
- /*
- * This should tell if fe_len is exactly a power of 2
- */
- if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
- ac->ac_2order = i - 1;
- }
-
- bsbits = ac->ac_sb->s_blocksize_bits;
- /* if stream allocation is enabled, use the global goal */
- size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
- isize = i_size_read(ac->ac_inode) >> bsbits;
- if (size < isize)
- size = isize;
-
- if (size < sbi->s_mb_stream_request &&
- (ac->ac_flags & EXT4_MB_HINT_DATA)) {
- /* TBD: may be hot point */
- spin_lock(&sbi->s_md_lock);
- ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
- ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
- spin_unlock(&sbi->s_md_lock);
- }
-
- /* the search for the right group starts from the specified goal value */
- group = ac->ac_g_ex.fe_group;
-
- /* Let's just scan groups to find more or less suitable blocks */
- cr = ac->ac_2order ? 
0 : 1; - /* - * cr == 0 try to get exact allocation, - * cr == 3 try to get anything - */ -repeat: - for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) { - ac->ac_criteria = cr; - for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) { - struct ext4_group_info *grp; - struct ext4_group_desc *desc; - - if (group == EXT4_SB(sb)->s_groups_count) - group = 0; - - /* quick check to skip empty groups */ - grp = ext4_get_group_info(ac->ac_sb, group); - if (grp->bb_free == 0) - continue; - - /* - * if the group is already init we check whether it is - * a good group and if not we don't load the buddy - */ - if (EXT4_MB_GRP_NEED_INIT(grp)) { - /* - * we need full data about the group - * to make a good selection - */ - err = ext4_mb_load_buddy(sb, group, &e4b); - if (err) - goto out; - ext4_mb_release_desc(&e4b); - } - - /* - * If the particular group doesn't satisfy our - * criteria we continue with the next group - */ - if (!ext4_mb_good_group(ac, group, cr)) - continue; - - err = ext4_mb_load_buddy(sb, group, &e4b); - if (err) - goto out; - - ext4_lock_group(sb, group); - if (!ext4_mb_good_group(ac, group, cr)) { - /* someone did allocation from this group */ - ext4_unlock_group(sb, group); - ext4_mb_release_desc(&e4b); - continue; - } - - ac->ac_groups_scanned++; - desc = ext4_get_group_desc(sb, group, NULL); - if (cr == 0 || (desc->bg_flags & - cpu_to_le16(EXT4_BG_BLOCK_UNINIT) && - ac->ac_2order != 0)) - ext4_mb_simple_scan_group(ac, &e4b); - else if (cr == 1 && - ac->ac_g_ex.fe_len == sbi->s_stripe) - ext4_mb_scan_aligned(ac, &e4b); - else - ext4_mb_complex_scan_group(ac, &e4b); - - ext4_unlock_group(sb, group); - ext4_mb_release_desc(&e4b); - - if (ac->ac_status != AC_STATUS_CONTINUE) - break; - } - } - - if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND && - !(ac->ac_flags & EXT4_MB_HINT_FIRST)) { - /* - * We've been searching too long. Let's try to allocate - * the best chunk we've found so far - */ - - ext4_mb_try_best_found(ac, &e4b); - if (ac->ac_status != AC_STATUS_FOUND) { - /* - * Someone more lucky has already allocated it. 
- * The only thing we can do is just take first - * found block(s) - printk(KERN_DEBUG "EXT4-fs: someone won our chunk\n"); - */ - ac->ac_b_ex.fe_group = 0; - ac->ac_b_ex.fe_start = 0; - ac->ac_b_ex.fe_len = 0; - ac->ac_status = AC_STATUS_CONTINUE; - ac->ac_flags |= EXT4_MB_HINT_FIRST; - cr = 3; - atomic_inc(&sbi->s_mb_lost_chunks); - goto repeat; - } - } -out: - return err; -} - -#ifdef EXT4_MB_HISTORY -struct ext4_mb_proc_session { - struct ext4_mb_history *history; - struct super_block *sb; - int start; - int max; -}; - -static void *ext4_mb_history_skip_empty(struct ext4_mb_proc_session *s, - struct ext4_mb_history *hs, - int first) -{ - if (hs == s->history + s->max) - hs = s->history; - if (!first && hs == s->history + s->start) - return NULL; - while (hs->orig.fe_len == 0) { - hs++; - if (hs == s->history + s->max) - hs = s->history; - if (hs == s->history + s->start) - return NULL; - } - return hs; -} - -static void *ext4_mb_seq_history_start(struct seq_file *seq, loff_t *pos) -{ - struct ext4_mb_proc_session *s = seq->private; - struct ext4_mb_history *hs; - int l = *pos; - - if (l == 0) - return SEQ_START_TOKEN; - hs = ext4_mb_history_skip_empty(s, s->history + s->start, 1); - if (!hs) - return NULL; - while (--l && (hs = ext4_mb_history_skip_empty(s, ++hs, 0)) != NULL); - return hs; -} - -static void *ext4_mb_seq_history_next(struct seq_file *seq, void *v, - loff_t *pos) -{ - struct ext4_mb_proc_session *s = seq->private; - struct ext4_mb_history *hs = v; - - ++*pos; - if (v == SEQ_START_TOKEN) - return ext4_mb_history_skip_empty(s, s->history + s->start, 1); - else - return ext4_mb_history_skip_empty(s, ++hs, 0); -} - -static int ext4_mb_seq_history_show(struct seq_file *seq, void *v) -{ - char buf[25], buf2[25], buf3[25], *fmt; - struct ext4_mb_history *hs = v; - - if (v == SEQ_START_TOKEN) { - seq_printf(seq, "%-5s %-8s %-23s %-23s %-23s %-5s " - "%-5s %-2s %-5s %-5s %-5s %-6s\n", - "pid", "inode", "original", "goal", "result", "found", - "grps", "cr", "flags", "merge", "tail", "broken"); - return 0; - } - - if (hs->op == EXT4_MB_HISTORY_ALLOC) { - fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u " - "%-5u %-5s %-5u %-6u\n"; - sprintf(buf2, "%lu/%d/%u@%u", hs->result.fe_group, - hs->result.fe_start, hs->result.fe_len, - hs->result.fe_logical); - sprintf(buf, "%lu/%d/%u@%u", hs->orig.fe_group, - hs->orig.fe_start, hs->orig.fe_len, - hs->orig.fe_logical); - sprintf(buf3, "%lu/%d/%u@%u", hs->goal.fe_group, - hs->goal.fe_start, hs->goal.fe_len, - hs->goal.fe_logical); - seq_printf(seq, fmt, hs->pid, hs->ino, buf, buf3, buf2, - hs->found, hs->groups, hs->cr, hs->flags, - hs->merged ? "M" : "", hs->tail, - hs->buddy ? 
1 << hs->buddy : 0); - } else if (hs->op == EXT4_MB_HISTORY_PREALLOC) { - fmt = "%-5u %-8u %-23s %-23s %-23s\n"; - sprintf(buf2, "%lu/%d/%u@%u", hs->result.fe_group, - hs->result.fe_start, hs->result.fe_len, - hs->result.fe_logical); - sprintf(buf, "%lu/%d/%u@%u", hs->orig.fe_group, - hs->orig.fe_start, hs->orig.fe_len, - hs->orig.fe_logical); - seq_printf(seq, fmt, hs->pid, hs->ino, buf, "", buf2); - } else if (hs->op == EXT4_MB_HISTORY_DISCARD) { - sprintf(buf2, "%lu/%d/%u", hs->result.fe_group, - hs->result.fe_start, hs->result.fe_len); - seq_printf(seq, "%-5u %-8u %-23s discard\n", - hs->pid, hs->ino, buf2); - } else if (hs->op == EXT4_MB_HISTORY_FREE) { - sprintf(buf2, "%lu/%d/%u", hs->result.fe_group, - hs->result.fe_start, hs->result.fe_len); - seq_printf(seq, "%-5u %-8u %-23s free\n", - hs->pid, hs->ino, buf2); - } - return 0; -} - -static void ext4_mb_seq_history_stop(struct seq_file *seq, void *v) -{ -} - -static struct seq_operations ext4_mb_seq_history_ops = { - .start = ext4_mb_seq_history_start, - .next = ext4_mb_seq_history_next, - .stop = ext4_mb_seq_history_stop, - .show = ext4_mb_seq_history_show, -}; - -static int ext4_mb_seq_history_open(struct inode *inode, struct file *file) -{ - struct super_block *sb = PDE(inode)->data; - struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_mb_proc_session *s; - int rc; - int size; - - s = kmalloc(sizeof(*s), GFP_KERNEL); - if (s == NULL) - return -ENOMEM; - s->sb = sb; - size = sizeof(struct ext4_mb_history) * sbi->s_mb_history_max; - s->history = kmalloc(size, GFP_KERNEL); - if (s->history == NULL) { - kfree(s); - return -ENOMEM; - } - - spin_lock(&sbi->s_mb_history_lock); - memcpy(s->history, sbi->s_mb_history, size); - s->max = sbi->s_mb_history_max; - s->start = sbi->s_mb_history_cur % s->max; - spin_unlock(&sbi->s_mb_history_lock); - - rc = seq_open(file, &ext4_mb_seq_history_ops); - if (rc == 0) { - struct seq_file *m = (struct seq_file *)file->private_data; - m->private = s; - } else { - kfree(s->history); - kfree(s); - } - return rc; - -} - -static int ext4_mb_seq_history_release(struct inode *inode, struct file *file) -{ - struct seq_file *seq = (struct seq_file *)file->private_data; - struct ext4_mb_proc_session *s = seq->private; - kfree(s->history); - kfree(s); - return seq_release(inode, file); -} - -static ssize_t ext4_mb_seq_history_write(struct file *file, - const char __user *buffer, - size_t count, loff_t *ppos) -{ - struct seq_file *seq = (struct seq_file *)file->private_data; - struct ext4_mb_proc_session *s = seq->private; - struct super_block *sb = s->sb; - char str[32]; - int value; - - if (count >= sizeof(str)) { - printk(KERN_ERR "EXT4-fs: %s string too long, max %u bytes\n", - "mb_history", (int)sizeof(str)); - return -EOVERFLOW; - } - - if (copy_from_user(str, buffer, count)) - return -EFAULT; - - value = simple_strtol(str, NULL, 0); - if (value < 0) - return -ERANGE; - EXT4_SB(sb)->s_mb_history_filter = value; - - return count; -} - -static struct file_operations ext4_mb_seq_history_fops = { - .owner = THIS_MODULE, - .open = ext4_mb_seq_history_open, - .read = seq_read, - .write = ext4_mb_seq_history_write, - .llseek = seq_lseek, - .release = ext4_mb_seq_history_release, -}; - -static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) -{ - struct super_block *sb = seq->private; - struct ext4_sb_info *sbi = EXT4_SB(sb); - ext4_group_t group; - - if (*pos < 0 || *pos >= sbi->s_groups_count) - return NULL; - - group = *pos + 1; - return (void *) group; -} - -static void 
*ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct super_block *sb = seq->private; - struct ext4_sb_info *sbi = EXT4_SB(sb); - ext4_group_t group; - - ++*pos; - if (*pos < 0 || *pos >= sbi->s_groups_count) - return NULL; - group = *pos + 1; - return (void *) group;; -} - -static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) -{ - struct super_block *sb = seq->private; - long group = (long) v; - int i; - int err; - struct ext4_buddy e4b; - struct sg { - struct ext4_group_info info; - unsigned short counters[16]; - } sg; - - group--; - if (group == 0) - seq_printf(seq, "#%-5s: %-5s %-5s %-5s " - "[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s " - "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n", - "group", "free", "frags", "first", - "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6", - "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13"); - - i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) + - sizeof(struct ext4_group_info); - err = ext4_mb_load_buddy(sb, group, &e4b); - if (err) { - seq_printf(seq, "#%-5lu: I/O error\n", group); - return 0; - } - ext4_lock_group(sb, group); - memcpy(&sg, ext4_get_group_info(sb, group), i); - ext4_unlock_group(sb, group); - ext4_mb_release_desc(&e4b); - - seq_printf(seq, "#%-5lu: %-5u %-5u %-5u [", group, sg.info.bb_free, - sg.info.bb_fragments, sg.info.bb_first_free); - for (i = 0; i <= 13; i++) - seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ? - sg.info.bb_counters[i] : 0); - seq_printf(seq, " ]\n"); - - return 0; -} - -static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v) -{ -} - -static struct seq_operations ext4_mb_seq_groups_ops = { - .start = ext4_mb_seq_groups_start, - .next = ext4_mb_seq_groups_next, - .stop = ext4_mb_seq_groups_stop, - .show = ext4_mb_seq_groups_show, -}; - -static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file) -{ - struct super_block *sb = PDE(inode)->data; - int rc; - - rc = seq_open(file, &ext4_mb_seq_groups_ops); - if (rc == 0) { - struct seq_file *m = (struct seq_file *)file->private_data; - m->private = sb; - } - return rc; - -} - -static struct file_operations ext4_mb_seq_groups_fops = { - .owner = THIS_MODULE, - .open = ext4_mb_seq_groups_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static void ext4_mb_history_release(struct super_block *sb) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - - remove_proc_entry("mb_groups", sbi->s_mb_proc); - remove_proc_entry("mb_history", sbi->s_mb_proc); - - kfree(sbi->s_mb_history); -} - -static void ext4_mb_history_init(struct super_block *sb) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - int i; - - if (sbi->s_mb_proc != NULL) { - struct proc_dir_entry *p; - p = create_proc_entry("mb_history", S_IRUGO, sbi->s_mb_proc); - if (p) { - p->proc_fops = &ext4_mb_seq_history_fops; - p->data = sb; - } - p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc); - if (p) { - p->proc_fops = &ext4_mb_seq_groups_fops; - p->data = sb; - } - } - - sbi->s_mb_history_max = 1000; - sbi->s_mb_history_cur = 0; - spin_lock_init(&sbi->s_mb_history_lock); - i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history); - sbi->s_mb_history = kmalloc(i, GFP_KERNEL); - if (likely(sbi->s_mb_history != NULL)) - memset(sbi->s_mb_history, 0, i); - /* if we can't allocate history, then we simple won't use it */ -} - -static void ext4_mb_store_history(struct ext4_allocation_context *ac) -{ - struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); - struct ext4_mb_history h; - - if (unlikely(sbi->s_mb_history == 
NULL)) - return; - - if (!(ac->ac_op & sbi->s_mb_history_filter)) - return; - - h.op = ac->ac_op; - h.pid = current->pid; - h.ino = ac->ac_inode ? ac->ac_inode->i_ino : 0; - h.orig = ac->ac_o_ex; - h.result = ac->ac_b_ex; - h.flags = ac->ac_flags; - h.found = ac->ac_found; - h.groups = ac->ac_groups_scanned; - h.cr = ac->ac_criteria; - h.tail = ac->ac_tail; - h.buddy = ac->ac_buddy; - h.merged = 0; - if (ac->ac_op == EXT4_MB_HISTORY_ALLOC) { - if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && - ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group) - h.merged = 1; - h.goal = ac->ac_g_ex; - h.result = ac->ac_f_ex; - } - - spin_lock(&sbi->s_mb_history_lock); - memcpy(sbi->s_mb_history + sbi->s_mb_history_cur, &h, sizeof(h)); - if (++sbi->s_mb_history_cur >= sbi->s_mb_history_max) - sbi->s_mb_history_cur = 0; - spin_unlock(&sbi->s_mb_history_lock); -} - -#else -#define ext4_mb_history_release(sb) -#define ext4_mb_history_init(sb) -#endif - -static int ext4_mb_init_backend(struct super_block *sb) -{ - ext4_group_t i; - int j, len, metalen; - struct ext4_sb_info *sbi = EXT4_SB(sb); - int num_meta_group_infos = - (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) >> - EXT4_DESC_PER_BLOCK_BITS(sb); - struct ext4_group_info **meta_group_info; - - /* An 8TB filesystem with 64-bit pointers requires a 4096 byte - * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. - * So a two level scheme suffices for now. */ - sbi->s_group_info = kmalloc(sizeof(*sbi->s_group_info) * - num_meta_group_infos, GFP_KERNEL); - if (sbi->s_group_info == NULL) { - printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); - return -ENOMEM; - } - sbi->s_buddy_cache = new_inode(sb); - if (sbi->s_buddy_cache == NULL) { - printk(KERN_ERR "EXT4-fs: can't get new inode\n"); - goto err_freesgi; - } - EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; - - metalen = sizeof(*meta_group_info) << EXT4_DESC_PER_BLOCK_BITS(sb); - for (i = 0; i < num_meta_group_infos; i++) { - if ((i + 1) == num_meta_group_infos) - metalen = sizeof(*meta_group_info) * - (sbi->s_groups_count - - (i << EXT4_DESC_PER_BLOCK_BITS(sb))); - meta_group_info = kmalloc(metalen, GFP_KERNEL); - if (meta_group_info == NULL) { - printk(KERN_ERR "EXT4-fs: can't allocate mem for a " - "buddy group\n"); - goto err_freemeta; - } - sbi->s_group_info[i] = meta_group_info; - } - - /* - * calculate needed size. 
if you change the bb_counters size,
- * don't forget about ext4_mb_generate_buddy()
- */
- len = sizeof(struct ext4_group_info);
- len += sizeof(unsigned short) * (sb->s_blocksize_bits + 2);
- for (i = 0; i < sbi->s_groups_count; i++) {
- struct ext4_group_desc *desc;
-
- meta_group_info =
- sbi->s_group_info[i >> EXT4_DESC_PER_BLOCK_BITS(sb)];
- j = i & (EXT4_DESC_PER_BLOCK(sb) - 1);
-
- meta_group_info[j] = kzalloc(len, GFP_KERNEL);
- if (meta_group_info[j] == NULL) {
- printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
- goto err_freebuddy;
- }
- desc = ext4_get_group_desc(sb, i, NULL);
- if (desc == NULL) {
- printk(KERN_ERR
- "EXT4-fs: can't read descriptor %lu\n", i);
- goto err_freebuddy;
- }
- memset(meta_group_info[j], 0, len);
- set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
- &(meta_group_info[j]->bb_state));
-
- /*
- * initialize bb_free to be able to skip
- * empty groups without initialization
- */
- if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
- meta_group_info[j]->bb_free =
- ext4_free_blocks_after_init(sb, i, desc);
- } else {
- meta_group_info[j]->bb_free =
- le16_to_cpu(desc->bg_free_blocks_count);
- }
-
- INIT_LIST_HEAD(&meta_group_info[j]->bb_prealloc_list);
-
-#ifdef DOUBLE_CHECK
- {
- struct buffer_head *bh;
- meta_group_info[j]->bb_bitmap =
- kmalloc(sb->s_blocksize, GFP_KERNEL);
- BUG_ON(meta_group_info[j]->bb_bitmap == NULL);
- bh = read_block_bitmap(sb, i);
- BUG_ON(bh == NULL);
- memcpy(meta_group_info[j]->bb_bitmap, bh->b_data,
- sb->s_blocksize);
- put_bh(bh);
- }
-#endif
-
- }
-
- return 0;
-
-err_freebuddy:
- /* i is unsigned; test-and-decrement stops cleanly at zero */
- while (i-- > 0)
- kfree(ext4_get_group_info(sb, i));
- i = num_meta_group_infos;
-err_freemeta:
- while (i-- > 0)
- kfree(sbi->s_group_info[i]);
- iput(sbi->s_buddy_cache);
-err_freesgi:
- kfree(sbi->s_group_info);
- return -ENOMEM;
-}
-
-int ext4_mb_init(struct super_block *sb, int needs_recovery)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- unsigned i;
- unsigned offset;
- unsigned max;
-
- if (!test_opt(sb, MBALLOC))
- return 0;
-
- i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short);
-
- sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
- if (sbi->s_mb_offsets == NULL) {
- clear_opt(sbi->s_mount_opt, MBALLOC);
- return -ENOMEM;
- }
- sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
- if (sbi->s_mb_maxs == NULL) {
- clear_opt(sbi->s_mount_opt, MBALLOC);
- kfree(sbi->s_mb_offsets);
- return -ENOMEM;
- }
-
- /* order 0 is regular bitmap */
- sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
- sbi->s_mb_offsets[0] = 0;
-
- i = 1;
- offset = 0;
- max = sb->s_blocksize << 2;
- do {
- sbi->s_mb_offsets[i] = offset;
- sbi->s_mb_maxs[i] = max;
- offset += 1 << (sb->s_blocksize_bits - i);
- max = max >> 1;
- i++;
- } while (i <= sb->s_blocksize_bits + 1);
-
- /* init file for buddy data */
- i = ext4_mb_init_backend(sb);
- if (i) {
- clear_opt(sbi->s_mount_opt, MBALLOC);
- kfree(sbi->s_mb_offsets);
- kfree(sbi->s_mb_maxs);
- return i;
- }
-
- spin_lock_init(&sbi->s_md_lock);
- INIT_LIST_HEAD(&sbi->s_active_transaction);
- INIT_LIST_HEAD(&sbi->s_closed_transaction);
- INIT_LIST_HEAD(&sbi->s_committed_transaction);
- spin_lock_init(&sbi->s_bal_lock);
-
- sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
- sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
- sbi->s_mb_stats = MB_DEFAULT_STATS;
- sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
- sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
- sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT;
- sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
-
- i = sizeof(struct ext4_locality_group) * NR_CPUS;
- 
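The do/while above packs one bitmap per order into the single buddy block each group gets in the buddy cache. A standalone sketch that prints the resulting layout for 4K blocks; the guard on the offset update merely avoids the negative shift amount that the in-kernel loop computes on its final pass and then discards:

#include <stdio.h>

int main(void)
{
	unsigned blocksize_bits = 12;	/* 4K filesystem blocks */
	unsigned offset = 0;
	unsigned max = 1u << (blocksize_bits + 2);

	printf("order  0: offset    0, %u bits (the block bitmap itself)\n",
	       1u << (blocksize_bits + 3));
	for (unsigned i = 1; i <= blocksize_bits + 1; i++) {
		printf("order %2u: offset %4u, %u bits\n", i, offset, max);
		if (i <= blocksize_bits)	/* keep the shift non-negative */
			offset += 1u << (blocksize_bits - i);
		max >>= 1;
	}
	/* the per-order bitmaps end inside byte 4095 of a 4096-byte
	 * block, which is why one buddy block per group is enough */
	return 0;
}
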
sbi->s_locality_groups = kmalloc(i, GFP_KERNEL);
- if (sbi->s_locality_groups == NULL) {
- clear_opt(sbi->s_mount_opt, MBALLOC);
- kfree(sbi->s_mb_offsets);
- kfree(sbi->s_mb_maxs);
- return -ENOMEM;
- }
- for (i = 0; i < NR_CPUS; i++) {
- struct ext4_locality_group *lg;
- lg = &sbi->s_locality_groups[i];
- mutex_init(&lg->lg_mutex);
- INIT_LIST_HEAD(&lg->lg_prealloc_list);
- spin_lock_init(&lg->lg_prealloc_lock);
- }
-
- ext4_mb_init_per_dev_proc(sb);
- ext4_mb_history_init(sb);
-
- printk("EXT4-fs: mballoc enabled\n");
- return 0;
-}
-
-/* needs to be called with the ext4 group lock held (ext4_lock_group) */
-static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
-{
- struct ext4_prealloc_space *pa;
- struct list_head *cur, *tmp;
- int count = 0;
-
- list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
- pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
- list_del(&pa->pa_group_list);
- count++;
- kfree(pa);
- }
- if (count)
- mb_debug("mballoc: %u PAs left\n", count);
-
-}
-
-int ext4_mb_release(struct super_block *sb)
-{
- ext4_group_t i;
- int num_meta_group_infos;
- struct ext4_group_info *grinfo;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
-
- if (!test_opt(sb, MBALLOC))
- return 0;
-
- /* release freed, non-committed blocks */
- spin_lock(&sbi->s_md_lock);
- list_splice_init(&sbi->s_closed_transaction,
- &sbi->s_committed_transaction);
- list_splice_init(&sbi->s_active_transaction,
- &sbi->s_committed_transaction);
- spin_unlock(&sbi->s_md_lock);
- ext4_mb_free_committed_blocks(sb);
-
- if (sbi->s_group_info) {
- for (i = 0; i < sbi->s_groups_count; i++) {
- grinfo = ext4_get_group_info(sb, i);
-#ifdef DOUBLE_CHECK
- kfree(grinfo->bb_bitmap);
-#endif
- ext4_lock_group(sb, i);
- ext4_mb_cleanup_pa(grinfo);
- ext4_unlock_group(sb, i);
- kfree(grinfo);
- }
- num_meta_group_infos = (sbi->s_groups_count +
- EXT4_DESC_PER_BLOCK(sb) - 1) >>
- EXT4_DESC_PER_BLOCK_BITS(sb);
- for (i = 0; i < num_meta_group_infos; i++)
- kfree(sbi->s_group_info[i]);
- kfree(sbi->s_group_info);
- }
- kfree(sbi->s_mb_offsets);
- kfree(sbi->s_mb_maxs);
- if (sbi->s_buddy_cache)
- iput(sbi->s_buddy_cache);
- if (sbi->s_mb_stats) {
- printk(KERN_INFO
- "EXT4-fs: mballoc: %u blocks %u reqs (%u success)\n",
- atomic_read(&sbi->s_bal_allocated),
- atomic_read(&sbi->s_bal_reqs),
- atomic_read(&sbi->s_bal_success));
- printk(KERN_INFO
- "EXT4-fs: mballoc: %u extents scanned, %u goal hits, "
- "%u 2^N hits, %u breaks, %u lost\n",
- atomic_read(&sbi->s_bal_ex_scanned),
- atomic_read(&sbi->s_bal_goals),
- atomic_read(&sbi->s_bal_2orders),
- atomic_read(&sbi->s_bal_breaks),
- atomic_read(&sbi->s_mb_lost_chunks));
- printk(KERN_INFO
- "EXT4-fs: mballoc: %lu generated and it took %Lu\n",
- sbi->s_mb_buddies_generated++,
- sbi->s_mb_generation_time);
- printk(KERN_INFO
- "EXT4-fs: mballoc: %u preallocated, %u discarded\n",
- atomic_read(&sbi->s_mb_preallocated),
- atomic_read(&sbi->s_mb_discarded));
- }
-
- kfree(sbi->s_locality_groups);
-
- ext4_mb_history_release(sb);
- ext4_mb_destroy_per_dev_proc(sb);
-
- return 0;
-}
-
-static void ext4_mb_free_committed_blocks(struct super_block *sb)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- int err;
- int i;
- int count = 0;
- int count2 = 0;
- struct ext4_free_metadata *md;
- struct ext4_buddy e4b;
-
- if (list_empty(&sbi->s_committed_transaction))
- return;
-
- /* there are committed blocks yet to be freed */
- do {
- /* get next array of blocks */
- md = NULL;
- spin_lock(&sbi->s_md_lock);
- if (!list_empty(&sbi->s_committed_transaction)) {
- md = 
list_entry(sbi->s_committed_transaction.next, - struct ext4_free_metadata, list); - list_del(&md->list); - } - spin_unlock(&sbi->s_md_lock); - - if (md == NULL) - break; - - mb_debug("gonna free %u blocks in group %lu (0x%p):", - md->num, md->group, md); - - err = ext4_mb_load_buddy(sb, md->group, &e4b); - /* we expect to find existing buddy because it's pinned */ - BUG_ON(err != 0); - - /* there are blocks to put in buddy to make them really free */ - count += md->num; - count2++; - ext4_lock_group(sb, md->group); - for (i = 0; i < md->num; i++) { - mb_debug(" %u", md->blocks[i]); - err = mb_free_blocks(NULL, &e4b, md->blocks[i], 1); - BUG_ON(err != 0); - } - mb_debug("\n"); - ext4_unlock_group(sb, md->group); - - /* balance refcounts from ext4_mb_free_metadata() */ - page_cache_release(e4b.bd_buddy_page); - page_cache_release(e4b.bd_bitmap_page); - - kfree(md); - ext4_mb_release_desc(&e4b); - - } while (md); - - mb_debug("freed %u blocks in %u structures\n", count, count2); -} - -#define EXT4_ROOT "ext4" -#define EXT4_MB_STATS_NAME "stats" -#define EXT4_MB_MAX_TO_SCAN_NAME "max_to_scan" -#define EXT4_MB_MIN_TO_SCAN_NAME "min_to_scan" -#define EXT4_MB_ORDER2_REQ "order2_req" -#define EXT4_MB_STREAM_REQ "stream_req" -#define EXT4_MB_GROUP_PREALLOC "group_prealloc" - - - -#define MB_PROC_VALUE_READ(name) \ -static int ext4_mb_read_##name(char *page, char **start, \ - off_t off, int count, int *eof, void *data) \ -{ \ - struct ext4_sb_info *sbi = data; \ - int len; \ - *eof = 1; \ - if (off != 0) \ - return 0; \ - len = sprintf(page, "%ld\n", sbi->s_mb_##name); \ - *start = page; \ - return len; \ -} - -#define MB_PROC_VALUE_WRITE(name) \ -static int ext4_mb_write_##name(struct file *file, \ - const char __user *buf, unsigned long cnt, void *data) \ -{ \ - struct ext4_sb_info *sbi = data; \ - char str[32]; \ - long value; \ - if (cnt >= sizeof(str)) \ - return -EINVAL; \ - if (copy_from_user(str, buf, cnt)) \ - return -EFAULT; \ - value = simple_strtol(str, NULL, 0); \ - if (value <= 0) \ - return -ERANGE; \ - sbi->s_mb_##name = value; \ - return cnt; \ -} - -MB_PROC_VALUE_READ(stats); -MB_PROC_VALUE_WRITE(stats); -MB_PROC_VALUE_READ(max_to_scan); -MB_PROC_VALUE_WRITE(max_to_scan); -MB_PROC_VALUE_READ(min_to_scan); -MB_PROC_VALUE_WRITE(min_to_scan); -MB_PROC_VALUE_READ(order2_reqs); -MB_PROC_VALUE_WRITE(order2_reqs); -MB_PROC_VALUE_READ(stream_request); -MB_PROC_VALUE_WRITE(stream_request); -MB_PROC_VALUE_READ(group_prealloc); -MB_PROC_VALUE_WRITE(group_prealloc); - -#define MB_PROC_HANDLER(name, var) \ -do { \ - proc = create_proc_entry(name, mode, sbi->s_mb_proc); \ - if (proc == NULL) { \ - printk(KERN_ERR "EXT4-fs: can't to create %s\n", name); \ - goto err_out; \ - } \ - proc->data = sbi; \ - proc->read_proc = ext4_mb_read_##var ; \ - proc->write_proc = ext4_mb_write_##var; \ -} while (0) - -static int ext4_mb_init_per_dev_proc(struct super_block *sb) -{ - mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; - struct ext4_sb_info *sbi = EXT4_SB(sb); - struct proc_dir_entry *proc; - char devname[64]; - - snprintf(devname, sizeof(devname) - 1, "%s", - bdevname(sb->s_bdev, devname)); - sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4); - - MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats); - MB_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, max_to_scan); - MB_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, min_to_scan); - MB_PROC_HANDLER(EXT4_MB_ORDER2_REQ, order2_reqs); - MB_PROC_HANDLER(EXT4_MB_STREAM_REQ, stream_request); - MB_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, group_prealloc); - - return 0; - -err_out: - 
printk(KERN_ERR "EXT4-fs: Unable to create %s\n", devname); - remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc); - remove_proc_entry(devname, proc_root_ext4); - sbi->s_mb_proc = NULL; - - return -ENOMEM; -} - -static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - char devname[64]; - - if (sbi->s_mb_proc == NULL) - return -EINVAL; - - snprintf(devname, sizeof(devname) - 1, "%s", - bdevname(sb->s_bdev, devname)); - remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc); - remove_proc_entry(devname, proc_root_ext4); - - return 0; -} - -int __init init_ext4_mballoc(void) -{ - ext4_pspace_cachep = - kmem_cache_create("ext4_prealloc_space", - sizeof(struct ext4_prealloc_space), - 0, SLAB_RECLAIM_ACCOUNT, NULL); - if (ext4_pspace_cachep == NULL) - return -ENOMEM; - -#ifdef CONFIG_PROC_FS - proc_root_ext4 = proc_mkdir(EXT4_ROOT, proc_root_fs); - if (proc_root_ext4 == NULL) - printk(KERN_ERR "EXT4-fs: Unable to create %s\n", EXT4_ROOT); -#endif - - return 0; -} - -void exit_ext4_mballoc(void) -{ - /* XXX: synchronize_rcu(); */ - kmem_cache_destroy(ext4_pspace_cachep); -#ifdef CONFIG_PROC_FS - remove_proc_entry(EXT4_ROOT, proc_root_fs); -#endif -} - - -/* - * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps - * Returns 0 if success or error code - */ -static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, - handle_t *handle) -{ - struct buffer_head *bitmap_bh = NULL; - struct ext4_super_block *es; - struct ext4_group_desc *gdp; - struct buffer_head *gdp_bh; - struct ext4_sb_info *sbi; - struct super_block *sb; - ext4_fsblk_t block; - int err; - - BUG_ON(ac->ac_status != AC_STATUS_FOUND); - BUG_ON(ac->ac_b_ex.fe_len <= 0); - - sb = ac->ac_sb; - sbi = EXT4_SB(sb); - es = sbi->s_es; - - ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group, - gdp->bg_free_blocks_count); - - err = -EIO; - bitmap_bh = read_block_bitmap(sb, ac->ac_b_ex.fe_group); - if (!bitmap_bh) - goto out_err; - - err = ext4_journal_get_write_access(handle, bitmap_bh); - if (err) - goto out_err; - - err = -EIO; - gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh); - if (!gdp) - goto out_err; - - err = ext4_journal_get_write_access(handle, gdp_bh); - if (err) - goto out_err; - - block = ac->ac_b_ex.fe_group * EXT4_BLOCKS_PER_GROUP(sb) - + ac->ac_b_ex.fe_start - + le32_to_cpu(es->s_first_data_block); - - if (block == ext4_block_bitmap(sb, gdp) || - block == ext4_inode_bitmap(sb, gdp) || - in_range(block, ext4_inode_table(sb, gdp), - EXT4_SB(sb)->s_itb_per_group)) { - - ext4_error(sb, __FUNCTION__, - "Allocating block in system zone - block = %llu", - block); - } -#ifdef AGGRESSIVE_CHECK - { - int i; - for (i = 0; i < ac->ac_b_ex.fe_len; i++) { - BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i, - bitmap_bh->b_data)); - } - } -#endif - mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), bitmap_bh->b_data, - ac->ac_b_ex.fe_start, 
-		    ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
-
-	spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
-	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
-		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
-		gdp->bg_free_blocks_count =
-			cpu_to_le16(ext4_free_blocks_after_init(sb,
-						ac->ac_b_ex.fe_group,
-						gdp));
-	}
-	gdp->bg_free_blocks_count =
-		cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)
-				- ac->ac_b_ex.fe_len);
-	gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
-	spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
-	percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
-
-	err = ext4_journal_dirty_metadata(handle, bitmap_bh);
-	if (err)
-		goto out_err;
-	err = ext4_journal_dirty_metadata(handle, gdp_bh);
-
-out_err:
-	sb->s_dirt = 1;
-	put_bh(bitmap_bh);
-	return err;
-}
-
-/*
- * here we normalize request for locality group
- * Group requests are normalized to s_stripe size if we set the same via mount
- * option. If not, we set it to s_mb_group_prealloc, which can be configured
- * via /proc/fs/ext4/<partition>/group_prealloc
- *
- * XXX: should we try to preallocate more than the group has now?
- */
-static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
-{
-	struct super_block *sb = ac->ac_sb;
-	struct ext4_locality_group *lg = ac->ac_lg;
-
-	BUG_ON(lg == NULL);
-	if (EXT4_SB(sb)->s_stripe)
-		ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe;
-	else
-		ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
-	mb_debug("#%u: goal %lu blocks for locality group\n",
-		current->pid, ac->ac_g_ex.fe_len);
-}
-
-/*
- * Normalization means making request better in terms of
- * size and alignment
- */
-static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
-				struct ext4_allocation_request *ar)
-{
-	int bsbits, max;
-	ext4_lblk_t end;
-	struct list_head *cur;
-	loff_t size, orig_size, start_off;
-	ext4_lblk_t start, orig_start;
-	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
-
-	/* do normalize only data requests, metadata requests
-	   do not need preallocation */
-	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
-		return;
-
-	/* sometimes the caller may want exact blocks */
-	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
-		return;
-
-	/* caller may indicate that preallocation isn't
-	 * required (it's a tail, for example) */
-	if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
-		return;
-
-	if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
-		ext4_mb_normalize_group_request(ac);
-		return;
-	}
-
-	bsbits = ac->ac_sb->s_blocksize_bits;
-
-	/* first, let's learn actual file size
-	 * given current request is allocated */
-	size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
-	size = size << bsbits;
-	if (size < i_size_read(ac->ac_inode))
-		size = i_size_read(ac->ac_inode);
-
-	/* max available blocks in a free group */
-	max = EXT4_BLOCKS_PER_GROUP(ac->ac_sb) - 1 - 1 -
-				EXT4_SB(ac->ac_sb)->s_itb_per_group;
-
-#define NRL_CHECK_SIZE(req, size, max, bits)	\
-		(req <= (size) || max <= ((size) >> bits))
-
-	/* first, try to predict filesize */
-	/* XXX: should this table be tunable?
*/ - start_off = 0; - if (size <= 16 * 1024) { - size = 16 * 1024; - } else if (size <= 32 * 1024) { - size = 32 * 1024; - } else if (size <= 64 * 1024) { - size = 64 * 1024; - } else if (size <= 128 * 1024) { - size = 128 * 1024; - } else if (size <= 256 * 1024) { - size = 256 * 1024; - } else if (size <= 512 * 1024) { - size = 512 * 1024; - } else if (size <= 1024 * 1024) { - size = 1024 * 1024; - } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, bsbits)) { - start_off = ((loff_t)ac->ac_o_ex.fe_logical >> - (20 - bsbits)) << 20; - size = 1024 * 1024; - } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, bsbits)) { - start_off = ((loff_t)ac->ac_o_ex.fe_logical >> - (22 - bsbits)) << 22; - size = 4 * 1024 * 1024; - } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len, - (8<<20)>>bsbits, max, bsbits)) { - start_off = ((loff_t)ac->ac_o_ex.fe_logical >> - (23 - bsbits)) << 23; - size = 8 * 1024 * 1024; - } else { - start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits; - size = ac->ac_o_ex.fe_len << bsbits; - } - orig_size = size = size >> bsbits; - orig_start = start = start_off >> bsbits; - - /* don't cover already allocated blocks in selected range */ - if (ar->pleft && start <= ar->lleft) { - size -= ar->lleft + 1 - start; - start = ar->lleft + 1; - } - if (ar->pright && start + size - 1 >= ar->lright) - size -= start + size - ar->lright; - - end = start + size; - - /* check we don't cross already preallocated blocks */ - rcu_read_lock(); - list_for_each_rcu(cur, &ei->i_prealloc_list) { - struct ext4_prealloc_space *pa; - unsigned long pa_end; - - pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list); - - if (pa->pa_deleted) - continue; - spin_lock(&pa->pa_lock); - if (pa->pa_deleted) { - spin_unlock(&pa->pa_lock); - continue; - } - - pa_end = pa->pa_lstart + pa->pa_len; - - /* PA must not overlap original request */ - BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end || - ac->ac_o_ex.fe_logical < pa->pa_lstart)); - - /* skip PA normalized request doesn't overlap with */ - if (pa->pa_lstart >= end) { - spin_unlock(&pa->pa_lock); - continue; - } - if (pa_end <= start) { - spin_unlock(&pa->pa_lock); - continue; - } - BUG_ON(pa->pa_lstart <= start && pa_end >= end); - - if (pa_end <= ac->ac_o_ex.fe_logical) { - BUG_ON(pa_end < start); - start = pa_end; - } - - if (pa->pa_lstart > ac->ac_o_ex.fe_logical) { - BUG_ON(pa->pa_lstart > end); - end = pa->pa_lstart; - } - spin_unlock(&pa->pa_lock); - } - rcu_read_unlock(); - size = end - start; - - /* XXX: extra loop to check we really don't overlap preallocations */ - rcu_read_lock(); - list_for_each_rcu(cur, &ei->i_prealloc_list) { - struct ext4_prealloc_space *pa; - unsigned long pa_end; - pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list); - spin_lock(&pa->pa_lock); - if (pa->pa_deleted == 0) { - pa_end = pa->pa_lstart + pa->pa_len; - BUG_ON(!(start >= pa_end || end <= pa->pa_lstart)); - } - spin_unlock(&pa->pa_lock); - } - rcu_read_unlock(); - - if (start + size <= ac->ac_o_ex.fe_logical && - start > ac->ac_o_ex.fe_logical) { - printk(KERN_ERR "start %lu, size %lu, fe_logical %lu\n", - (unsigned long) start, (unsigned long) size, - (unsigned long) ac->ac_o_ex.fe_logical); - } - BUG_ON(start + size <= ac->ac_o_ex.fe_logical && - start > ac->ac_o_ex.fe_logical); - BUG_ON(size <= 0 || size >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); - - /* now prepare goal request */ - - /* XXX: is it better to align blocks WRT to logical - * placement or satisfy big request as is */ - ac->ac_g_ex.fe_logical = start; - ac->ac_g_ex.fe_len = size; - - /* 
define goal start in order to merge */
-	if (ar->pright && (ar->lright == (start + size))) {
-		/* merge to the right */
-		ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
-						&ac->ac_f_ex.fe_group,
-						&ac->ac_f_ex.fe_start);
-		ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
-	}
-	if (ar->pleft && (ar->lleft + 1 == start)) {
-		/* merge to the left */
-		ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
-						&ac->ac_f_ex.fe_group,
-						&ac->ac_f_ex.fe_start);
-		ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
-	}
-
-	mb_debug("goal: %u(was %u) blocks at %u\n", (unsigned) size,
-		(unsigned) orig_size, (unsigned) start);
-}
-
-static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
-{
-	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
-
-	if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
-		atomic_inc(&sbi->s_bal_reqs);
-		atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
-		if (ac->ac_o_ex.fe_len >= ac->ac_g_ex.fe_len)
-			atomic_inc(&sbi->s_bal_success);
-		atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
-		if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
-				ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
-			atomic_inc(&sbi->s_bal_goals);
-		if (ac->ac_found > sbi->s_mb_max_to_scan)
-			atomic_inc(&sbi->s_bal_breaks);
-	}
-
-	ext4_mb_store_history(ac);
-}
-
-/*
- * use blocks preallocated to inode
- */
-static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
-				struct ext4_prealloc_space *pa)
-{
-	ext4_fsblk_t start;
-	ext4_fsblk_t end;
-	int len;
-
-	/* found preallocated blocks, use them */
-	start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
-	end = min(pa->pa_pstart + pa->pa_len, start + ac->ac_o_ex.fe_len);
-	len = end - start;
-	ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
-					&ac->ac_b_ex.fe_start);
-	ac->ac_b_ex.fe_len = len;
-	ac->ac_status = AC_STATUS_FOUND;
-	ac->ac_pa = pa;
-
-	BUG_ON(start < pa->pa_pstart);
-	BUG_ON(start + len > pa->pa_pstart + pa->pa_len);
-	BUG_ON(pa->pa_free < len);
-	pa->pa_free -= len;
-
-	mb_debug("use %llu/%lu from inode pa %p\n", start, len, pa);
-}
-
-/*
- * use blocks preallocated to locality group
- */
-static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
-				struct ext4_prealloc_space *pa)
-{
-	unsigned len = ac->ac_o_ex.fe_len;
-
-	ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
-					&ac->ac_b_ex.fe_group,
-					&ac->ac_b_ex.fe_start);
-	ac->ac_b_ex.fe_len = len;
-	ac->ac_status = AC_STATUS_FOUND;
-	ac->ac_pa = pa;
-
-	/* we don't correct pa_pstart or pa_plen here to avoid
-	 * possible race when the group is being loaded concurrently
-	 * instead we correct pa later, after blocks are marked
-	 * in on-disk bitmap -- see ext4_mb_release_context() */
-	/*
-	 * FIXME!! but the other CPUs can look at this particular
-	 * pa and think that it has enough free blocks if we
-	 * don't update pa_free here, right?
- */
-	mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
-}
-
-/*
- * search goal blocks in preallocated space
- */
-static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
-{
-	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
-	struct ext4_locality_group *lg;
-	struct ext4_prealloc_space *pa;
-	struct list_head *cur;
-
-	/* only data can be preallocated */
-	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
-		return 0;
-
-	/* first, try per-file preallocation */
-	rcu_read_lock();
-	list_for_each_rcu(cur, &ei->i_prealloc_list) {
-		pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
-
-		/* all fields in this condition don't change,
-		 * so we can skip locking for them */
-		if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
-			ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len)
-			continue;
-
-		/* found preallocated blocks, use them */
-		spin_lock(&pa->pa_lock);
-		if (pa->pa_deleted == 0 && pa->pa_free) {
-			atomic_inc(&pa->pa_count);
-			ext4_mb_use_inode_pa(ac, pa);
-			spin_unlock(&pa->pa_lock);
-			ac->ac_criteria = 10;
-			rcu_read_unlock();
-			return 1;
-		}
-		spin_unlock(&pa->pa_lock);
-	}
-	rcu_read_unlock();
-
-	/* can we use group allocation? */
-	if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
-		return 0;
-
-	/* inode may have no locality group for some reason */
-	lg = ac->ac_lg;
-	if (lg == NULL)
-		return 0;
-
-	rcu_read_lock();
-	list_for_each_rcu(cur, &lg->lg_prealloc_list) {
-		pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
-		spin_lock(&pa->pa_lock);
-		if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) {
-			atomic_inc(&pa->pa_count);
-			ext4_mb_use_group_pa(ac, pa);
-			spin_unlock(&pa->pa_lock);
-			ac->ac_criteria = 20;
-			rcu_read_unlock();
-			return 1;
-		}
-		spin_unlock(&pa->pa_lock);
-	}
-	rcu_read_unlock();
-
-	return 0;
-}
-
-/*
- * the function goes through all preallocations in this group and marks them
- * used in in-core bitmap. buddy must be generated from this bitmap
- * Needs to be called with ext4 group lock (ext4_lock_group)
- */
-static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
-					ext4_group_t group)
-{
-	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
-	struct ext4_prealloc_space *pa;
-	struct list_head *cur;
-	ext4_group_t groupnr;
-	ext4_grpblk_t start;
-	int preallocated = 0;
-	int count = 0;
-	int len;
-
-	/* all forms of preallocation discard first load the group,
-	 * so the only competing code is preallocation use.
- * we don't need any locking here
-	 * notice we do NOT ignore preallocations with pa_deleted
-	 * otherwise we could leave used blocks available for
-	 * allocation in buddy when concurrent ext4_mb_put_pa()
-	 * is dropping preallocation
-	 */
-	list_for_each(cur, &grp->bb_prealloc_list) {
-		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
-		spin_lock(&pa->pa_lock);
-		ext4_get_group_no_and_offset(sb, pa->pa_pstart,
-					     &groupnr, &start);
-		len = pa->pa_len;
-		spin_unlock(&pa->pa_lock);
-		if (unlikely(len == 0))
-			continue;
-		BUG_ON(groupnr != group);
-		mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group),
-						bitmap, start, len);
-		preallocated += len;
-		count++;
-	}
-	mb_debug("preallocated %u for group %lu\n", preallocated, group);
-}
-
-static void ext4_mb_pa_callback(struct rcu_head *head)
-{
-	struct ext4_prealloc_space *pa;
-	pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
-	kmem_cache_free(ext4_pspace_cachep, pa);
-}
-
-/*
- * drops a reference to preallocated space descriptor
- * if this was the last reference and the space is consumed
- */
-static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
-			struct super_block *sb, struct ext4_prealloc_space *pa)
-{
-	unsigned long grp;
-
-	if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
-		return;
-
-	/* in this short window concurrent discard can set pa_deleted */
-	spin_lock(&pa->pa_lock);
-	if (pa->pa_deleted == 1) {
-		spin_unlock(&pa->pa_lock);
-		return;
-	}
-
-	pa->pa_deleted = 1;
-	spin_unlock(&pa->pa_lock);
-
-	/* -1 is to protect from crossing allocation group */
-	ext4_get_group_no_and_offset(sb, pa->pa_pstart - 1, &grp, NULL);
-
-	/*
-	 * possible race:
-	 *
-	 *  P1 (buddy init)			P2 (regular allocation)
-	 *					find block B in PA
-	 *  copy on-disk bitmap to buddy
-	 *					mark B in on-disk bitmap
-	 *					drop PA from group
-	 *  mark all PAs in buddy
-	 *
-	 * thus, P1 initializes buddy with B available. to prevent this
-	 * we make "copy" and "mark all PAs" atomic and serialize "drop PA"
-	 * against that pair
-	 */
-	ext4_lock_group(sb, grp);
-	list_del(&pa->pa_group_list);
-	ext4_unlock_group(sb, grp);
-
-	spin_lock(pa->pa_obj_lock);
-	list_del_rcu(&pa->pa_inode_list);
-	spin_unlock(pa->pa_obj_lock);
-
-	call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
-}
-
-/*
- * creates new preallocated space for given inode
- */
-static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
-{
-	struct super_block *sb = ac->ac_sb;
-	struct ext4_prealloc_space *pa;
-	struct ext4_group_info *grp;
-	struct ext4_inode_info *ei;
-
-	/* preallocate only when found space is larger than requested */
-	BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
-	BUG_ON(ac->ac_status != AC_STATUS_FOUND);
-	BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
-
-	pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
-	if (pa == NULL)
-		return -ENOMEM;
-
-	if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
-		int winl;
-		int wins;
-		int win;
-		int offs;
-
-		/* we can't allocate as much as normalizer wants.
- * so, found space must get proper lstart
-		 * to cover original request */
-		BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
-		BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
-
-		/* we're limited by original request in that
-		 * logical block must be covered anyway
-		 * winl is window we can move our chunk within */
-		winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
-
-		/* also, we should cover whole original request */
-		wins = ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len;
-
-		/* the smallest one defines real window */
-		win = min(winl, wins);
-
-		offs = ac->ac_o_ex.fe_logical % ac->ac_b_ex.fe_len;
-		if (offs && offs < win)
-			win = offs;
-
-		ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - win;
-		BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
-		BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
-	}
-
-	/* preallocation can change ac_b_ex, thus we store actually
-	 * allocated blocks for history */
-	ac->ac_f_ex = ac->ac_b_ex;
-
-	pa->pa_lstart = ac->ac_b_ex.fe_logical;
-	pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
-	pa->pa_len = ac->ac_b_ex.fe_len;
-	pa->pa_free = pa->pa_len;
-	atomic_set(&pa->pa_count, 1);
-	spin_lock_init(&pa->pa_lock);
-	pa->pa_deleted = 0;
-	pa->pa_linear = 0;
-
-	mb_debug("new inode pa %p: %llu/%u for %u\n", pa,
-			pa->pa_pstart, pa->pa_len, pa->pa_lstart);
-
-	ext4_mb_use_inode_pa(ac, pa);
-	atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
-
-	ei = EXT4_I(ac->ac_inode);
-	grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
-
-	pa->pa_obj_lock = &ei->i_prealloc_lock;
-	pa->pa_inode = ac->ac_inode;
-
-	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
-	list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
-	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
-
-	spin_lock(pa->pa_obj_lock);
-	list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
-	spin_unlock(pa->pa_obj_lock);
-
-	return 0;
-}
-
-/*
- * creates new preallocated space for the locality group this inode belongs to
- */
-static int ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
-{
-	struct super_block *sb = ac->ac_sb;
-	struct ext4_locality_group *lg;
-	struct ext4_prealloc_space *pa;
-	struct ext4_group_info *grp;
-
-	/* preallocate only when found space is larger than requested */
-	BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
-	BUG_ON(ac->ac_status != AC_STATUS_FOUND);
-	BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
-
-	BUG_ON(ext4_pspace_cachep == NULL);
-	pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
-	if (pa == NULL)
-		return -ENOMEM;
-
-	/* preallocation can change ac_b_ex, thus we store actually
-	 * allocated blocks for history */
-	ac->ac_f_ex = ac->ac_b_ex;
-
-	pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
-	pa->pa_lstart = pa->pa_pstart;
-	pa->pa_len = ac->ac_b_ex.fe_len;
-	pa->pa_free = pa->pa_len;
-	atomic_set(&pa->pa_count, 1);
-	spin_lock_init(&pa->pa_lock);
-	pa->pa_deleted = 0;
-	pa->pa_linear = 1;
-
-	mb_debug("new group pa %p: %llu/%u for %u\n", pa,
-			pa->pa_pstart, pa->pa_len, pa->pa_lstart);
-
-	ext4_mb_use_group_pa(ac, pa);
-	atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
-
-	grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
-	lg = ac->ac_lg;
-	BUG_ON(lg == NULL);
-
-	pa->pa_obj_lock = &lg->lg_prealloc_lock;
-	pa->pa_inode = NULL;
-
-	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
-	list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
-	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
-
-	spin_lock(pa->pa_obj_lock);
-	list_add_tail_rcu(&pa->pa_inode_list, &lg->lg_prealloc_list);
-	spin_unlock(pa->pa_obj_lock);
-
-	return 0;
-}
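
The lstart window computation in ext4_mb_new_inode_pa() above is subtle, so here is a minimal standalone C sketch of the same arithmetic. It is illustrative only: pick_pa_lstart and the example numbers are invented for this sketch and are not part of the kernel code.

#include <assert.h>
#include <stdio.h>

/* Given the original request [o_logical, o_logical + o_len) and a goal
 * starting at g_logical, choose where a best extent of b_len blocks
 * (shorter than the goal, but at least o_len) should start logically
 * so that the original request stays covered. */
static unsigned pick_pa_lstart(unsigned o_logical, unsigned o_len,
			       unsigned g_logical, unsigned b_len)
{
	unsigned winl = o_logical - g_logical;	/* room down to the goal start */
	unsigned wins = b_len - o_len;		/* slack left after covering the request */
	unsigned win = winl < wins ? winl : wins;
	unsigned offs = o_logical % b_len;	/* keep the chunk aligned to its size */

	if (offs && offs < win)
		win = offs;
	return o_logical - win;			/* == ac->ac_b_ex.fe_logical above */
}

int main(void)
{
	/* goal was 16 blocks at logical 100; the request was 4 blocks at
	 * logical 106, but only an 8-block extent was found */
	unsigned lstart = pick_pa_lstart(106, 4, 100, 8);

	assert(lstart <= 106 && lstart + 8 >= 106 + 4);
	printf("preallocation covers logical blocks %u..%u\n", lstart, lstart + 7);
	return 0;
}

With these numbers winl = 6 and wins = 4, and the alignment offset 106 % 8 = 2 shrinks the window further, so the preallocation starts at logical block 104 and still covers the original request.
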
- -static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac) -{ - int err; - - if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) - err = ext4_mb_new_group_pa(ac); - else - err = ext4_mb_new_inode_pa(ac); - return err; -} - -/* - * finds all unused blocks in on-disk bitmap, frees them in - * in-core bitmap and buddy. - * @pa must be unlinked from inode and group lists, so that - * nobody else can find/use it. - * the caller MUST hold group/inode locks. - * TODO: optimize the case when there are no in-core structures yet - */ -static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, - struct buffer_head *bitmap_bh, - struct ext4_prealloc_space *pa) -{ - struct ext4_allocation_context ac; - struct super_block *sb = e4b->bd_sb; - struct ext4_sb_info *sbi = EXT4_SB(sb); - unsigned long end; - unsigned long next; - ext4_group_t group; - ext4_grpblk_t bit; - sector_t start; - int err = 0; - int free = 0; - - BUG_ON(pa->pa_deleted == 0); - ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); - BUG_ON(group != e4b->bd_group && pa->pa_len != 0); - end = bit + pa->pa_len; - - ac.ac_sb = sb; - ac.ac_inode = pa->pa_inode; - ac.ac_op = EXT4_MB_HISTORY_DISCARD; - - while (bit < end) { - bit = ext4_find_next_zero_bit(bitmap_bh->b_data, end, bit); - if (bit >= end) - break; - next = ext4_find_next_bit(bitmap_bh->b_data, end, bit); - if (next > end) - next = end; - start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + - le32_to_cpu(sbi->s_es->s_first_data_block); - mb_debug(" free preallocated %u/%u in group %u\n", - (unsigned) start, (unsigned) next - bit, - (unsigned) group); - free += next - bit; - - ac.ac_b_ex.fe_group = group; - ac.ac_b_ex.fe_start = bit; - ac.ac_b_ex.fe_len = next - bit; - ac.ac_b_ex.fe_logical = 0; - ext4_mb_store_history(&ac); - - mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); - bit = next + 1; - } - if (free != pa->pa_free) { - printk(KERN_ERR "pa %p: logic %lu, phys. 
%lu, len %lu\n", - pa, (unsigned long) pa->pa_lstart, - (unsigned long) pa->pa_pstart, - (unsigned long) pa->pa_len); - printk(KERN_ERR "free %u, pa_free %u\n", free, pa->pa_free); - } - BUG_ON(free != pa->pa_free); - atomic_add(free, &sbi->s_mb_discarded); - - return err; -} - -static int ext4_mb_release_group_pa(struct ext4_buddy *e4b, - struct ext4_prealloc_space *pa) -{ - struct ext4_allocation_context ac; - struct super_block *sb = e4b->bd_sb; - ext4_group_t group; - ext4_grpblk_t bit; - - ac.ac_op = EXT4_MB_HISTORY_DISCARD; - - BUG_ON(pa->pa_deleted == 0); - ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); - BUG_ON(group != e4b->bd_group && pa->pa_len != 0); - mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); - atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); - - ac.ac_sb = sb; - ac.ac_inode = NULL; - ac.ac_b_ex.fe_group = group; - ac.ac_b_ex.fe_start = bit; - ac.ac_b_ex.fe_len = pa->pa_len; - ac.ac_b_ex.fe_logical = 0; - ext4_mb_store_history(&ac); - - return 0; -} - -/* - * releases all preallocations in given group - * - * first, we need to decide discard policy: - * - when do we discard - * 1) ENOSPC - * - how many do we discard - * 1) how many requested - */ -static int ext4_mb_discard_group_preallocations(struct super_block *sb, - ext4_group_t group, int needed) -{ - struct ext4_group_info *grp = ext4_get_group_info(sb, group); - struct buffer_head *bitmap_bh = NULL; - struct ext4_prealloc_space *pa, *tmp; - struct list_head list; - struct ext4_buddy e4b; - int err; - int busy = 0; - int free = 0; - - mb_debug("discard preallocation for group %lu\n", group); - - if (list_empty(&grp->bb_prealloc_list)) - return 0; - - bitmap_bh = read_block_bitmap(sb, group); - if (bitmap_bh == NULL) { - /* error handling here */ - ext4_mb_release_desc(&e4b); - BUG_ON(bitmap_bh == NULL); - } - - err = ext4_mb_load_buddy(sb, group, &e4b); - BUG_ON(err != 0); /* error handling here */ - - if (needed == 0) - needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; - - grp = ext4_get_group_info(sb, group); - INIT_LIST_HEAD(&list); - -repeat: - ext4_lock_group(sb, group); - list_for_each_entry_safe(pa, tmp, - &grp->bb_prealloc_list, pa_group_list) { - spin_lock(&pa->pa_lock); - if (atomic_read(&pa->pa_count)) { - spin_unlock(&pa->pa_lock); - busy = 1; - continue; - } - if (pa->pa_deleted) { - spin_unlock(&pa->pa_lock); - continue; - } - - /* seems this one can be freed ... */ - pa->pa_deleted = 1; - - /* we can trust pa_free ... */ - free += pa->pa_free; - - spin_unlock(&pa->pa_lock); - - list_del(&pa->pa_group_list); - list_add(&pa->u.pa_tmp_list, &list); - } - - /* if we still need more blocks and some PAs were used, try again */ - if (free < needed && busy) { - busy = 0; - ext4_unlock_group(sb, group); - /* - * Yield the CPU here so that we don't get soft lockup - * in non preempt case. - */ - yield(); - goto repeat; - } - - /* found anything to free? 
*/ - if (list_empty(&list)) { - BUG_ON(free != 0); - goto out; - } - - /* now free all selected PAs */ - list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) { - - /* remove from object (inode or locality group) */ - spin_lock(pa->pa_obj_lock); - list_del_rcu(&pa->pa_inode_list); - spin_unlock(pa->pa_obj_lock); - - if (pa->pa_linear) - ext4_mb_release_group_pa(&e4b, pa); - else - ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); - - list_del(&pa->u.pa_tmp_list); - call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); - } - -out: - ext4_unlock_group(sb, group); - ext4_mb_release_desc(&e4b); - put_bh(bitmap_bh); - return free; -} - -/* - * releases all non-used preallocated blocks for given inode - * - * It's important to discard preallocations under i_data_sem - * We don't want another block to be served from the prealloc - * space when we are discarding the inode prealloc space. - * - * FIXME!! Make sure it is valid at all the call sites - */ -void ext4_mb_discard_inode_preallocations(struct inode *inode) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - struct super_block *sb = inode->i_sb; - struct buffer_head *bitmap_bh = NULL; - struct ext4_prealloc_space *pa, *tmp; - ext4_group_t group = 0; - struct list_head list; - struct ext4_buddy e4b; - int err; - - if (!test_opt(sb, MBALLOC) || !S_ISREG(inode->i_mode)) { - /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/ - return; - } - - mb_debug("discard preallocation for inode %lu\n", inode->i_ino); - - INIT_LIST_HEAD(&list); - -repeat: - /* first, collect all pa's in the inode */ - spin_lock(&ei->i_prealloc_lock); - while (!list_empty(&ei->i_prealloc_list)) { - pa = list_entry(ei->i_prealloc_list.next, - struct ext4_prealloc_space, pa_inode_list); - BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock); - spin_lock(&pa->pa_lock); - if (atomic_read(&pa->pa_count)) { - /* this shouldn't happen often - nobody should - * use preallocation while we're discarding it */ - spin_unlock(&pa->pa_lock); - spin_unlock(&ei->i_prealloc_lock); - printk(KERN_ERR "uh-oh! used pa while discarding\n"); - WARN_ON(1); - schedule_timeout_uninterruptible(HZ); - goto repeat; - - } - if (pa->pa_deleted == 0) { - pa->pa_deleted = 1; - spin_unlock(&pa->pa_lock); - list_del_rcu(&pa->pa_inode_list); - list_add(&pa->u.pa_tmp_list, &list); - continue; - } - - /* someone is deleting pa right now */ - spin_unlock(&pa->pa_lock); - spin_unlock(&ei->i_prealloc_lock); - - /* we have to wait here because pa_deleted - * doesn't mean pa is already unlinked from - * the list. 
as we might be called from
-		 * ->clear_inode() the inode will get freed
-		 * and concurrent thread which is unlinking
-		 * pa from inode's list may access already
-		 * freed memory, bad-bad-bad */
-
-		/* XXX: if this happens too often, we can
-		 * add a flag to force wait only in case
-		 * of ->clear_inode(), but not in case of
-		 * regular truncate */
-		schedule_timeout_uninterruptible(HZ);
-		goto repeat;
-	}
-	spin_unlock(&ei->i_prealloc_lock);
-
-	list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
-		BUG_ON(pa->pa_linear != 0);
-		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
-
-		err = ext4_mb_load_buddy(sb, group, &e4b);
-		BUG_ON(err != 0); /* error handling here */
-
-		bitmap_bh = read_block_bitmap(sb, group);
-		if (bitmap_bh == NULL) {
-			/* error handling here */
-			ext4_mb_release_desc(&e4b);
-			BUG_ON(bitmap_bh == NULL);
-		}
-
-		ext4_lock_group(sb, group);
-		list_del(&pa->pa_group_list);
-		ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
-		ext4_unlock_group(sb, group);
-
-		ext4_mb_release_desc(&e4b);
-		put_bh(bitmap_bh);
-
-		list_del(&pa->u.pa_tmp_list);
-		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
-	}
-}
-
-/*
- * finds all preallocated spaces and returns blocks being freed to them
- * if preallocated space becomes full (no block is used from the space)
- * then the function frees space in buddy
- * XXX: at the moment, truncate (which is the only way to free blocks)
- * discards all preallocations
- */
-static void ext4_mb_return_to_preallocation(struct inode *inode,
-					struct ext4_buddy *e4b,
-					sector_t block, int count)
-{
-	BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list));
-}
-#ifdef MB_DEBUG
-static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
-{
-	struct super_block *sb = ac->ac_sb;
-	ext4_group_t i;
-
-	printk(KERN_ERR "EXT4-fs: Can't allocate:"
-			" Allocation context details:\n");
-	printk(KERN_ERR "EXT4-fs: status %d flags %d\n",
-			ac->ac_status, ac->ac_flags);
-	printk(KERN_ERR "EXT4-fs: orig %lu/%lu/%lu@%lu, goal %lu/%lu/%lu@%lu, "
-			"best %lu/%lu/%lu@%lu cr %d\n",
-			(unsigned long)ac->ac_o_ex.fe_group,
-			(unsigned long)ac->ac_o_ex.fe_start,
-			(unsigned long)ac->ac_o_ex.fe_len,
-			(unsigned long)ac->ac_o_ex.fe_logical,
-			(unsigned long)ac->ac_g_ex.fe_group,
-			(unsigned long)ac->ac_g_ex.fe_start,
-			(unsigned long)ac->ac_g_ex.fe_len,
-			(unsigned long)ac->ac_g_ex.fe_logical,
-			(unsigned long)ac->ac_b_ex.fe_group,
-			(unsigned long)ac->ac_b_ex.fe_start,
-			(unsigned long)ac->ac_b_ex.fe_len,
-			(unsigned long)ac->ac_b_ex.fe_logical,
-			(int)ac->ac_criteria);
-	printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned,
-		ac->ac_found);
-	printk(KERN_ERR "EXT4-fs: groups:\n");
-	for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
-		struct ext4_group_info *grp = ext4_get_group_info(sb, i);
-		struct ext4_prealloc_space *pa;
-		ext4_grpblk_t start;
-		struct list_head *cur;
-		ext4_lock_group(sb, i);
-		list_for_each(cur, &grp->bb_prealloc_list) {
-			pa = list_entry(cur, struct ext4_prealloc_space,
-					pa_group_list);
-			spin_lock(&pa->pa_lock);
-			ext4_get_group_no_and_offset(sb, pa->pa_pstart,
-						     NULL, &start);
-			spin_unlock(&pa->pa_lock);
-			printk(KERN_ERR "PA:%lu:%d:%u\n", i,
-							start, pa->pa_len);
-		}
-		ext4_unlock_group(sb, i);
-
-		if (grp->bb_free == 0)
-			continue;
-		printk(KERN_ERR "%lu: %d/%d\n",
-				i, grp->bb_free, grp->bb_fragments);
-	}
-	printk(KERN_ERR "\n");
-}
-#else
-static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
-{
-	return;
-}
-#endif
-
-/*
- * We use locality group preallocation for small files. The size of the
- * file is determined by the current size or the resulting size after
- * allocation, whichever is larger
- *
- * One can tune this size via /proc/fs/ext4/<partition>/stream_req
- */
-static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
-{
-	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
-	int bsbits = ac->ac_sb->s_blocksize_bits;
-	loff_t size, isize;
-
-	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
-		return;
-
-	size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
-	isize = i_size_read(ac->ac_inode) >> bsbits;
-	size = max(size, isize);
-
-	/* don't use group allocation for large files */
-	if (size >= sbi->s_mb_stream_request)
-		return;
-
-	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
-		return;
-
-	BUG_ON(ac->ac_lg != NULL);
-	/*
-	 * locality group prealloc spaces are per-CPU. The reason for having
-	 * per-CPU locality groups is to reduce the contention between block
-	 * requests from multiple CPUs.
-	 */
-	ac->ac_lg = &sbi->s_locality_groups[get_cpu()];
-	put_cpu();
-
-	/* we're going to use group allocation */
-	ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
-
-	/* serialize all allocations in the group */
-	mutex_lock(&ac->ac_lg->lg_mutex);
-}
-
-static int ext4_mb_initialize_context(struct ext4_allocation_context *ac,
-				struct ext4_allocation_request *ar)
-{
-	struct super_block *sb = ar->inode->i_sb;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	struct ext4_super_block *es = sbi->s_es;
-	ext4_group_t group;
-	unsigned long len;
-	unsigned long goal;
-	ext4_grpblk_t block;
-
-	/* we can't allocate > group size */
-	len = ar->len;
-
-	/* just a dirty hack to filter too big requests */
-	if (len >= EXT4_BLOCKS_PER_GROUP(sb) - 10)
-		len = EXT4_BLOCKS_PER_GROUP(sb) - 10;
-
-	/* start searching from the goal */
-	goal = ar->goal;
-	if (goal < le32_to_cpu(es->s_first_data_block) ||
-			goal >= ext4_blocks_count(es))
-		goal = le32_to_cpu(es->s_first_data_block);
-	ext4_get_group_no_and_offset(sb, goal, &group, &block);
-
-	/* set up allocation goals */
-	ac->ac_b_ex.fe_logical = ar->logical;
-	ac->ac_b_ex.fe_group = 0;
-	ac->ac_b_ex.fe_start = 0;
-	ac->ac_b_ex.fe_len = 0;
-	ac->ac_status = AC_STATUS_CONTINUE;
-	ac->ac_groups_scanned = 0;
-	ac->ac_ex_scanned = 0;
-	ac->ac_found = 0;
-	ac->ac_sb = sb;
-	ac->ac_inode = ar->inode;
-	ac->ac_o_ex.fe_logical = ar->logical;
-	ac->ac_o_ex.fe_group = group;
-	ac->ac_o_ex.fe_start = block;
-	ac->ac_o_ex.fe_len = len;
-	ac->ac_g_ex.fe_logical = ar->logical;
-	ac->ac_g_ex.fe_group = group;
-	ac->ac_g_ex.fe_start = block;
-	ac->ac_g_ex.fe_len = len;
-	ac->ac_f_ex.fe_len = 0;
-	ac->ac_flags = ar->flags;
-	ac->ac_2order = 0;
-	ac->ac_criteria = 0;
-	ac->ac_pa = NULL;
-	ac->ac_bitmap_page = NULL;
-	ac->ac_buddy_page = NULL;
-	ac->ac_lg = NULL;
-
-	/* we have to define context: whether we'll work with a file or
-	 * a locality group. this is a policy, actually */
-	ext4_mb_group_or_file(ac);
-
-	mb_debug("init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, "
-			"left: %u/%u, right %u/%u to %swritable\n",
-			(unsigned) ar->len, (unsigned) ar->logical,
-			(unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
-			(unsigned) ar->lleft, (unsigned) ar->pleft,
-			(unsigned) ar->lright, (unsigned) ar->pright,
-			atomic_read(&ar->inode->i_writecount) ? "" : "non-");
"" : "non-"); - return 0; - -} - -/* - * release all resource we used in allocation - */ -static int ext4_mb_release_context(struct ext4_allocation_context *ac) -{ - if (ac->ac_pa) { - if (ac->ac_pa->pa_linear) { - /* see comment in ext4_mb_use_group_pa() */ - spin_lock(&ac->ac_pa->pa_lock); - ac->ac_pa->pa_pstart += ac->ac_b_ex.fe_len; - ac->ac_pa->pa_lstart += ac->ac_b_ex.fe_len; - ac->ac_pa->pa_free -= ac->ac_b_ex.fe_len; - ac->ac_pa->pa_len -= ac->ac_b_ex.fe_len; - spin_unlock(&ac->ac_pa->pa_lock); - } - ext4_mb_put_pa(ac, ac->ac_sb, ac->ac_pa); - } - if (ac->ac_bitmap_page) - page_cache_release(ac->ac_bitmap_page); - if (ac->ac_buddy_page) - page_cache_release(ac->ac_buddy_page); - if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) - mutex_unlock(&ac->ac_lg->lg_mutex); - ext4_mb_collect_stats(ac); - return 0; -} - -static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) -{ - ext4_group_t i; - int ret; - int freed = 0; - - for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) { - ret = ext4_mb_discard_group_preallocations(sb, i, needed); - freed += ret; - needed -= ret; - } - - return freed; -} - -/* - * Main entry point into mballoc to allocate blocks - * it tries to use preallocation first, then falls back - * to usual allocation - */ -ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, - struct ext4_allocation_request *ar, int *errp) -{ - struct ext4_allocation_context ac; - struct ext4_sb_info *sbi; - struct super_block *sb; - ext4_fsblk_t block = 0; - int freed; - int inquota; - - sb = ar->inode->i_sb; - sbi = EXT4_SB(sb); - - if (!test_opt(sb, MBALLOC)) { - block = ext4_new_blocks_old(handle, ar->inode, ar->goal, - &(ar->len), errp); - return block; - } - - while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { - ar->flags |= EXT4_MB_HINT_NOPREALLOC; - ar->len--; - } - if (ar->len == 0) { - *errp = -EDQUOT; - return 0; - } - inquota = ar->len; - - ext4_mb_poll_new_transaction(sb, handle); - - *errp = ext4_mb_initialize_context(&ac, ar); - if (*errp) { - ar->len = 0; - goto out; - } - - ac.ac_op = EXT4_MB_HISTORY_PREALLOC; - if (!ext4_mb_use_preallocated(&ac)) { - - ac.ac_op = EXT4_MB_HISTORY_ALLOC; - ext4_mb_normalize_request(&ac, ar); - -repeat: - /* allocate space in core */ - ext4_mb_regular_allocator(&ac); - - /* as we've just preallocated more space than - * user requested orinally, we store allocated - * space in a special descriptor */ - if (ac.ac_status == AC_STATUS_FOUND && - ac.ac_o_ex.fe_len < ac.ac_b_ex.fe_len) - ext4_mb_new_preallocation(&ac); - } - - if (likely(ac.ac_status == AC_STATUS_FOUND)) { - ext4_mb_mark_diskspace_used(&ac, handle); - *errp = 0; - block = ext4_grp_offs_to_block(sb, &ac.ac_b_ex); - ar->len = ac.ac_b_ex.fe_len; - } else { - freed = ext4_mb_discard_preallocations(sb, ac.ac_o_ex.fe_len); - if (freed) - goto repeat; - *errp = -ENOSPC; - ac.ac_b_ex.fe_len = 0; - ar->len = 0; - ext4_mb_show_ac(&ac); - } - - ext4_mb_release_context(&ac); - -out: - if (ar->len < inquota) - DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len); - - return block; -} -static void ext4_mb_poll_new_transaction(struct super_block *sb, - handle_t *handle) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - - if (sbi->s_last_transaction == handle->h_transaction->t_tid) - return; - - /* new transaction! time to close last one and free blocks for - * committed transaction. we know that only transaction can be - * active, so previos transaction can be being logged and we - * know that transaction before previous is known to be already - * logged. 
this means that now we may free blocks freed in all - * transactions before previous one. hope I'm clear enough ... */ - - spin_lock(&sbi->s_md_lock); - if (sbi->s_last_transaction != handle->h_transaction->t_tid) { - mb_debug("new transaction %lu, old %lu\n", - (unsigned long) handle->h_transaction->t_tid, - (unsigned long) sbi->s_last_transaction); - list_splice_init(&sbi->s_closed_transaction, - &sbi->s_committed_transaction); - list_splice_init(&sbi->s_active_transaction, - &sbi->s_closed_transaction); - sbi->s_last_transaction = handle->h_transaction->t_tid; - } - spin_unlock(&sbi->s_md_lock); - - ext4_mb_free_committed_blocks(sb); -} - -static int ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, - ext4_group_t group, ext4_grpblk_t block, int count) -{ - struct ext4_group_info *db = e4b->bd_info; - struct super_block *sb = e4b->bd_sb; - struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_free_metadata *md; - int i; - - BUG_ON(e4b->bd_bitmap_page == NULL); - BUG_ON(e4b->bd_buddy_page == NULL); - - ext4_lock_group(sb, group); - for (i = 0; i < count; i++) { - md = db->bb_md_cur; - if (md && db->bb_tid != handle->h_transaction->t_tid) { - db->bb_md_cur = NULL; - md = NULL; - } - - if (md == NULL) { - ext4_unlock_group(sb, group); - md = kmalloc(sizeof(*md), GFP_NOFS); - if (md == NULL) - return -ENOMEM; - md->num = 0; - md->group = group; - - ext4_lock_group(sb, group); - if (db->bb_md_cur == NULL) { - spin_lock(&sbi->s_md_lock); - list_add(&md->list, &sbi->s_active_transaction); - spin_unlock(&sbi->s_md_lock); - /* protect buddy cache from being freed, - * otherwise we'll refresh it from - * on-disk bitmap and lose not-yet-available - * blocks */ - page_cache_get(e4b->bd_buddy_page); - page_cache_get(e4b->bd_bitmap_page); - db->bb_md_cur = md; - db->bb_tid = handle->h_transaction->t_tid; - mb_debug("new md 0x%p for group %lu\n", - md, md->group); - } else { - kfree(md); - md = db->bb_md_cur; - } - } - - BUG_ON(md->num >= EXT4_BB_MAX_BLOCKS); - md->blocks[md->num] = block + i; - md->num++; - if (md->num == EXT4_BB_MAX_BLOCKS) { - /* no more space, put full container on a sb's list */ - db->bb_md_cur = NULL; - } - } - ext4_unlock_group(sb, group); - return 0; -} - -/* - * Main entry point into mballoc to free blocks - */ -void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, - unsigned long block, unsigned long count, - int metadata, unsigned long *freed) -{ - struct buffer_head *bitmap_bh = 0; - struct super_block *sb = inode->i_sb; - struct ext4_allocation_context ac; - struct ext4_group_desc *gdp; - struct ext4_super_block *es; - unsigned long overflow; - ext4_grpblk_t bit; - struct buffer_head *gd_bh; - ext4_group_t block_group; - struct ext4_sb_info *sbi; - struct ext4_buddy e4b; - int err = 0; - int ret; - - *freed = 0; - - ext4_mb_poll_new_transaction(sb, handle); - - sbi = EXT4_SB(sb); - es = EXT4_SB(sb)->s_es; - if (block < le32_to_cpu(es->s_first_data_block) || - block + count < block || - block + count > ext4_blocks_count(es)) { - ext4_error(sb, __FUNCTION__, - "Freeing blocks not in datazone - " - "block = %lu, count = %lu", block, count); - goto error_return; - } - - ext4_debug("freeing block %lu\n", block); - - ac.ac_op = EXT4_MB_HISTORY_FREE; - ac.ac_inode = inode; - ac.ac_sb = sb; - -do_more: - overflow = 0; - ext4_get_group_no_and_offset(sb, block, &block_group, &bit); - - /* - * Check to see if we are freeing blocks across a group - * boundary. 
- */ - if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { - overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb); - count -= overflow; - } - bitmap_bh = read_block_bitmap(sb, block_group); - if (!bitmap_bh) - goto error_return; - gdp = ext4_get_group_desc(sb, block_group, &gd_bh); - if (!gdp) - goto error_return; - - if (in_range(ext4_block_bitmap(sb, gdp), block, count) || - in_range(ext4_inode_bitmap(sb, gdp), block, count) || - in_range(block, ext4_inode_table(sb, gdp), - EXT4_SB(sb)->s_itb_per_group) || - in_range(block + count - 1, ext4_inode_table(sb, gdp), - EXT4_SB(sb)->s_itb_per_group)) { - - ext4_error(sb, __FUNCTION__, - "Freeing blocks in system zone - " - "Block = %lu, count = %lu", block, count); - } - - BUFFER_TRACE(bitmap_bh, "getting write access"); - err = ext4_journal_get_write_access(handle, bitmap_bh); - if (err) - goto error_return; - - /* - * We are about to modify some metadata. Call the journal APIs - * to unshare ->b_data if a currently-committing transaction is - * using it - */ - BUFFER_TRACE(gd_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, gd_bh); - if (err) - goto error_return; - - err = ext4_mb_load_buddy(sb, block_group, &e4b); - if (err) - goto error_return; - -#ifdef AGGRESSIVE_CHECK - { - int i; - for (i = 0; i < count; i++) - BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); - } -#endif - mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, - bit, count); - - /* We dirtied the bitmap block */ - BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); - err = ext4_journal_dirty_metadata(handle, bitmap_bh); - - ac.ac_b_ex.fe_group = block_group; - ac.ac_b_ex.fe_start = bit; - ac.ac_b_ex.fe_len = count; - ext4_mb_store_history(&ac); - - if (metadata) { - /* blocks being freed are metadata. these blocks shouldn't - * be used until this transaction is committed */ - ext4_mb_free_metadata(handle, &e4b, block_group, bit, count); - } else { - ext4_lock_group(sb, block_group); - err = mb_free_blocks(inode, &e4b, bit, count); - ext4_mb_return_to_preallocation(inode, &e4b, block, count); - ext4_unlock_group(sb, block_group); - BUG_ON(err != 0); - } - - spin_lock(sb_bgl_lock(sbi, block_group)); - gdp->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); - gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); - spin_unlock(sb_bgl_lock(sbi, block_group)); - percpu_counter_add(&sbi->s_freeblocks_counter, count); - - ext4_mb_release_desc(&e4b); - - *freed += count; - - /* And the group descriptor block */ - BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); - ret = ext4_journal_dirty_metadata(handle, gd_bh); - if (!err) - err = ret; - - if (overflow && !err) { - block += count; - count = overflow; - put_bh(bitmap_bh); - goto do_more; - } - sb->s_dirt = 1; -error_return: - brelse(bitmap_bh); - ext4_std_error(sb, err); - return; -} diff --git a/trunk/fs/ext4/migrate.c b/trunk/fs/ext4/migrate.c deleted file mode 100644 index 3ebc2332f52e..000000000000 --- a/trunk/fs/ext4/migrate.c +++ /dev/null @@ -1,560 +0,0 @@ -/* - * Copyright IBM Corporation, 2007 - * Author Aneesh Kumar K.V - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- *
- */
-
-#include <linux/module.h>
-#include <linux/ext4_jbd2.h>
-#include <linux/ext4_fs_extents.h>
-
-/*
- * Details of the contiguous blocks which can be
- * represented by a single extent
- */
-struct list_blocks_struct {
-	ext4_lblk_t first_block, last_block;
-	ext4_fsblk_t first_pblock, last_pblock;
-};
-
-static int finish_range(handle_t *handle, struct inode *inode,
-				struct list_blocks_struct *lb)
-
-{
-	int retval = 0, needed;
-	struct ext4_extent newext;
-	struct ext4_ext_path *path;
-	if (lb->first_pblock == 0)
-		return 0;
-
-	/* Add the extent to temp inode */
-	newext.ee_block = cpu_to_le32(lb->first_block);
-	newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1);
-	ext4_ext_store_pblock(&newext, lb->first_pblock);
-	path = ext4_ext_find_extent(inode, lb->first_block, NULL);
-
-	if (IS_ERR(path)) {
-		retval = PTR_ERR(path);
-		goto err_out;
-	}
-
-	/*
-	 * Calculate the credits needed to insert this extent.
-	 * Since we are doing this in a loop we may accumulate extra
-	 * credits. But below we try not to accumulate too many
-	 * of them by restarting the journal.
-	 */
-	needed = ext4_ext_calc_credits_for_insert(inode, path);
-
-	/*
-	 * Make sure the credits we accumulated are not really high
-	 */
-	if (needed && handle->h_buffer_credits >= EXT4_RESERVE_TRANS_BLOCKS) {
-		retval = ext4_journal_restart(handle, needed);
-		if (retval)
-			goto err_out;
-	}
-	if (needed) {
-		retval = ext4_journal_extend(handle, needed);
-		if (retval != 0) {
-			/*
-			 * If we are not able to extend the journal,
-			 * restart it
-			 */
-			retval = ext4_journal_restart(handle, needed);
-			if (retval)
-				goto err_out;
-		}
-	}
-	retval = ext4_ext_insert_extent(handle, inode, path, &newext);
-err_out:
-	lb->first_pblock = 0;
-	return retval;
-}
-
-static int update_extent_range(handle_t *handle, struct inode *inode,
-				ext4_fsblk_t pblock, ext4_lblk_t blk_num,
-				struct list_blocks_struct *lb)
-{
-	int retval;
-	/*
-	 * See if we can add on to the existing range (if it exists)
-	 */
-	if (lb->first_pblock &&
-		(lb->last_pblock+1 == pblock) &&
-		(lb->last_block+1 == blk_num)) {
-		lb->last_pblock = pblock;
-		lb->last_block = blk_num;
-		return 0;
-	}
-	/*
-	 * Start a new range.
- */ - retval = finish_range(handle, inode, lb); - lb->first_pblock = lb->last_pblock = pblock; - lb->first_block = lb->last_block = blk_num; - - return retval; -} - -static int update_ind_extent_range(handle_t *handle, struct inode *inode, - ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, - struct list_blocks_struct *lb) -{ - struct buffer_head *bh; - __le32 *i_data; - int i, retval = 0; - ext4_lblk_t blk_count = *blk_nump; - unsigned long max_entries = inode->i_sb->s_blocksize >> 2; - - if (!pblock) { - /* Only update the file block number */ - *blk_nump += max_entries; - return 0; - } - - bh = sb_bread(inode->i_sb, pblock); - if (!bh) - return -EIO; - - i_data = (__le32 *)bh->b_data; - for (i = 0; i < max_entries; i++, blk_count++) { - if (i_data[i]) { - retval = update_extent_range(handle, inode, - le32_to_cpu(i_data[i]), - blk_count, lb); - if (retval) - break; - } - } - - /* Update the file block number */ - *blk_nump = blk_count; - put_bh(bh); - return retval; - -} - -static int update_dind_extent_range(handle_t *handle, struct inode *inode, - ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, - struct list_blocks_struct *lb) -{ - struct buffer_head *bh; - __le32 *i_data; - int i, retval = 0; - ext4_lblk_t blk_count = *blk_nump; - unsigned long max_entries = inode->i_sb->s_blocksize >> 2; - - if (!pblock) { - /* Only update the file block number */ - *blk_nump += max_entries * max_entries; - return 0; - } - bh = sb_bread(inode->i_sb, pblock); - if (!bh) - return -EIO; - - i_data = (__le32 *)bh->b_data; - for (i = 0; i < max_entries; i++) { - if (i_data[i]) { - retval = update_ind_extent_range(handle, inode, - le32_to_cpu(i_data[i]), - &blk_count, lb); - if (retval) - break; - } else { - /* Only update the file block number */ - blk_count += max_entries; - } - } - - /* Update the file block number */ - *blk_nump = blk_count; - put_bh(bh); - return retval; - -} - -static int update_tind_extent_range(handle_t *handle, struct inode *inode, - ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, - struct list_blocks_struct *lb) -{ - struct buffer_head *bh; - __le32 *i_data; - int i, retval = 0; - ext4_lblk_t blk_count = *blk_nump; - unsigned long max_entries = inode->i_sb->s_blocksize >> 2; - - if (!pblock) { - /* Only update the file block number */ - *blk_nump += max_entries * max_entries * max_entries; - return 0; - } - bh = sb_bread(inode->i_sb, pblock); - if (!bh) - return -EIO; - - i_data = (__le32 *)bh->b_data; - for (i = 0; i < max_entries; i++) { - if (i_data[i]) { - retval = update_dind_extent_range(handle, inode, - le32_to_cpu(i_data[i]), - &blk_count, lb); - if (retval) - break; - } else - /* Only update the file block number */ - blk_count += max_entries * max_entries; - } - /* Update the file block number */ - *blk_nump = blk_count; - put_bh(bh); - return retval; - -} - -static int free_dind_blocks(handle_t *handle, - struct inode *inode, __le32 i_data) -{ - int i; - __le32 *tmp_idata; - struct buffer_head *bh; - unsigned long max_entries = inode->i_sb->s_blocksize >> 2; - - bh = sb_bread(inode->i_sb, le32_to_cpu(i_data)); - if (!bh) - return -EIO; - - tmp_idata = (__le32 *)bh->b_data; - for (i = 0; i < max_entries; i++) { - if (tmp_idata[i]) - ext4_free_blocks(handle, inode, - le32_to_cpu(tmp_idata[i]), 1, 1); - } - put_bh(bh); - ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); - return 0; -} - -static int free_tind_blocks(handle_t *handle, - struct inode *inode, __le32 i_data) -{ - int i, retval = 0; - __le32 *tmp_idata; - struct buffer_head *bh; - unsigned long max_entries = 
inode->i_sb->s_blocksize >> 2;
-
-	bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
-	if (!bh)
-		return -EIO;
-
-	tmp_idata = (__le32 *)bh->b_data;
-	for (i = 0; i < max_entries; i++) {
-		if (tmp_idata[i]) {
-			retval = free_dind_blocks(handle,
-					inode, tmp_idata[i]);
-			if (retval) {
-				put_bh(bh);
-				return retval;
-			}
-		}
-	}
-	put_bh(bh);
-	ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
-	return 0;
-}
-
-static int free_ind_block(handle_t *handle, struct inode *inode)
-{
-	int retval;
-	struct ext4_inode_info *ei = EXT4_I(inode);
-
-	if (ei->i_data[EXT4_IND_BLOCK])
-		ext4_free_blocks(handle, inode,
-				le32_to_cpu(ei->i_data[EXT4_IND_BLOCK]), 1, 1);
-
-	if (ei->i_data[EXT4_DIND_BLOCK]) {
-		retval = free_dind_blocks(handle, inode,
-						ei->i_data[EXT4_DIND_BLOCK]);
-		if (retval)
-			return retval;
-	}
-
-	if (ei->i_data[EXT4_TIND_BLOCK]) {
-		retval = free_tind_blocks(handle, inode,
-						ei->i_data[EXT4_TIND_BLOCK]);
-		if (retval)
-			return retval;
-	}
-	return 0;
-}
-
-static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
-				struct inode *tmp_inode, int retval)
-{
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode);
-
-	retval = free_ind_block(handle, inode);
-	if (retval)
-		goto err_out;
-
-	/*
-	 * One credit accounted for writing the
-	 * i_data field of the original inode
-	 */
-	retval = ext4_journal_extend(handle, 1);
-	if (retval != 0) {
-		retval = ext4_journal_restart(handle, 1);
-		if (retval)
-			goto err_out;
-	}
-
-	/*
-	 * We have the extent map built with the tmp inode.
-	 * Now copy the i_data across
-	 */
-	ei->i_flags |= EXT4_EXTENTS_FL;
-	memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));
-
-	/*
-	 * Update i_blocks with the new blocks that got
-	 * allocated while adding extents for extent index
-	 * blocks.
-	 *
-	 * While converting to extents we need not
-	 * update the original inode i_blocks for extent blocks
-	 * via quota APIs. The quota update happened via tmp_inode already.
- */
-	spin_lock(&inode->i_lock);
-	inode->i_blocks += tmp_inode->i_blocks;
-	spin_unlock(&inode->i_lock);
-
-	ext4_mark_inode_dirty(handle, inode);
-err_out:
-	return retval;
-}
-
-static int free_ext_idx(handle_t *handle, struct inode *inode,
-					struct ext4_extent_idx *ix)
-{
-	int i, retval = 0;
-	ext4_fsblk_t block;
-	struct buffer_head *bh;
-	struct ext4_extent_header *eh;
-
-	block = idx_pblock(ix);
-	bh = sb_bread(inode->i_sb, block);
-	if (!bh)
-		return -EIO;
-
-	eh = (struct ext4_extent_header *)bh->b_data;
-	if (eh->eh_depth != 0) {
-		ix = EXT_FIRST_INDEX(eh);
-		for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
-			retval = free_ext_idx(handle, inode, ix);
-			if (retval)
-				break;
-		}
-	}
-	put_bh(bh);
-	ext4_free_blocks(handle, inode, block, 1, 1);
-	return retval;
-}
-
-/*
- * Free the extent meta data blocks only
- */
-static int free_ext_block(handle_t *handle, struct inode *inode)
-{
-	int i, retval = 0;
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	struct ext4_extent_header *eh = (struct ext4_extent_header *)ei->i_data;
-	struct ext4_extent_idx *ix;
-	if (eh->eh_depth == 0)
-		/*
-		 * No extra blocks allocated for extent meta data
-		 */
-		return 0;
-	ix = EXT_FIRST_INDEX(eh);
-	for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
-		retval = free_ext_idx(handle, inode, ix);
-		if (retval)
-			return retval;
-	}
-	return retval;
-
-}
-
-int ext4_ext_migrate(struct inode *inode, struct file *filp,
-				unsigned int cmd, unsigned long arg)
-{
-	handle_t *handle;
-	int retval = 0, i;
-	__le32 *i_data;
-	ext4_lblk_t blk_count = 0;
-	struct ext4_inode_info *ei;
-	struct inode *tmp_inode = NULL;
-	struct list_blocks_struct lb;
-	unsigned long max_entries;
-
-	if (!test_opt(inode->i_sb, EXTENTS))
-		/*
-		 * if mounted with noextents we don't allow the migration
-		 */
-		return -EINVAL;
-
-	if ((EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
-		return -EINVAL;
-
-	down_write(&EXT4_I(inode)->i_data_sem);
-	handle = ext4_journal_start(inode,
-					EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
-					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-					2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)
-					+ 1);
-	if (IS_ERR(handle)) {
-		retval = PTR_ERR(handle);
-		goto err_out;
-	}
-	tmp_inode = ext4_new_inode(handle,
-				inode->i_sb->s_root->d_inode,
-				S_IFREG);
-	if (IS_ERR(tmp_inode)) {
-		retval = -ENOMEM;
-		ext4_journal_stop(handle);
-		tmp_inode = NULL;
-		goto err_out;
-	}
-	i_size_write(tmp_inode, i_size_read(inode));
-	/*
-	 * We don't want the inode to be reclaimed
-	 * if we got interrupted in between. We have
-	 * this tmp inode carrying reference to the
-	 * data blocks of the original file. We set
-	 * the i_nlink to zero at the last stage after
-	 * switching the original file to extent format
-	 */
-	tmp_inode->i_nlink = 1;
-
-	ext4_ext_tree_init(handle, tmp_inode);
-	ext4_orphan_add(handle, tmp_inode);
-	ext4_journal_stop(handle);
-
-	ei = EXT4_I(inode);
-	i_data = ei->i_data;
-	memset(&lb, 0, sizeof(lb));
-
-	/* 32 bit block address 4 bytes */
-	max_entries = inode->i_sb->s_blocksize >> 2;
-
-	/*
-	 * start with one credit accounted for
-	 * superblock modification.
-	 *
-	 * For the tmp_inode we already have committed the
-	 * transaction that created the inode. Later as and
-	handle = ext4_journal_start(inode, 1);
-	for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) {
-		if (i_data[i]) {
-			retval = update_extent_range(handle, tmp_inode,
-						le32_to_cpu(i_data[i]),
-						blk_count, &lb);
-			if (retval)
-				goto err_out;
-		}
-	}
-	if (i_data[EXT4_IND_BLOCK]) {
-		retval = update_ind_extent_range(handle, tmp_inode,
-					le32_to_cpu(i_data[EXT4_IND_BLOCK]),
-					&blk_count, &lb);
-		if (retval)
-			goto err_out;
-	} else
-		blk_count += max_entries;
-	if (i_data[EXT4_DIND_BLOCK]) {
-		retval = update_dind_extent_range(handle, tmp_inode,
-					le32_to_cpu(i_data[EXT4_DIND_BLOCK]),
-					&blk_count, &lb);
-		if (retval)
-			goto err_out;
-	} else
-		blk_count += max_entries * max_entries;
-	if (i_data[EXT4_TIND_BLOCK]) {
-		retval = update_tind_extent_range(handle, tmp_inode,
-					le32_to_cpu(i_data[EXT4_TIND_BLOCK]),
-					&blk_count, &lb);
-		if (retval)
-			goto err_out;
-	}
-	/*
-	 * Build the last extent
-	 */
-	retval = finish_range(handle, tmp_inode, &lb);
-err_out:
-	/*
-	 * We are either freeing extent information or indirect
-	 * blocks. During this we touch the superblock, group descriptor
-	 * and block bitmap. Later we mark the tmp_inode dirty
-	 * via ext4_ext_tree_init. So allocate a credit of 4.
-	 * We may also update quota (user and group).
-	 *
-	 * FIXME!! we may be touching bitmaps in different block groups.
-	 */
-	if (ext4_journal_extend(handle,
-			4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)) != 0)
-		ext4_journal_restart(handle,
-			4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb));
-	if (retval)
-		/*
-		 * On failure, delete the extent information carried
-		 * by the tmp_inode
-		 */
-		free_ext_block(handle, tmp_inode);
-	else
-		retval = ext4_ext_swap_inode_data(handle, inode,
-						tmp_inode, retval);
-
-	/*
-	 * Mark the tmp_inode as having size zero
-	 */
-	i_size_write(tmp_inode, 0);
-
-	/*
-	 * Set the i_blocks count to zero
-	 * so that ext4_delete_inode() does the
-	 * right job.
-	 *
-	 * We don't need to take the i_lock because
-	 * the inode is not visible to user space.
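The blk_count stepping above reflects how many logical blocks each level of the old indirect tree addresses: EXT4_NDIR_BLOCKS direct pointers, then max_entries, max_entries^2 and max_entries^3 blocks for the single-, double- and triple-indirect trees, where max_entries = blocksize / 4. A quick standalone check of those figures:

#include <stdio.h>
#include <stdint.h>

#define EXT4_NDIR_BLOCKS 12

int main(void)
{
	unsigned int bs;

	for (bs = 1024; bs <= 4096; bs <<= 1) {
		uint64_t pe = bs / 4;	/* pointers per block */
		printf("%u-byte blocks: direct %u, ind %llu, dind %llu, tind %llu\n",
		       bs, EXT4_NDIR_BLOCKS,
		       (unsigned long long)pe,
		       (unsigned long long)(pe * pe),
		       (unsigned long long)(pe * pe * pe));
	}
	return 0;
}

For 4k blocks this prints 1024, 1048576 and 1073741824, which is why the else branches advance blk_count by max_entries and max_entries * max_entries when an indirect tree is absent.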
- */ - tmp_inode->i_blocks = 0; - - /* Reset the extent details */ - ext4_ext_tree_init(handle, tmp_inode); - - /* - * Set the i_nlink to zero so that - * generic_drop_inode really deletes the - * inode - */ - tmp_inode->i_nlink = 0; - - ext4_journal_stop(handle); - - up_write(&EXT4_I(inode)->i_data_sem); - - if (tmp_inode) - iput(tmp_inode); - - return retval; -} diff --git a/trunk/fs/ext4/namei.c b/trunk/fs/ext4/namei.c index 67b6d8a1ceff..94ee6f315dc1 100644 --- a/trunk/fs/ext4/namei.c +++ b/trunk/fs/ext4/namei.c @@ -51,7 +51,7 @@ static struct buffer_head *ext4_append(handle_t *handle, struct inode *inode, - ext4_lblk_t *block, int *err) + u32 *block, int *err) { struct buffer_head *bh; @@ -144,8 +144,8 @@ struct dx_map_entry u16 size; }; -static inline ext4_lblk_t dx_get_block(struct dx_entry *entry); -static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value); +static inline unsigned dx_get_block (struct dx_entry *entry); +static void dx_set_block (struct dx_entry *entry, unsigned value); static inline unsigned dx_get_hash (struct dx_entry *entry); static void dx_set_hash (struct dx_entry *entry, unsigned value); static unsigned dx_get_count (struct dx_entry *entries); @@ -166,8 +166,7 @@ static void dx_sort_map(struct dx_map_entry *map, unsigned count); static struct ext4_dir_entry_2 *dx_move_dirents (char *from, char *to, struct dx_map_entry *offsets, int count); static struct ext4_dir_entry_2* dx_pack_dirents (char *base, int size); -static void dx_insert_block(struct dx_frame *frame, - u32 hash, ext4_lblk_t block); +static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); static int ext4_htree_next_block(struct inode *dir, __u32 hash, struct dx_frame *frame, struct dx_frame *frames, @@ -182,12 +181,12 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, * Mask them off for now. */ -static inline ext4_lblk_t dx_get_block(struct dx_entry *entry) +static inline unsigned dx_get_block (struct dx_entry *entry) { return le32_to_cpu(entry->block) & 0x00ffffff; } -static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value) +static inline void dx_set_block (struct dx_entry *entry, unsigned value) { entry->block = cpu_to_le32(value); } @@ -244,8 +243,8 @@ static void dx_show_index (char * label, struct dx_entry *entries) int i, n = dx_get_count (entries); printk("%s index ", label); for (i = 0; i < n; i++) { - printk("%x->%lu ", i? dx_get_hash(entries + i) : - 0, (unsigned long)dx_get_block(entries + i)); + printk("%x->%u ", i? dx_get_hash(entries + i) : + 0, dx_get_block(entries + i)); } printk("\n"); } @@ -281,7 +280,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_ent space += EXT4_DIR_REC_LEN(de->name_len); names++; } - de = ext4_next_entry(de); + de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); } printk("(%i)\n", names); return (struct stats) { names, space, 1 }; @@ -298,8 +297,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, printk("%i indexed blocks...\n", count); for (i = 0; i < count; i++, entries++) { - ext4_lblk_t block = dx_get_block(entries); - ext4_lblk_t hash = i ? dx_get_hash(entries): 0; + u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0; u32 range = i < count - 1? 
(dx_get_hash(entries + 1) - hash): ~hash; struct stats stats; printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); @@ -553,8 +551,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, */ static inline struct ext4_dir_entry_2 *ext4_next_entry(struct ext4_dir_entry_2 *p) { - return (struct ext4_dir_entry_2 *)((char *)p + - ext4_rec_len_from_disk(p->rec_len)); + return (struct ext4_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len)); } /* @@ -563,7 +560,7 @@ static inline struct ext4_dir_entry_2 *ext4_next_entry(struct ext4_dir_entry_2 * * into the tree. If there is an error it is returned in err. */ static int htree_dirblock_to_tree(struct file *dir_file, - struct inode *dir, ext4_lblk_t block, + struct inode *dir, int block, struct dx_hash_info *hinfo, __u32 start_hash, __u32 start_minor_hash) { @@ -571,8 +568,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, struct ext4_dir_entry_2 *de, *top; int err, count = 0; - dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", - (unsigned long)block)); + dxtrace(printk("In htree dirblock_to_tree: block %d\n", block)); if (!(bh = ext4_bread (NULL, dir, block, 0, &err))) return err; @@ -624,9 +620,9 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, struct ext4_dir_entry_2 *de; struct dx_frame frames[2], *frame; struct inode *dir; - ext4_lblk_t block; + int block, err; int count = 0; - int ret, err; + int ret; __u32 hashval; dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, @@ -724,7 +720,7 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size, cond_resched(); } /* XXX: do we need to check rec_len == 0 case? -Chris */ - de = ext4_next_entry(de); + de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); } return count; } @@ -756,7 +752,7 @@ static void dx_sort_map (struct dx_map_entry *map, unsigned count) } while(more); } -static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block) +static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block) { struct dx_entry *entries = frame->entries; struct dx_entry *old = frame->at, *new = old + 1; @@ -824,7 +820,7 @@ static inline int search_dirblock(struct buffer_head * bh, return 1; } /* prevent looping on a bad block */ - de_len = ext4_rec_len_from_disk(de->rec_len); + de_len = le16_to_cpu(de->rec_len); if (de_len <= 0) return -1; offset += de_len; @@ -851,20 +847,23 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry, struct super_block * sb; struct buffer_head * bh_use[NAMEI_RA_SIZE]; struct buffer_head * bh, *ret = NULL; - ext4_lblk_t start, block, b; + unsigned long start, block, b; int ra_max = 0; /* Number of bh's in the readahead buffer, bh_use[] */ int ra_ptr = 0; /* Current index into readahead buffer */ int num = 0; - ext4_lblk_t nblocks; - int i, err; + int nblocks, i, err; struct inode *dir = dentry->d_parent->d_inode; int namelen; + const u8 *name; + unsigned blocksize; *res_dir = NULL; sb = dir->i_sb; + blocksize = sb->s_blocksize; namelen = dentry->d_name.len; + name = dentry->d_name.name; if (namelen > EXT4_NAME_LEN) return NULL; if (is_dx(dir)) { @@ -915,8 +914,7 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry, if (!buffer_uptodate(bh)) { /* read error, skip block & hope for the best */ ext4_error(sb, __FUNCTION__, "reading directory #%lu " - "offset %lu", dir->i_ino, - (unsigned long)block); + "offset %lu", dir->i_ino, block); brelse(bh); goto next; } @@ -963,7 +961,7 @@ static struct buffer_head 
* ext4_dx_find_entry(struct dentry *dentry, struct dx_frame frames[2], *frame; struct ext4_dir_entry_2 *de, *top; struct buffer_head *bh; - ext4_lblk_t block; + unsigned long block; int retval; int namelen = dentry->d_name.len; const u8 *name = dentry->d_name.name; @@ -1130,7 +1128,7 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) rec_len = EXT4_DIR_REC_LEN(de->name_len); memcpy (to, de, rec_len); ((struct ext4_dir_entry_2 *) to)->rec_len = - ext4_rec_len_to_disk(rec_len); + cpu_to_le16(rec_len); de->inode = 0; map++; to += rec_len; @@ -1149,12 +1147,13 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size) prev = to = de; while ((char*)de < base + size) { - next = ext4_next_entry(de); + next = (struct ext4_dir_entry_2 *) ((char *) de + + le16_to_cpu(de->rec_len)); if (de->inode && de->name_len) { rec_len = EXT4_DIR_REC_LEN(de->name_len); if (de > to) memmove(to, de, rec_len); - to->rec_len = ext4_rec_len_to_disk(rec_len); + to->rec_len = cpu_to_le16(rec_len); prev = to; to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len); } @@ -1175,7 +1174,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, unsigned blocksize = dir->i_sb->s_blocksize; unsigned count, continued; struct buffer_head *bh2; - ext4_lblk_t newblock; + u32 newblock; u32 hash2; struct dx_map_entry *map; char *data1 = (*bh)->b_data, *data2; @@ -1222,15 +1221,14 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, split = count - move; hash2 = map[split].hash; continued = hash2 == map[split - 1].hash; - dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n", - (unsigned long)dx_get_block(frame->at), - hash2, split, count-split)); + dxtrace(printk("Split block %i at %x, %i/%i\n", + dx_get_block(frame->at), hash2, split, count-split)); /* Fancy dance to stay within two buffers */ de2 = dx_move_dirents(data1, data2, map + split, count - split); de = dx_pack_dirents(data1,blocksize); - de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de); - de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2); + de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); + de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2); dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); @@ -1299,7 +1297,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, return -EEXIST; } nlen = EXT4_DIR_REC_LEN(de->name_len); - rlen = ext4_rec_len_from_disk(de->rec_len); + rlen = le16_to_cpu(de->rec_len); if ((de->inode? 
rlen - nlen: rlen) >= reclen) break; de = (struct ext4_dir_entry_2 *)((char *)de + rlen); @@ -1318,11 +1316,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, /* By now the buffer is marked for journaling */ nlen = EXT4_DIR_REC_LEN(de->name_len); - rlen = ext4_rec_len_from_disk(de->rec_len); + rlen = le16_to_cpu(de->rec_len); if (de->inode) { struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen); - de1->rec_len = ext4_rec_len_to_disk(rlen - nlen); - de->rec_len = ext4_rec_len_to_disk(nlen); + de1->rec_len = cpu_to_le16(rlen - nlen); + de->rec_len = cpu_to_le16(nlen); de = de1; } de->file_type = EXT4_FT_UNKNOWN; @@ -1376,7 +1374,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, int retval; unsigned blocksize; struct dx_hash_info hinfo; - ext4_lblk_t block; + u32 block; struct fake_dirent *fde; blocksize = dir->i_sb->s_blocksize; @@ -1399,18 +1397,17 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, /* The 0th block becomes the root, move the dirents out */ fde = &root->dotdot; - de = (struct ext4_dir_entry_2 *)((char *)fde + - ext4_rec_len_from_disk(fde->rec_len)); + de = (struct ext4_dir_entry_2 *)((char *)fde + le16_to_cpu(fde->rec_len)); len = ((char *) root) + blocksize - (char *) de; memcpy (data1, de, len); de = (struct ext4_dir_entry_2 *) data1; top = data1 + len; - while ((char *)(de2 = ext4_next_entry(de)) < top) + while ((char *)(de2=(void*)de+le16_to_cpu(de->rec_len)) < top) de = de2; - de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de); + de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); /* Initialize the root; the dot dirents already exist */ de = (struct ext4_dir_entry_2 *) (&root->dotdot); - de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2)); + de->rec_len = cpu_to_le16(blocksize - EXT4_DIR_REC_LEN(2)); memset (&root->info, 0, sizeof(root->info)); root->info.info_length = sizeof(root->info); root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; @@ -1457,7 +1454,7 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry, int retval; int dx_fallback=0; unsigned blocksize; - ext4_lblk_t block, blocks; + u32 block, blocks; sb = dir->i_sb; blocksize = sb->s_blocksize; @@ -1490,7 +1487,7 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry, return retval; de = (struct ext4_dir_entry_2 *) bh->b_data; de->inode = 0; - de->rec_len = ext4_rec_len_to_disk(blocksize); + de->rec_len = cpu_to_le16(blocksize); return add_dirent_to_buf(handle, dentry, inode, de, bh); } @@ -1534,7 +1531,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, dx_get_count(entries), dx_get_limit(entries))); /* Need to split index? 
*/ if (dx_get_count(entries) == dx_get_limit(entries)) { - ext4_lblk_t newblock; + u32 newblock; unsigned icount = dx_get_count(entries); int levels = frame - frames; struct dx_entry *entries2; @@ -1553,7 +1550,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, goto cleanup; node2 = (struct dx_node *)(bh2->b_data); entries2 = node2->entries; - node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize); + node2->fake.rec_len = cpu_to_le16(sb->s_blocksize); node2->fake.inode = 0; BUFFER_TRACE(frame->bh, "get_write_access"); err = ext4_journal_get_write_access(handle, frame->bh); @@ -1651,9 +1648,9 @@ static int ext4_delete_entry (handle_t *handle, BUFFER_TRACE(bh, "get_write_access"); ext4_journal_get_write_access(handle, bh); if (pde) - pde->rec_len = ext4_rec_len_to_disk( - ext4_rec_len_from_disk(pde->rec_len) + - ext4_rec_len_from_disk(de->rec_len)); + pde->rec_len = + cpu_to_le16(le16_to_cpu(pde->rec_len) + + le16_to_cpu(de->rec_len)); else de->inode = 0; dir->i_version++; @@ -1661,9 +1658,10 @@ static int ext4_delete_entry (handle_t *handle, ext4_journal_dirty_metadata(handle, bh); return 0; } - i += ext4_rec_len_from_disk(de->rec_len); + i += le16_to_cpu(de->rec_len); pde = de; - de = ext4_next_entry(de); + de = (struct ext4_dir_entry_2 *) + ((char *) de + le16_to_cpu(de->rec_len)); } return -ENOENT; } @@ -1826,13 +1824,13 @@ static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode) de = (struct ext4_dir_entry_2 *) dir_block->b_data; de->inode = cpu_to_le32(inode->i_ino); de->name_len = 1; - de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len)); + de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de->name_len)); strcpy (de->name, "."); ext4_set_de_type(dir->i_sb, de, S_IFDIR); - de = ext4_next_entry(de); + de = (struct ext4_dir_entry_2 *) + ((char *) de + le16_to_cpu(de->rec_len)); de->inode = cpu_to_le32(dir->i_ino); - de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize - - EXT4_DIR_REC_LEN(1)); + de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT4_DIR_REC_LEN(1)); de->name_len = 2; strcpy (de->name, ".."); ext4_set_de_type(dir->i_sb, de, S_IFDIR); @@ -1884,7 +1882,8 @@ static int empty_dir (struct inode * inode) return 1; } de = (struct ext4_dir_entry_2 *) bh->b_data; - de1 = ext4_next_entry(de); + de1 = (struct ext4_dir_entry_2 *) + ((char *) de + le16_to_cpu(de->rec_len)); if (le32_to_cpu(de->inode) != inode->i_ino || !le32_to_cpu(de1->inode) || strcmp (".", de->name) || @@ -1895,9 +1894,9 @@ static int empty_dir (struct inode * inode) brelse (bh); return 1; } - offset = ext4_rec_len_from_disk(de->rec_len) + - ext4_rec_len_from_disk(de1->rec_len); - de = ext4_next_entry(de1); + offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len); + de = (struct ext4_dir_entry_2 *) + ((char *) de1 + le16_to_cpu(de1->rec_len)); while (offset < inode->i_size ) { if (!bh || (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { @@ -1926,8 +1925,9 @@ static int empty_dir (struct inode * inode) brelse (bh); return 0; } - offset += ext4_rec_len_from_disk(de->rec_len); - de = ext4_next_entry(de); + offset += le16_to_cpu(de->rec_len); + de = (struct ext4_dir_entry_2 *) + ((char *) de + le16_to_cpu(de->rec_len)); } brelse (bh); return 1; @@ -2282,7 +2282,8 @@ static int ext4_link (struct dentry * old_dentry, } #define PARENT_INO(buffer) \ - (ext4_next_entry((struct ext4_dir_entry_2 *)(buffer))->inode) + ((struct ext4_dir_entry_2 *) ((char *) buffer + \ + le16_to_cpu(((struct ext4_dir_entry_2 *) buffer)->rec_len)))->inode /* * Anybody can 
rename anything with this: the permission checks are left to the diff --git a/trunk/fs/ext4/resize.c b/trunk/fs/ext4/resize.c index 4fbba60816f4..bd8a52bb3999 100644 --- a/trunk/fs/ext4/resize.c +++ b/trunk/fs/ext4/resize.c @@ -28,7 +28,7 @@ static int verify_group_input(struct super_block *sb, struct ext4_super_block *es = sbi->s_es; ext4_fsblk_t start = ext4_blocks_count(es); ext4_fsblk_t end = start + input->blocks_count; - ext4_group_t group = input->group; + unsigned group = input->group; ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; unsigned overhead = ext4_bg_has_super(sb, group) ? (1 + ext4_bg_num_gdb(sb, group) + @@ -206,7 +206,7 @@ static int setup_new_group_blocks(struct super_block *sb, } if (ext4_bg_has_super(sb, input->group)) { - ext4_debug("mark backup superblock %#04llx (+0)\n", start); + ext4_debug("mark backup superblock %#04lx (+0)\n", start); ext4_set_bit(0, bh->b_data); } @@ -215,7 +215,7 @@ static int setup_new_group_blocks(struct super_block *sb, i < gdblocks; i++, block++, bit++) { struct buffer_head *gdb; - ext4_debug("update backup group %#04llx (+%d)\n", block, bit); + ext4_debug("update backup group %#04lx (+%d)\n", block, bit); if ((err = extend_or_restart_transaction(handle, 1, bh))) goto exit_bh; @@ -243,7 +243,7 @@ static int setup_new_group_blocks(struct super_block *sb, i < reserved_gdb; i++, block++, bit++) { struct buffer_head *gdb; - ext4_debug("clear reserved block %#04llx (+%d)\n", block, bit); + ext4_debug("clear reserved block %#04lx (+%d)\n", block, bit); if ((err = extend_or_restart_transaction(handle, 1, bh))) goto exit_bh; @@ -256,10 +256,10 @@ static int setup_new_group_blocks(struct super_block *sb, ext4_set_bit(bit, bh->b_data); brelse(gdb); } - ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, + ext4_debug("mark block bitmap %#04x (+%ld)\n", input->block_bitmap, input->block_bitmap - start); ext4_set_bit(input->block_bitmap - start, bh->b_data); - ext4_debug("mark inode bitmap %#04llx (+%llu)\n", input->inode_bitmap, + ext4_debug("mark inode bitmap %#04x (+%ld)\n", input->inode_bitmap, input->inode_bitmap - start); ext4_set_bit(input->inode_bitmap - start, bh->b_data); @@ -268,7 +268,7 @@ static int setup_new_group_blocks(struct super_block *sb, i < sbi->s_itb_per_group; i++, bit++, block++) { struct buffer_head *it; - ext4_debug("clear inode block %#04llx (+%d)\n", block, bit); + ext4_debug("clear inode block %#04lx (+%d)\n", block, bit); if ((err = extend_or_restart_transaction(handle, 1, bh))) goto exit_bh; @@ -291,7 +291,7 @@ static int setup_new_group_blocks(struct super_block *sb, brelse(bh); /* Mark unused entries in inode bitmap used */ - ext4_debug("clear inode bitmap %#04llx (+%llu)\n", + ext4_debug("clear inode bitmap %#04x (+%ld)\n", input->inode_bitmap, input->inode_bitmap - start); if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) { err = PTR_ERR(bh); @@ -357,7 +357,7 @@ static int verify_reserved_gdb(struct super_block *sb, struct buffer_head *primary) { const ext4_fsblk_t blk = primary->b_blocknr; - const ext4_group_t end = EXT4_SB(sb)->s_groups_count; + const unsigned long end = EXT4_SB(sb)->s_groups_count; unsigned three = 1; unsigned five = 5; unsigned seven = 7; @@ -656,12 +656,12 @@ static void update_backups(struct super_block *sb, int blk_off, char *data, int size) { struct ext4_sb_info *sbi = EXT4_SB(sb); - const ext4_group_t last = sbi->s_groups_count; + const unsigned long last = sbi->s_groups_count; const int bpg = EXT4_BLOCKS_PER_GROUP(sb); unsigned three = 1; 
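The three/five/seven counters declared here drive the walk over the groups that hold sparse_super superblock backups: group 1 plus powers of 3, 5 and 7, produced by stepping whichever of the three running powers is smallest. A standalone version of the stepping, mirroring the generator resize.c uses internally:

#include <stdio.h>

/* Pick the smallest of the three running powers, advance it by its
 * base, and return it; group 1 comes from the "three" stream. */
static unsigned long next_backup(unsigned *three, unsigned *five,
				 unsigned *seven)
{
	unsigned *min = three;
	int mult = 3;
	unsigned ret;

	if (*five < *min) {
		min = five;
		mult = 5;
	}
	if (*seven < *min) {
		min = seven;
		mult = 7;
	}
	ret = *min;
	*min *= mult;
	return ret;
}

int main(void)
{
	unsigned three = 1, five = 5, seven = 7;
	unsigned long g;

	/* backup groups below 100: 1 3 5 7 9 25 27 49 81 */
	while ((g = next_backup(&three, &five, &seven)) < 100)
		printf("%lu ", g);
	printf("\n");
	return 0;
}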
unsigned five = 5; unsigned seven = 7; - ext4_group_t group; + unsigned group; int rest = sb->s_blocksize - size; handle_t *handle; int err = 0, err2; @@ -716,7 +716,7 @@ static void update_backups(struct super_block *sb, exit_err: if (err) { ext4_warning(sb, __FUNCTION__, - "can't update backup for group %lu (err %d), " + "can't update backup for group %d (err %d), " "forcing fsck on next reboot", group, err); sbi->s_mount_state &= ~EXT4_VALID_FS; sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS); @@ -952,7 +952,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_fsblk_t n_blocks_count) { ext4_fsblk_t o_blocks_count; - ext4_group_t o_groups_count; + unsigned long o_groups_count; ext4_grpblk_t last; ext4_grpblk_t add; struct buffer_head * bh; @@ -1054,7 +1054,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); sb->s_dirt = 1; unlock_super(sb); - ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, + ext4_debug("freeing blocks %lu through %llu\n", o_blocks_count, o_blocks_count + add); ext4_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, diff --git a/trunk/fs/ext4/super.c b/trunk/fs/ext4/super.c index 055a0cd0168e..1ca0f546c466 100644 --- a/trunk/fs/ext4/super.c +++ b/trunk/fs/ext4/super.c @@ -373,66 +373,6 @@ void ext4_update_dynamic_rev(struct super_block *sb) */ } -int ext4_update_compat_feature(handle_t *handle, - struct super_block *sb, __u32 compat) -{ - int err = 0; - if (!EXT4_HAS_COMPAT_FEATURE(sb, compat)) { - err = ext4_journal_get_write_access(handle, - EXT4_SB(sb)->s_sbh); - if (err) - return err; - EXT4_SET_COMPAT_FEATURE(sb, compat); - sb->s_dirt = 1; - handle->h_sync = 1; - BUFFER_TRACE(EXT4_SB(sb)->s_sbh, - "call ext4_journal_dirty_met adata"); - err = ext4_journal_dirty_metadata(handle, - EXT4_SB(sb)->s_sbh); - } - return err; -} - -int ext4_update_rocompat_feature(handle_t *handle, - struct super_block *sb, __u32 rocompat) -{ - int err = 0; - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, rocompat)) { - err = ext4_journal_get_write_access(handle, - EXT4_SB(sb)->s_sbh); - if (err) - return err; - EXT4_SET_RO_COMPAT_FEATURE(sb, rocompat); - sb->s_dirt = 1; - handle->h_sync = 1; - BUFFER_TRACE(EXT4_SB(sb)->s_sbh, - "call ext4_journal_dirty_met adata"); - err = ext4_journal_dirty_metadata(handle, - EXT4_SB(sb)->s_sbh); - } - return err; -} - -int ext4_update_incompat_feature(handle_t *handle, - struct super_block *sb, __u32 incompat) -{ - int err = 0; - if (!EXT4_HAS_INCOMPAT_FEATURE(sb, incompat)) { - err = ext4_journal_get_write_access(handle, - EXT4_SB(sb)->s_sbh); - if (err) - return err; - EXT4_SET_INCOMPAT_FEATURE(sb, incompat); - sb->s_dirt = 1; - handle->h_sync = 1; - BUFFER_TRACE(EXT4_SB(sb)->s_sbh, - "call ext4_journal_dirty_met adata"); - err = ext4_journal_dirty_metadata(handle, - EXT4_SB(sb)->s_sbh); - } - return err; -} - /* * Open the external journal device */ @@ -503,7 +443,6 @@ static void ext4_put_super (struct super_block * sb) struct ext4_super_block *es = sbi->s_es; int i; - ext4_mb_release(sb); ext4_ext_release(sb); ext4_xattr_put_super(sb); jbd2_journal_destroy(sbi->s_journal); @@ -570,8 +509,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei->i_block_alloc_info = NULL; ei->vfs_inode.i_version = 1; memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); - INIT_LIST_HEAD(&ei->i_prealloc_list); - spin_lock_init(&ei->i_prealloc_lock); 
return &ei->vfs_inode; } @@ -596,7 +533,7 @@ static void init_once(struct kmem_cache *cachep, void *foo) #ifdef CONFIG_EXT4DEV_FS_XATTR init_rwsem(&ei->xattr_sem); #endif - init_rwsem(&ei->i_data_sem); + mutex_init(&ei->truncate_mutex); inode_init_once(&ei->vfs_inode); } @@ -668,20 +605,18 @@ static inline void ext4_show_quota_options(struct seq_file *seq, struct super_bl */ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) { - int def_errors; - unsigned long def_mount_opts; struct super_block *sb = vfs->mnt_sb; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; + unsigned long def_mount_opts; def_mount_opts = le32_to_cpu(es->s_default_mount_opts); - def_errors = le16_to_cpu(es->s_errors); if (sbi->s_sb_block != 1) seq_printf(seq, ",sb=%llu", sbi->s_sb_block); if (test_opt(sb, MINIX_DF)) seq_puts(seq, ",minixdf"); - if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS)) + if (test_opt(sb, GRPID)) seq_puts(seq, ",grpid"); if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS)) seq_puts(seq, ",nogrpid"); @@ -693,33 +628,34 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) { seq_printf(seq, ",resgid=%u", sbi->s_resgid); } - if (test_opt(sb, ERRORS_RO)) { + if (test_opt(sb, ERRORS_CONT)) { + int def_errors = le16_to_cpu(es->s_errors); + if (def_errors == EXT4_ERRORS_PANIC || - def_errors == EXT4_ERRORS_CONTINUE) { - seq_puts(seq, ",errors=remount-ro"); + def_errors == EXT4_ERRORS_RO) { + seq_puts(seq, ",errors=continue"); } } - if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) - seq_puts(seq, ",errors=continue"); - if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) + if (test_opt(sb, ERRORS_RO)) + seq_puts(seq, ",errors=remount-ro"); + if (test_opt(sb, ERRORS_PANIC)) seq_puts(seq, ",errors=panic"); - if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16)) + if (test_opt(sb, NO_UID32)) seq_puts(seq, ",nouid32"); - if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) + if (test_opt(sb, DEBUG)) seq_puts(seq, ",debug"); if (test_opt(sb, OLDALLOC)) seq_puts(seq, ",oldalloc"); -#ifdef CONFIG_EXT4DEV_FS_XATTR - if (test_opt(sb, XATTR_USER) && - !(def_mount_opts & EXT4_DEFM_XATTR_USER)) +#ifdef CONFIG_EXT4_FS_XATTR + if (test_opt(sb, XATTR_USER)) seq_puts(seq, ",user_xattr"); if (!test_opt(sb, XATTR_USER) && (def_mount_opts & EXT4_DEFM_XATTR_USER)) { seq_puts(seq, ",nouser_xattr"); } #endif -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL - if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) +#ifdef CONFIG_EXT4_FS_POSIX_ACL + if (test_opt(sb, POSIX_ACL)) seq_puts(seq, ",acl"); if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) seq_puts(seq, ",noacl"); @@ -736,17 +672,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",nobh"); if (!test_opt(sb, EXTENTS)) seq_puts(seq, ",noextents"); - if (!test_opt(sb, MBALLOC)) - seq_puts(seq, ",nomballoc"); - if (test_opt(sb, I_VERSION)) - seq_puts(seq, ",i_version"); - if (sbi->s_stripe) - seq_printf(seq, ",stripe=%lu", sbi->s_stripe); - /* - * journal mode get enabled in different ways - * So just print the value even if we didn't specify it - */ if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) seq_puts(seq, ",data=journal"); else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) @@ -755,6 +681,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, 
",data=writeback"); ext4_show_quota_options(seq, sb); + return 0; } @@ -882,13 +809,11 @@ enum { Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, - Opt_journal_checksum, Opt_journal_async_commit, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, - Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, - Opt_mballoc, Opt_nomballoc, Opt_stripe, + Opt_grpquota, Opt_extents, Opt_noextents, }; static match_table_t tokens = { @@ -923,8 +848,6 @@ static match_table_t tokens = { {Opt_journal_update, "journal=update"}, {Opt_journal_inum, "journal=%u"}, {Opt_journal_dev, "journal_dev=%u"}, - {Opt_journal_checksum, "journal_checksum"}, - {Opt_journal_async_commit, "journal_async_commit"}, {Opt_abort, "abort"}, {Opt_data_journal, "data=journal"}, {Opt_data_ordered, "data=ordered"}, @@ -942,10 +865,6 @@ static match_table_t tokens = { {Opt_barrier, "barrier=%u"}, {Opt_extents, "extents"}, {Opt_noextents, "noextents"}, - {Opt_i_version, "i_version"}, - {Opt_mballoc, "mballoc"}, - {Opt_nomballoc, "nomballoc"}, - {Opt_stripe, "stripe=%u"}, {Opt_err, NULL}, {Opt_resize, "resize"}, }; @@ -1116,13 +1035,6 @@ static int parse_options (char *options, struct super_block *sb, return 0; *journal_devnum = option; break; - case Opt_journal_checksum: - set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); - break; - case Opt_journal_async_commit: - set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT); - set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); - break; case Opt_noload: set_opt (sbi->s_mount_opt, NOLOAD); break; @@ -1291,23 +1203,6 @@ static int parse_options (char *options, struct super_block *sb, case Opt_noextents: clear_opt (sbi->s_mount_opt, EXTENTS); break; - case Opt_i_version: - set_opt(sbi->s_mount_opt, I_VERSION); - sb->s_flags |= MS_I_VERSION; - break; - case Opt_mballoc: - set_opt(sbi->s_mount_opt, MBALLOC); - break; - case Opt_nomballoc: - clear_opt(sbi->s_mount_opt, MBALLOC); - break; - case Opt_stripe: - if (match_int(&args[0], &option)) - return 0; - if (option < 0) - return 0; - sbi->s_stripe = option; - break; default: printk (KERN_ERR "EXT4-fs: Unrecognized mount option \"%s\" " @@ -1469,7 +1364,7 @@ static int ext4_check_descriptors (struct super_block * sb) struct ext4_group_desc * gdp = NULL; int desc_block = 0; int flexbg_flag = 0; - ext4_group_t i; + int i; if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) flexbg_flag = 1; @@ -1491,7 +1386,7 @@ static int ext4_check_descriptors (struct super_block * sb) if (block_bitmap < first_block || block_bitmap > last_block) { ext4_error (sb, "ext4_check_descriptors", - "Block bitmap for group %lu" + "Block bitmap for group %d" " not in group (block %llu)!", i, block_bitmap); return 0; @@ -1500,7 +1395,7 @@ static int ext4_check_descriptors (struct super_block * sb) if (inode_bitmap < first_block || inode_bitmap > last_block) { ext4_error (sb, "ext4_check_descriptors", - "Inode bitmap for group %lu" + "Inode bitmap for group %d" " not in group (block %llu)!", i, inode_bitmap); return 0; @@ -1510,16 +1405,17 @@ static int ext4_check_descriptors (struct super_block * sb) inode_table + sbi->s_itb_per_group - 1 > last_block) { ext4_error (sb, "ext4_check_descriptors", - "Inode table for group %lu" + "Inode table for 
group %d" " not in group (block %llu)!", i, inode_table); return 0; } if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { ext4_error(sb, __FUNCTION__, - "Checksum for group %lu failed (%u!=%u)\n", - i, le16_to_cpu(ext4_group_desc_csum(sbi, i, - gdp)), le16_to_cpu(gdp->bg_checksum)); + "Checksum for group %d failed (%u!=%u)\n", i, + le16_to_cpu(ext4_group_desc_csum(sbi, i, + gdp)), + le16_to_cpu(gdp->bg_checksum)); return 0; } if (!flexbg_flag) @@ -1533,6 +1429,7 @@ static int ext4_check_descriptors (struct super_block * sb) return 1; } + /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at * the superblock) which were deleted from all directories, but held open by * a process at the time of a crash. We walk the list and try to delete these @@ -1645,95 +1542,20 @@ static void ext4_orphan_cleanup (struct super_block * sb, #endif sb->s_flags = s_flags; /* Restore MS_RDONLY status */ } -/* - * Maximal extent format file size. - * Resulting logical blkno at s_maxbytes must fit in our on-disk - * extent format containers, within a sector_t, and within i_blocks - * in the vfs. ext4 inode has 48 bits of i_block in fsblock units, - * so that won't be a limiting factor. - * - * Note, this does *not* consider any metadata overhead for vfs i_blocks. - */ -static loff_t ext4_max_size(int blkbits) -{ - loff_t res; - loff_t upper_limit = MAX_LFS_FILESIZE; - - /* small i_blocks in vfs inode? */ - if (sizeof(blkcnt_t) < sizeof(u64)) { - /* - * CONFIG_LSF is not enabled implies the inode - * i_block represent total blocks in 512 bytes - * 32 == size of vfs inode i_blocks * 8 - */ - upper_limit = (1LL << 32) - 1; - - /* total blocks in file system block size */ - upper_limit >>= (blkbits - 9); - upper_limit <<= blkbits; - } - - /* 32-bit extent-start container, ee_block */ - res = 1LL << 32; - res <<= blkbits; - res -= 1; - - /* Sanity check against vm- & vfs- imposed limits */ - if (res > upper_limit) - res = upper_limit; - - return res; -} /* - * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect - * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. - * We need to be 1 filesystem block less than the 2^48 sector limit. + * Maximal file size. There is a direct, and {,double-,triple-}indirect + * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks. + * We need to be 1 filesystem block less than the 2^32 sector limit. 
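For reference, the ext4_max_size() reinstated by this hunk sums the direct, single-, double- and triple-indirect coverage, converts blocks to bytes, and clamps the result to the precomputed 0x1ff7fffd000 (just under 2 TiB: the largest dense 4k file whose data plus indirect blocks stay within 2^32 sectors). The arithmetic in standalone form:

#include <stdio.h>

#define EXT4_NDIR_BLOCKS 12

/* Same computation as the pre-48-bit ext4_max_size() kept by this
 * patch: logical coverage of the indirect tree, shifted to bytes and
 * clamped to the 2^32-sector i_blocks limit. */
static long long max_size(int bits)
{
	long long res = EXT4_NDIR_BLOCKS;
	const long long upper_limit = 0x1ff7fffd000LL;

	res += 1LL << (bits - 2);		/* single indirect */
	res += 1LL << (2 * (bits - 2));		/* double indirect */
	res += 1LL << (3 * (bits - 2));		/* triple indirect */
	res <<= bits;				/* blocks -> bytes */
	if (res > upper_limit)
		res = upper_limit;
	return res;
}

int main(void)
{
	int bits;

	for (bits = 10; bits <= 12; bits++)
		printf("blocksize %d: max file size %lld bytes\n",
		       1 << bits, max_size(bits));
	return 0;
}

With 1k blocks the tree itself is the limit (about 16 GiB); with 4k blocks the sector cap bites and the clamp takes effect.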
*/ -static loff_t ext4_max_bitmap_size(int bits) +static loff_t ext4_max_size(int bits) { loff_t res = EXT4_NDIR_BLOCKS; - int meta_blocks; - loff_t upper_limit; - /* This is calculated to be the largest file size for a - * dense, bitmapped file such that the total number of + /* This constant is calculated to be the largest file size for a + * dense, 4k-blocksize file such that the total number of * sectors in the file, including data and all indirect blocks, - * does not exceed 2^48 -1 - * __u32 i_blocks_lo and _u16 i_blocks_high representing the - * total number of 512 bytes blocks of the file - */ - - if (sizeof(blkcnt_t) < sizeof(u64)) { - /* - * CONFIG_LSF is not enabled implies the inode - * i_block represent total blocks in 512 bytes - * 32 == size of vfs inode i_blocks * 8 - */ - upper_limit = (1LL << 32) - 1; - - /* total blocks in file system block size */ - upper_limit >>= (bits - 9); - - } else { - /* - * We use 48 bit ext4_inode i_blocks - * With EXT4_HUGE_FILE_FL set the i_blocks - * represent total number of blocks in - * file system block size - */ - upper_limit = (1LL << 48) - 1; - - } - - /* indirect blocks */ - meta_blocks = 1; - /* double indirect blocks */ - meta_blocks += 1 + (1LL << (bits-2)); - /* tripple indirect blocks */ - meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); - - upper_limit -= meta_blocks; - upper_limit <<= bits; + * does not exceed 2^32. */ + const loff_t upper_limit = 0x1ff7fffd000LL; res += 1LL << (bits-2); res += 1LL << (2*(bits-2)); @@ -1741,10 +1563,6 @@ static loff_t ext4_max_bitmap_size(int bits) res <<= bits; if (res > upper_limit) res = upper_limit; - - if (res > MAX_LFS_FILESIZE) - res = MAX_LFS_FILESIZE; - return res; } @@ -1752,7 +1570,7 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb, ext4_fsblk_t logical_sb_block, int nr) { struct ext4_sb_info *sbi = EXT4_SB(sb); - ext4_group_t bg, first_meta_bg; + unsigned long bg, first_meta_bg; int has_super = 0; first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); @@ -1766,39 +1584,8 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb, return (has_super + ext4_group_first_block_no(sb, bg)); } -/** - * ext4_get_stripe_size: Get the stripe size. - * @sbi: In memory super block info - * - * If we have specified it via mount option, then - * use the mount option value. If the value specified at mount time is - * greater than the blocks per group use the super block value. - * If the super block value is greater than blocks per group return 0. - * Allocator needs it be less than blocks per group. 
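The stripe helper being deleted here applies a simple priority: the mount-time stripe wins if it fits within a block group, then the superblock's RAID stripe width, then the RAID stride, otherwise 0. Restated as a tiny standalone function (explicit zero checks added for clarity; the outcome is the same):

#include <stdio.h>

/* Priority order of the stripe-size sources described above: mount
 * option, then s_raid_stripe_width, then s_raid_stride, each accepted
 * only if it fits inside one block group. */
static unsigned long pick_stripe(unsigned long mount_stripe,
				 unsigned long stripe_width,
				 unsigned long stride,
				 unsigned long blocks_per_group)
{
	if (mount_stripe && mount_stripe <= blocks_per_group)
		return mount_stripe;
	if (stripe_width && stripe_width <= blocks_per_group)
		return stripe_width;
	if (stride && stride <= blocks_per_group)
		return stride;
	return 0;
}

int main(void)
{
	/* superblock advertises stripe_width 64, stride 16;
	 * 32768 blocks per group; no stripe= mount option */
	printf("%lu\n", pick_stripe(0, 64, 16, 32768));	/* -> 64 */
	return 0;
}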
- * - */ -static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) -{ - unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride); - unsigned long stripe_width = - le32_to_cpu(sbi->s_es->s_raid_stripe_width); - - if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) - return sbi->s_stripe; - - if (stripe_width <= sbi->s_blocks_per_group) - return stripe_width; - - if (stride <= sbi->s_blocks_per_group) - return stride; - - return 0; -} static int ext4_fill_super (struct super_block *sb, void *data, int silent) - __releases(kernel_sem) - __acquires(kernel_sem) - { struct buffer_head * bh; struct ext4_super_block *es = NULL; @@ -1812,6 +1599,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) unsigned long def_mount_opts; struct inode *root; int blocksize; + int hblock; int db_count; int i; int needs_recovery; @@ -1836,11 +1624,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) goto out_fail; } - if (!sb_set_blocksize(sb, blocksize)) { - printk(KERN_ERR "EXT4-fs: bad blocksize %d.\n", blocksize); - goto out_fail; - } - /* * The ext4 superblock will not be buffer aligned for other than 1kB * block sizes. We need to calculate the offset from buffer start. @@ -1891,10 +1674,10 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) set_opt(sbi->s_mount_opt, ERRORS_PANIC); - else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) - set_opt(sbi->s_mount_opt, ERRORS_CONT); - else + else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_RO) set_opt(sbi->s_mount_opt, ERRORS_RO); + else + set_opt(sbi->s_mount_opt, ERRORS_CONT); sbi->s_resuid = le16_to_cpu(es->s_def_resuid); sbi->s_resgid = le16_to_cpu(es->s_def_resgid); @@ -1906,11 +1689,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) * User -o noextents to turn it off */ set_opt(sbi->s_mount_opt, EXTENTS); - /* - * turn on mballoc feature by default in ext4 filesystem - * User -o nomballoc to turn it off - */ - set_opt(sbi->s_mount_opt, MBALLOC); if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, NULL, 0)) @@ -1945,19 +1723,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) sb->s_id, le32_to_cpu(features)); goto failed_mount; } - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { - /* - * Large file size enabled file system can only be - * mount if kernel is build with CONFIG_LSF - */ - if (sizeof(root->i_blocks) < sizeof(u64) && - !(sb->s_flags & MS_RDONLY)) { - printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge " - "files cannot be mounted read-write " - "without CONFIG_LSF.\n", sb->s_id); - goto failed_mount; - } - } blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); if (blocksize < EXT4_MIN_BLOCK_SIZE || @@ -1968,16 +1733,20 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount; } + hblock = bdev_hardsect_size(sb->s_bdev); if (sb->s_blocksize != blocksize) { - - /* Validate the filesystem blocksize */ - if (!sb_set_blocksize(sb, blocksize)) { - printk(KERN_ERR "EXT4-fs: bad block size %d.\n", - blocksize); + /* + * Make sure the blocksize for the filesystem is larger + * than the hardware sectorsize for the machine. 
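Once the real blocksize is known, the re-read just below converts the superblock's fixed byte position (sb_block * 1024) into a logical block number plus an offset within that block via do_div(). The same split in plain C:

#include <stdio.h>
#include <stdint.h>

#define EXT4_MIN_BLOCK_SIZE 1024

int main(void)
{
	uint64_t sb_block = 1;		/* default superblock location */
	unsigned int blocksize = 4096;	/* from s_log_block_size */

	uint64_t logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
	/* do_div(x, y) divides x in place and hands back the remainder */
	unsigned int offset = logical_sb_block % blocksize;
	logical_sb_block /= blocksize;

	/* 4k filesystem: superblock occupies bytes 1024..2047 of block 0 */
	printf("block %llu offset %u\n",
	       (unsigned long long)logical_sb_block, offset);
	return 0;
}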
+ */ + if (blocksize < hblock) { + printk(KERN_ERR "EXT4-fs: blocksize %d too small for " + "device blocksize %d.\n", blocksize, hblock); goto failed_mount; } brelse (bh); + sb_set_blocksize(sb, blocksize); logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; offset = do_div(logical_sb_block, blocksize); bh = sb_bread(sb, logical_sb_block); @@ -1995,7 +1764,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) } } - sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits); sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits); if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { @@ -2070,17 +1838,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) if (EXT4_BLOCKS_PER_GROUP(sb) == 0) goto cantfind_ext4; - - /* ensure blocks_count calculation below doesn't sign-extend */ - if (ext4_blocks_count(es) + EXT4_BLOCKS_PER_GROUP(sb) < - le32_to_cpu(es->s_first_data_block) + 1) { - printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu, " - "first data block %u, blocks per group %lu\n", - ext4_blocks_count(es), - le32_to_cpu(es->s_first_data_block), - EXT4_BLOCKS_PER_GROUP(sb)); - goto failed_mount; - } blocks_count = (ext4_blocks_count(es) - le32_to_cpu(es->s_first_data_block) + EXT4_BLOCKS_PER_GROUP(sb) - 1); @@ -2143,8 +1900,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) sbi->s_rsv_window_head.rsv_goal_size = 0; ext4_rsv_window_add(sb, &sbi->s_rsv_window_head); - sbi->s_stripe = ext4_get_stripe_size(sbi); - /* * set up enough so that it can read an inode */ @@ -2189,21 +1944,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount4; } - if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { - jbd2_journal_set_features(sbi->s_journal, - JBD2_FEATURE_COMPAT_CHECKSUM, 0, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); - } else if (test_opt(sb, JOURNAL_CHECKSUM)) { - jbd2_journal_set_features(sbi->s_journal, - JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); - jbd2_journal_clear_features(sbi->s_journal, 0, 0, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); - } else { - jbd2_journal_clear_features(sbi->s_journal, - JBD2_FEATURE_COMPAT_CHECKSUM, 0, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); - } - /* We have now updated the journal if required, so we can * validate the data journaling mode. 
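For context, the feature plumbing deleted above amounted to a three-way selection keyed off the (also removed) mount options; a sketch restating it, where the wrapper is illustrative and the jbd2_journal_set_features()/jbd2_journal_clear_features() calls are the real entry points:

/*
 * Illustrative restatement of the removed policy: journal_async_commit
 * implies journal_checksum; plain journal_checksum keeps a synchronous
 * commit record; neither option clears both feature bits.
 */
static void select_journal_features(journal_t *j, int async, int csum)
{
	if (async) {
		/* checksummed commit block, written without waiting */
		jbd2_journal_set_features(j, JBD2_FEATURE_COMPAT_CHECKSUM, 0,
					  JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
	} else if (csum) {
		/* checksummed, but still a synchronous commit record */
		jbd2_journal_set_features(j, JBD2_FEATURE_COMPAT_CHECKSUM,
					  0, 0);
		jbd2_journal_clear_features(j, 0, 0,
					  JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
	} else {
		/* make sure both feature bits are off */
		jbd2_journal_clear_features(j, JBD2_FEATURE_COMPAT_CHECKSUM, 0,
					  JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
	}
}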
*/ switch (test_opt(sb, DATA_FLAGS)) { @@ -2304,7 +2044,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) "writeback"); ext4_ext_init(sb); - ext4_mb_init(sb, needs_recovery); lock_kernel(); return 0; @@ -2934,7 +2673,7 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf) if (test_opt(sb, MINIX_DF)) { sbi->s_overhead_last = 0; } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { - ext4_group_t ngroups = sbi->s_groups_count, i; + unsigned long ngroups = sbi->s_groups_count, i; ext4_fsblk_t overhead = 0; smp_rmb(); @@ -3170,7 +2909,7 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off) { struct inode *inode = sb_dqopt(sb)->files[type]; - ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); + sector_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); int err = 0; int offset = off & (sb->s_blocksize - 1); int tocopy; @@ -3208,7 +2947,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off) { struct inode *inode = sb_dqopt(sb)->files[type]; - ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); + sector_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); int err = 0; int offset = off & (sb->s_blocksize - 1); int tocopy; @@ -3263,6 +3002,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, i_size_write(inode, off+len-towrite); EXT4_I(inode)->i_disksize = inode->i_size; } + inode->i_version++; inode->i_mtime = inode->i_ctime = CURRENT_TIME; ext4_mark_inode_dirty(handle, inode); mutex_unlock(&inode->i_mutex); @@ -3287,15 +3027,9 @@ static struct file_system_type ext4dev_fs_type = { static int __init init_ext4_fs(void) { - int err; - - err = init_ext4_mballoc(); + int err = init_ext4_xattr(); if (err) return err; - - err = init_ext4_xattr(); - if (err) - goto out2; err = init_inodecache(); if (err) goto out1; @@ -3307,8 +3041,6 @@ static int __init init_ext4_fs(void) destroy_inodecache(); out1: exit_ext4_xattr(); -out2: - exit_ext4_mballoc(); return err; } @@ -3317,7 +3049,6 @@ static void __exit exit_ext4_fs(void) unregister_filesystem(&ext4dev_fs_type); destroy_inodecache(); exit_ext4_xattr(); - exit_ext4_mballoc(); } MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); diff --git a/trunk/fs/ext4/xattr.c b/trunk/fs/ext4/xattr.c index d7962139c010..86387302c2a9 100644 --- a/trunk/fs/ext4/xattr.c +++ b/trunk/fs/ext4/xattr.c @@ -480,7 +480,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, ea_bdebug(bh, "refcount now=0; freeing"); if (ce) mb_cache_entry_free(ce); - ext4_free_blocks(handle, inode, bh->b_blocknr, 1, 1); + ext4_free_blocks(handle, inode, bh->b_blocknr, 1); get_bh(bh); ext4_forget(handle, 1, inode, bh, bh->b_blocknr); } else { @@ -821,7 +821,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, new_bh = sb_getblk(sb, block); if (!new_bh) { getblk_failed: - ext4_free_blocks(handle, inode, block, 1, 1); + ext4_free_blocks(handle, inode, block, 1); error = -EIO; goto cleanup; } diff --git a/trunk/fs/inode.c b/trunk/fs/inode.c index 276ffd6b6fdd..ed35383d0b6c 100644 --- a/trunk/fs/inode.c +++ b/trunk/fs/inode.c @@ -1276,11 +1276,6 @@ void file_update_time(struct file *file) sync_it = 1; } - if (IS_I_VERSION(inode)) { - inode_inc_iversion(inode); - sync_it = 1; - } - if (sync_it) mark_inode_dirty_sync(inode); } diff --git a/trunk/fs/jbd2/checkpoint.c b/trunk/fs/jbd2/checkpoint.c index 1b7f282c1ae9..3fccde7ba008 100644 --- a/trunk/fs/jbd2/checkpoint.c +++ 
b/trunk/fs/jbd2/checkpoint.c @@ -232,8 +232,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it */ static int __process_buffer(journal_t *journal, struct journal_head *jh, - struct buffer_head **bhs, int *batch_count, - transaction_t *transaction) + struct buffer_head **bhs, int *batch_count) { struct buffer_head *bh = jh2bh(jh); int ret = 0; @@ -251,7 +250,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, transaction_t *t = jh->b_transaction; tid_t tid = t->t_tid; - transaction->t_chp_stats.cs_forced_to_close++; spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); jbd2_log_start_commit(journal, tid); @@ -281,7 +279,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, bhs[*batch_count] = bh; __buffer_relink_io(jh); jbd_unlock_bh_state(bh); - transaction->t_chp_stats.cs_written++; (*batch_count)++; if (*batch_count == NR_BATCH) { spin_unlock(&journal->j_list_lock); @@ -325,8 +322,6 @@ int jbd2_log_do_checkpoint(journal_t *journal) if (!journal->j_checkpoint_transactions) goto out; transaction = journal->j_checkpoint_transactions; - if (transaction->t_chp_stats.cs_chp_time == 0) - transaction->t_chp_stats.cs_chp_time = jiffies; this_tid = transaction->t_tid; restart: /* @@ -351,8 +346,7 @@ int jbd2_log_do_checkpoint(journal_t *journal) retry = 1; break; } - retry = __process_buffer(journal, jh, bhs, &batch_count, - transaction); + retry = __process_buffer(journal, jh, bhs,&batch_count); if (!retry && lock_need_resched(&journal->j_list_lock)){ spin_unlock(&journal->j_list_lock); retry = 1; @@ -608,15 +602,15 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) /* * There is one special case to worry about: if we have just pulled the - * buffer off a running or committing transaction's checkpoing list, - * then even if the checkpoint list is empty, the transaction obviously - * cannot be dropped! + * buffer off a committing transaction's forget list, then even if the + * checkpoint list is empty, the transaction obviously cannot be + * dropped! * - * The locking here around t_state is a bit sleazy. + * The locking here around j_committing_transaction is a bit sleazy. * See the comment at the end of jbd2_journal_commit_transaction(). */ - if (transaction->t_state != T_FINISHED) { - JBUFFER_TRACE(jh, "belongs to running/committing transaction"); + if (transaction == journal->j_committing_transaction) { + JBUFFER_TRACE(jh, "belongs to committing transaction"); goto out; } diff --git a/trunk/fs/jbd2/commit.c b/trunk/fs/jbd2/commit.c index da8d0eb3b7b9..6986f334c643 100644 --- a/trunk/fs/jbd2/commit.c +++ b/trunk/fs/jbd2/commit.c @@ -20,8 +20,6 @@ #include #include #include -#include -#include /* * Default IO end handler for temporary BJ_IO buffer_heads. @@ -94,23 +92,19 @@ static int inverted_lock(journal_t *journal, struct buffer_head *bh) return 1; } -/* - * Done it all: now submit the commit record. We should have +/* Done it all: now write the commit record. We should have * cleaned up our previous buffers by now, so if we are in abort * mode we can now just skip the rest of the journal write * entirely. 
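Note that the journal_write_commit_record() added back here still carries the defect flagged by the "AKPM: buglet" comment in the hunk below: the loop strides i across the buffer in 512-byte steps but never adds i to the pointer, so only the first sector of the commit block is stamped. The presumably intended form, shown as a fragment against jbd2's own types (bh, i and commit_transaction come from the surrounding function):

/* Stamp a journal_header_t at every 512-byte boundary of the commit
 * buffer, not just at offset 0. */
for (i = 0; i < bh->b_size; i += 512) {
	journal_header_t *tmp = (journal_header_t *)(bh->b_data + i);
	tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
	tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
	tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
}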
* * Returns 1 if the journal needs to be aborted or 0 on success */ -static int journal_submit_commit_record(journal_t *journal, - transaction_t *commit_transaction, - struct buffer_head **cbh, - __u32 crc32_sum) +static int journal_write_commit_record(journal_t *journal, + transaction_t *commit_transaction) { struct journal_head *descriptor; - struct commit_header *tmp; struct buffer_head *bh; - int ret; + int i, ret; int barrier_done = 0; if (is_journal_aborted(journal)) @@ -122,33 +116,21 @@ static int journal_submit_commit_record(journal_t *journal, bh = jh2bh(descriptor); - tmp = (struct commit_header *)bh->b_data; - tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); - tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); - tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); - - if (JBD2_HAS_COMPAT_FEATURE(journal, - JBD2_FEATURE_COMPAT_CHECKSUM)) { - tmp->h_chksum_type = JBD2_CRC32_CHKSUM; - tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE; - tmp->h_chksum[0] = cpu_to_be32(crc32_sum); + /* AKPM: buglet - add `i' to tmp! */ + for (i = 0; i < bh->b_size; i += 512) { + journal_header_t *tmp = (journal_header_t*)bh->b_data; + tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); + tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); + tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); } - JBUFFER_TRACE(descriptor, "submit commit block"); - lock_buffer(bh); - + JBUFFER_TRACE(descriptor, "write commit block"); set_buffer_dirty(bh); - set_buffer_uptodate(bh); - bh->b_end_io = journal_end_buffer_io_sync; - - if (journal->j_flags & JBD2_BARRIER && - !JBD2_HAS_COMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { + if (journal->j_flags & JBD2_BARRIER) { set_buffer_ordered(bh); barrier_done = 1; } - ret = submit_bh(WRITE, bh); - + ret = sync_dirty_buffer(bh); /* is it possible for another commit to fail at roughly * the same time as this one? If so, we don't want to * trust the barrier flag in the super, but instead want @@ -169,72 +151,14 @@ static int journal_submit_commit_record(journal_t *journal, clear_buffer_ordered(bh); set_buffer_uptodate(bh); set_buffer_dirty(bh); - ret = submit_bh(WRITE, bh); + ret = sync_dirty_buffer(bh); } - *cbh = bh; - return ret; -} - -/* - * This function along with journal_submit_commit_record - * allows to write the commit record asynchronously. - */ -static int journal_wait_on_commit_record(struct buffer_head *bh) -{ - int ret = 0; - - clear_buffer_dirty(bh); - wait_on_buffer(bh); - - if (unlikely(!buffer_uptodate(bh))) - ret = -EIO; - put_bh(bh); /* One for getblk() */ - jbd2_journal_put_journal_head(bh2jh(bh)); + put_bh(bh); /* One for getblk() */ + jbd2_journal_put_journal_head(descriptor); - return ret; + return (ret == -EIO); } -/* - * Wait for all submitted IO to complete. 
- */ -static int journal_wait_on_locked_list(journal_t *journal, - transaction_t *commit_transaction) -{ - int ret = 0; - struct journal_head *jh; - - while (commit_transaction->t_locked_list) { - struct buffer_head *bh; - - jh = commit_transaction->t_locked_list->b_tprev; - bh = jh2bh(jh); - get_bh(bh); - if (buffer_locked(bh)) { - spin_unlock(&journal->j_list_lock); - wait_on_buffer(bh); - if (unlikely(!buffer_uptodate(bh))) - ret = -EIO; - spin_lock(&journal->j_list_lock); - } - if (!inverted_lock(journal, bh)) { - put_bh(bh); - spin_lock(&journal->j_list_lock); - continue; - } - if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) { - __jbd2_journal_unfile_buffer(jh); - jbd_unlock_bh_state(bh); - jbd2_journal_remove_journal_head(bh); - put_bh(bh); - } else { - jbd_unlock_bh_state(bh); - } - put_bh(bh); - cond_resched_lock(&journal->j_list_lock); - } - return ret; - } - static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) { int i; @@ -350,21 +274,7 @@ static void journal_submit_data_buffers(journal_t *journal, journal_do_submit_data(wbuf, bufs); } -static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh) -{ - struct page *page = bh->b_page; - char *addr; - __u32 checksum; - - addr = kmap_atomic(page, KM_USER0); - checksum = crc32_be(crc32_sum, - (void *)(addr + offset_in_page(bh->b_data)), bh->b_size); - kunmap_atomic(addr, KM_USER0); - - return checksum; -} - -static void write_tag_block(int tag_bytes, journal_block_tag_t *tag, +static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag, unsigned long long block) { tag->t_blocknr = cpu_to_be32(block & (u32)~0); @@ -380,7 +290,6 @@ static void write_tag_block(int tag_bytes, journal_block_tag_t *tag, */ void jbd2_journal_commit_transaction(journal_t *journal) { - struct transaction_stats_s stats; transaction_t *commit_transaction; struct journal_head *jh, *new_jh, *descriptor; struct buffer_head **wbuf = journal->j_wbuf; @@ -396,8 +305,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) int tag_flag; int i; int tag_bytes = journal_tag_bytes(journal); - struct buffer_head *cbh = NULL; /* For transactional checksums */ - __u32 crc32_sum = ~0; /* * First job: lock down the current transaction and wait for @@ -430,11 +337,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) spin_lock(&journal->j_state_lock); commit_transaction->t_state = T_LOCKED; - stats.u.run.rs_wait = commit_transaction->t_max_wait; - stats.u.run.rs_locked = jiffies; - stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start, - stats.u.run.rs_locked); - spin_lock(&commit_transaction->t_handle_lock); while (commit_transaction->t_updates) { DEFINE_WAIT(wait); @@ -505,10 +407,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) */ jbd2_journal_switch_revoke_table(journal); - stats.u.run.rs_flushing = jiffies; - stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked, - stats.u.run.rs_flushing); - commit_transaction->t_state = T_FLUSH; journal->j_committing_transaction = commit_transaction; journal->j_running_transaction = NULL; @@ -542,15 +440,38 @@ void jbd2_journal_commit_transaction(journal_t *journal) journal_submit_data_buffers(journal, commit_transaction); /* - * Wait for all previously submitted IO to complete if commit - * record is to be written synchronously. + * Wait for all previously submitted IO to complete. 
*/ spin_lock(&journal->j_list_lock); - if (!JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) - err = journal_wait_on_locked_list(journal, - commit_transaction); + while (commit_transaction->t_locked_list) { + struct buffer_head *bh; + jh = commit_transaction->t_locked_list->b_tprev; + bh = jh2bh(jh); + get_bh(bh); + if (buffer_locked(bh)) { + spin_unlock(&journal->j_list_lock); + wait_on_buffer(bh); + if (unlikely(!buffer_uptodate(bh))) + err = -EIO; + spin_lock(&journal->j_list_lock); + } + if (!inverted_lock(journal, bh)) { + put_bh(bh); + spin_lock(&journal->j_list_lock); + continue; + } + if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) { + __jbd2_journal_unfile_buffer(jh); + jbd_unlock_bh_state(bh); + jbd2_journal_remove_journal_head(bh); + put_bh(bh); + } else { + jbd_unlock_bh_state(bh); + } + put_bh(bh); + cond_resched_lock(&journal->j_list_lock); + } spin_unlock(&journal->j_list_lock); if (err) @@ -577,12 +498,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) */ commit_transaction->t_state = T_COMMIT; - stats.u.run.rs_logging = jiffies; - stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing, - stats.u.run.rs_logging); - stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits; - stats.u.run.rs_blocks_logged = 0; - descriptor = NULL; bufs = 0; while (commit_transaction->t_buffers) { @@ -724,15 +639,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) start_journal_io: for (i = 0; i < bufs; i++) { struct buffer_head *bh = wbuf[i]; - /* - * Compute checksum. - */ - if (JBD2_HAS_COMPAT_FEATURE(journal, - JBD2_FEATURE_COMPAT_CHECKSUM)) { - crc32_sum = - jbd2_checksum_data(crc32_sum, bh); - } - lock_buffer(bh); clear_buffer_dirty(bh); set_buffer_uptodate(bh); @@ -740,7 +646,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) submit_bh(WRITE, bh); } cond_resched(); - stats.u.run.rs_blocks_logged += bufs; /* Force a new descriptor to be generated next time round the loop. */ @@ -749,23 +654,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) } } - /* Done it all: now write the commit record asynchronously. */ - - if (JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { - err = journal_submit_commit_record(journal, commit_transaction, - &cbh, crc32_sum); - if (err) - __jbd2_journal_abort_hard(journal); - - spin_lock(&journal->j_list_lock); - err = journal_wait_on_locked_list(journal, - commit_transaction); - spin_unlock(&journal->j_list_lock); - if (err) - __jbd2_journal_abort_hard(journal); - } - /* Lo and behold: we have just managed to send a transaction to the log. Before we can commit it, wait for the IO so far to complete. 
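The checksum helper removed a little above, jbd2_checksum_data(), folded each logged buffer into a running CRC32 before the commit record was issued. A dependency-free illustration of that accumulation, with plain buffers in place of buffer_heads (the bitwise routine matches the big-endian CRC32 that linux/crc32.h provides, just slower):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* Bitwise big-endian CRC32, polynomial 0x04c11db7, no reflection or
 * final inversion: the crc32_be() contract, done a byte at a time. */
static uint32_t crc32_be(uint32_t crc, const unsigned char *p, size_t len)
{
	int i;

	while (len--) {
		crc ^= (uint32_t)*p++ << 24;
		for (i = 0; i < 8; i++)
			crc = (crc << 1) ^
			      ((crc & 0x80000000) ? 0x04c11db7 : 0);
	}
	return crc;
}

int main(void)
{
	/* stand-ins for the buffer_heads of one transaction */
	const char *bufs[] = { "metadata block 1", "metadata block 2" };
	uint32_t crc = ~0u;	/* same seed the commit path used */
	size_t i;

	for (i = 0; i < sizeof(bufs) / sizeof(bufs[0]); i++)
		crc = crc32_be(crc, (const unsigned char *)bufs[i],
			       strlen(bufs[i]));
	printf("running checksum: 0x%08x\n", crc);
	return 0;
}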
Control buffers being written are on the @@ -865,14 +753,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) jbd_debug(3, "JBD: commit phase 6\n"); - if (!JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { - err = journal_submit_commit_record(journal, commit_transaction, - &cbh, crc32_sum); - if (err) - __jbd2_journal_abort_hard(journal); - } - err = journal_wait_on_commit_record(cbh); + if (journal_write_commit_record(journal, commit_transaction)) + err = -EIO; if (err) jbd2_journal_abort(journal, err); @@ -934,7 +816,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) cp_transaction = jh->b_cp_transaction; if (cp_transaction) { JBUFFER_TRACE(jh, "remove from old cp transaction"); - cp_transaction->t_chp_stats.cs_dropped++; __jbd2_journal_remove_checkpoint(jh); } @@ -986,10 +867,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) } spin_unlock(&journal->j_list_lock); /* - * This is a bit sleazy. We use j_list_lock to protect transition - * of a transaction into T_FINISHED state and calling - * __jbd2_journal_drop_transaction(). Otherwise we could race with - * other checkpointing code processing the transaction... + * This is a bit sleazy. We borrow j_list_lock to protect + * journal->j_committing_transaction in __jbd2_journal_remove_checkpoint. + * Really, __jbd2_journal_remove_checkpoint should be using j_state_lock but + * it's a bit hassle to hold that across __jbd2_journal_remove_checkpoint */ spin_lock(&journal->j_state_lock); spin_lock(&journal->j_list_lock); @@ -1009,36 +890,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) J_ASSERT(commit_transaction->t_state == T_COMMIT); - commit_transaction->t_start = jiffies; - stats.u.run.rs_logging = jbd2_time_diff(stats.u.run.rs_logging, - commit_transaction->t_start); - - /* - * File the transaction for history - */ - stats.ts_type = JBD2_STATS_RUN; - stats.ts_tid = commit_transaction->t_tid; - stats.u.run.rs_handle_count = commit_transaction->t_handle_count; - spin_lock(&journal->j_history_lock); - memcpy(journal->j_history + journal->j_history_cur, &stats, - sizeof(stats)); - if (++journal->j_history_cur == journal->j_history_max) - journal->j_history_cur = 0; - - /* - * Calculate overall stats - */ - journal->j_stats.ts_tid++; - journal->j_stats.u.run.rs_wait += stats.u.run.rs_wait; - journal->j_stats.u.run.rs_running += stats.u.run.rs_running; - journal->j_stats.u.run.rs_locked += stats.u.run.rs_locked; - journal->j_stats.u.run.rs_flushing += stats.u.run.rs_flushing; - journal->j_stats.u.run.rs_logging += stats.u.run.rs_logging; - journal->j_stats.u.run.rs_handle_count += stats.u.run.rs_handle_count; - journal->j_stats.u.run.rs_blocks += stats.u.run.rs_blocks; - journal->j_stats.u.run.rs_blocks_logged += stats.u.run.rs_blocks_logged; - spin_unlock(&journal->j_history_lock); - commit_transaction->t_state = T_FINISHED; J_ASSERT(commit_transaction == journal->j_committing_transaction); journal->j_commit_sequence = commit_transaction->t_tid; diff --git a/trunk/fs/jbd2/journal.c b/trunk/fs/jbd2/journal.c index 96ba846992e9..6ddc5531587c 100644 --- a/trunk/fs/jbd2/journal.c +++ b/trunk/fs/jbd2/journal.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include @@ -641,312 +640,6 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) return jbd2_journal_add_journal_head(bh); } -struct jbd2_stats_proc_session { - journal_t *journal; - struct transaction_stats_s *stats; - int start; - int max; -}; - -static void 
*jbd2_history_skip_empty(struct jbd2_stats_proc_session *s, - struct transaction_stats_s *ts, - int first) -{ - if (ts == s->stats + s->max) - ts = s->stats; - if (!first && ts == s->stats + s->start) - return NULL; - while (ts->ts_type == 0) { - ts++; - if (ts == s->stats + s->max) - ts = s->stats; - if (ts == s->stats + s->start) - return NULL; - } - return ts; - -} - -static void *jbd2_seq_history_start(struct seq_file *seq, loff_t *pos) -{ - struct jbd2_stats_proc_session *s = seq->private; - struct transaction_stats_s *ts; - int l = *pos; - - if (l == 0) - return SEQ_START_TOKEN; - ts = jbd2_history_skip_empty(s, s->stats + s->start, 1); - if (!ts) - return NULL; - l--; - while (l) { - ts = jbd2_history_skip_empty(s, ++ts, 0); - if (!ts) - break; - l--; - } - return ts; -} - -static void *jbd2_seq_history_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct jbd2_stats_proc_session *s = seq->private; - struct transaction_stats_s *ts = v; - - ++*pos; - if (v == SEQ_START_TOKEN) - return jbd2_history_skip_empty(s, s->stats + s->start, 1); - else - return jbd2_history_skip_empty(s, ++ts, 0); -} - -static int jbd2_seq_history_show(struct seq_file *seq, void *v) -{ - struct transaction_stats_s *ts = v; - if (v == SEQ_START_TOKEN) { - seq_printf(seq, "%-4s %-5s %-5s %-5s %-5s %-5s %-5s %-6s %-5s " - "%-5s %-5s %-5s %-5s %-5s\n", "R/C", "tid", - "wait", "run", "lock", "flush", "log", "hndls", - "block", "inlog", "ctime", "write", "drop", - "close"); - return 0; - } - if (ts->ts_type == JBD2_STATS_RUN) - seq_printf(seq, "%-4s %-5lu %-5u %-5u %-5u %-5u %-5u " - "%-6lu %-5lu %-5lu\n", "R", ts->ts_tid, - jiffies_to_msecs(ts->u.run.rs_wait), - jiffies_to_msecs(ts->u.run.rs_running), - jiffies_to_msecs(ts->u.run.rs_locked), - jiffies_to_msecs(ts->u.run.rs_flushing), - jiffies_to_msecs(ts->u.run.rs_logging), - ts->u.run.rs_handle_count, - ts->u.run.rs_blocks, - ts->u.run.rs_blocks_logged); - else if (ts->ts_type == JBD2_STATS_CHECKPOINT) - seq_printf(seq, "%-4s %-5lu %48s %-5u %-5lu %-5lu %-5lu\n", - "C", ts->ts_tid, " ", - jiffies_to_msecs(ts->u.chp.cs_chp_time), - ts->u.chp.cs_written, ts->u.chp.cs_dropped, - ts->u.chp.cs_forced_to_close); - else - J_ASSERT(0); - return 0; -} - -static void jbd2_seq_history_stop(struct seq_file *seq, void *v) -{ -} - -static struct seq_operations jbd2_seq_history_ops = { - .start = jbd2_seq_history_start, - .next = jbd2_seq_history_next, - .stop = jbd2_seq_history_stop, - .show = jbd2_seq_history_show, -}; - -static int jbd2_seq_history_open(struct inode *inode, struct file *file) -{ - journal_t *journal = PDE(inode)->data; - struct jbd2_stats_proc_session *s; - int rc, size; - - s = kmalloc(sizeof(*s), GFP_KERNEL); - if (s == NULL) - return -ENOMEM; - size = sizeof(struct transaction_stats_s) * journal->j_history_max; - s->stats = kmalloc(size, GFP_KERNEL); - if (s->stats == NULL) { - kfree(s); - return -ENOMEM; - } - spin_lock(&journal->j_history_lock); - memcpy(s->stats, journal->j_history, size); - s->max = journal->j_history_max; - s->start = journal->j_history_cur % s->max; - spin_unlock(&journal->j_history_lock); - - rc = seq_open(file, &jbd2_seq_history_ops); - if (rc == 0) { - struct seq_file *m = file->private_data; - m->private = s; - } else { - kfree(s->stats); - kfree(s); - } - return rc; - -} - -static int jbd2_seq_history_release(struct inode *inode, struct file *file) -{ - struct seq_file *seq = file->private_data; - struct jbd2_stats_proc_session *s = seq->private; - - kfree(s->stats); - kfree(s); - return seq_release(inode, file); -} - 
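The seq_file plumbing deleted above follows a common pattern: the open path snapshots the journal's history ring under j_history_lock into a private session, and the iterator then walks that snapshot oldest-first, skipping unused slots. A minimal userspace sketch of that snapshot-then-iterate idiom, under stated assumptions: the names (struct ring, struct stats, snapshot) are hypothetical, and a pthread mutex stands in for the spinlock.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <pthread.h>

    /* Hypothetical model of the removed history ring: 'max' slots, 'cur'
     * is the next write position, unused slots have type == 0. */
    struct stats { int type; unsigned long tid; };

    struct ring {
        pthread_mutex_t lock;          /* stands in for j_history_lock */
        struct stats *hist;
        int cur, max;
    };

    /* Copy the ring under the lock, as jbd2_seq_history_open() did, so a
     * slow reader never holds the lock across the whole dump. */
    static struct stats *snapshot(struct ring *r, int *start, int *max)
    {
        struct stats *copy = malloc(sizeof(*copy) * r->max);
        pthread_mutex_lock(&r->lock);
        memcpy(copy, r->hist, sizeof(*copy) * r->max);
        *max = r->max;
        *start = r->cur % r->max;      /* oldest entry sits at the write cursor */
        pthread_mutex_unlock(&r->lock);
        return copy;
    }

    int main(void)
    {
        struct stats hist[4] = { {0, 0}, {1, 42}, {0, 0}, {1, 43} };
        struct ring r = { PTHREAD_MUTEX_INITIALIZER, hist, 1, 4 };
        int start, max;

        struct stats *snap = snapshot(&r, &start, &max);
        /* One full lap from the oldest slot, skipping empties, as
         * jbd2_history_skip_empty() did with wraparound. */
        for (int i = 0; i < max; i++) {
            struct stats *ts = &snap[(start + i) % max];
            if (ts->type)
                printf("tid %lu\n", ts->tid);
        }
        free(snap);
        return 0;
    }

The snapshot costs one allocation per open, but it keeps the committer's fast path free of reader-induced lock hold times.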
-static struct file_operations jbd2_seq_history_fops = { - .owner = THIS_MODULE, - .open = jbd2_seq_history_open, - .read = seq_read, - .llseek = seq_lseek, - .release = jbd2_seq_history_release, -}; - -static void *jbd2_seq_info_start(struct seq_file *seq, loff_t *pos) -{ - return *pos ? NULL : SEQ_START_TOKEN; -} - -static void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos) -{ - return NULL; -} - -static int jbd2_seq_info_show(struct seq_file *seq, void *v) -{ - struct jbd2_stats_proc_session *s = seq->private; - - if (v != SEQ_START_TOKEN) - return 0; - seq_printf(seq, "%lu transaction, each upto %u blocks\n", - s->stats->ts_tid, - s->journal->j_max_transaction_buffers); - if (s->stats->ts_tid == 0) - return 0; - seq_printf(seq, "average: \n %ums waiting for transaction\n", - jiffies_to_msecs(s->stats->u.run.rs_wait / s->stats->ts_tid)); - seq_printf(seq, " %ums running transaction\n", - jiffies_to_msecs(s->stats->u.run.rs_running / s->stats->ts_tid)); - seq_printf(seq, " %ums transaction was being locked\n", - jiffies_to_msecs(s->stats->u.run.rs_locked / s->stats->ts_tid)); - seq_printf(seq, " %ums flushing data (in ordered mode)\n", - jiffies_to_msecs(s->stats->u.run.rs_flushing / s->stats->ts_tid)); - seq_printf(seq, " %ums logging transaction\n", - jiffies_to_msecs(s->stats->u.run.rs_logging / s->stats->ts_tid)); - seq_printf(seq, " %lu handles per transaction\n", - s->stats->u.run.rs_handle_count / s->stats->ts_tid); - seq_printf(seq, " %lu blocks per transaction\n", - s->stats->u.run.rs_blocks / s->stats->ts_tid); - seq_printf(seq, " %lu logged blocks per transaction\n", - s->stats->u.run.rs_blocks_logged / s->stats->ts_tid); - return 0; -} - -static void jbd2_seq_info_stop(struct seq_file *seq, void *v) -{ -} - -static struct seq_operations jbd2_seq_info_ops = { - .start = jbd2_seq_info_start, - .next = jbd2_seq_info_next, - .stop = jbd2_seq_info_stop, - .show = jbd2_seq_info_show, -}; - -static int jbd2_seq_info_open(struct inode *inode, struct file *file) -{ - journal_t *journal = PDE(inode)->data; - struct jbd2_stats_proc_session *s; - int rc, size; - - s = kmalloc(sizeof(*s), GFP_KERNEL); - if (s == NULL) - return -ENOMEM; - size = sizeof(struct transaction_stats_s); - s->stats = kmalloc(size, GFP_KERNEL); - if (s->stats == NULL) { - kfree(s); - return -ENOMEM; - } - spin_lock(&journal->j_history_lock); - memcpy(s->stats, &journal->j_stats, size); - s->journal = journal; - spin_unlock(&journal->j_history_lock); - - rc = seq_open(file, &jbd2_seq_info_ops); - if (rc == 0) { - struct seq_file *m = file->private_data; - m->private = s; - } else { - kfree(s->stats); - kfree(s); - } - return rc; - -} - -static int jbd2_seq_info_release(struct inode *inode, struct file *file) -{ - struct seq_file *seq = file->private_data; - struct jbd2_stats_proc_session *s = seq->private; - kfree(s->stats); - kfree(s); - return seq_release(inode, file); -} - -static struct file_operations jbd2_seq_info_fops = { - .owner = THIS_MODULE, - .open = jbd2_seq_info_open, - .read = seq_read, - .llseek = seq_lseek, - .release = jbd2_seq_info_release, -}; - -static struct proc_dir_entry *proc_jbd2_stats; - -static void jbd2_stats_proc_init(journal_t *journal) -{ - char name[BDEVNAME_SIZE]; - - snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); - journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats); - if (journal->j_proc_entry) { - struct proc_dir_entry *p; - p = create_proc_entry("history", S_IRUGO, - journal->j_proc_entry); - if (p) { - p->proc_fops = 
&jbd2_seq_history_fops; - p->data = journal; - p = create_proc_entry("info", S_IRUGO, - journal->j_proc_entry); - if (p) { - p->proc_fops = &jbd2_seq_info_fops; - p->data = journal; - } - } - } -} - -static void jbd2_stats_proc_exit(journal_t *journal) -{ - char name[BDEVNAME_SIZE]; - - snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); - remove_proc_entry("info", journal->j_proc_entry); - remove_proc_entry("history", journal->j_proc_entry); - remove_proc_entry(name, proc_jbd2_stats); -} - -static void journal_init_stats(journal_t *journal) -{ - int size; - - if (!proc_jbd2_stats) - return; - - journal->j_history_max = 100; - size = sizeof(struct transaction_stats_s) * journal->j_history_max; - journal->j_history = kzalloc(size, GFP_KERNEL); - if (!journal->j_history) { - journal->j_history_max = 0; - return; - } - spin_lock_init(&journal->j_history_lock); -} - /* * Management for journal control blocks: functions to create and * destroy journal_t structures, and to initialise and read existing @@ -988,9 +681,6 @@ static journal_t * journal_init_common (void) kfree(journal); goto fail; } - - journal_init_stats(journal); - return journal; fail: return NULL; @@ -1045,7 +735,6 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, journal->j_fs_dev = fs_dev; journal->j_blk_offset = start; journal->j_maxlen = len; - jbd2_stats_proc_init(journal); bh = __getblk(journal->j_dev, start, journal->j_blocksize); J_ASSERT(bh != NULL); @@ -1084,7 +773,6 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits; journal->j_blocksize = inode->i_sb->s_blocksize; - jbd2_stats_proc_init(journal); /* journal descriptor can store up to n blocks -bzzz */ n = journal->j_blocksize / sizeof(journal_block_tag_t); @@ -1465,8 +1153,6 @@ void jbd2_journal_destroy(journal_t *journal) brelse(journal->j_sb_buffer); } - if (journal->j_proc_entry) - jbd2_stats_proc_exit(journal); if (journal->j_inode) iput(journal->j_inode); if (journal->j_revoke) @@ -1578,32 +1264,6 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, return 1; } -/* - * jbd2_journal_clear_features () - Clear a given journal feature in the - * superblock - * @journal: Journal to act on. - * @compat: bitmask of compatible features - * @ro: bitmask of features that force read-only mount - * @incompat: bitmask of incompatible features - * - * Clear a given journal feature as present on the - * superblock. - */ -void jbd2_journal_clear_features(journal_t *journal, unsigned long compat, - unsigned long ro, unsigned long incompat) -{ - journal_superblock_t *sb; - - jbd_debug(1, "Clear features 0x%lx/0x%lx/0x%lx\n", - compat, ro, incompat); - - sb = journal->j_superblock; - - sb->s_feature_compat &= ~cpu_to_be32(compat); - sb->s_feature_ro_compat &= ~cpu_to_be32(ro); - sb->s_feature_incompat &= ~cpu_to_be32(incompat); -} -EXPORT_SYMBOL(jbd2_journal_clear_features); /** * int jbd2_journal_update_format () - Update on-disk journal structure. 
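The jbd2_journal_clear_features() helper removed just above is the mirror of jbd2_journal_set_features(): it clears the requested bits in the three big-endian feature words of the journal superblock. A standalone sketch of that masking pattern, assuming a hypothetical sb_model and clear_features, with htonl() standing in for cpu_to_be32():

    #include <stdint.h>
    #include <stdio.h>
    #include <arpa/inet.h>     /* htonl()/ntohl() as cpu_to_be32() stand-ins */

    struct sb_model {              /* hypothetical journal_superblock_t stand-in */
        uint32_t s_feature_compat;     /* stored big-endian on disk */
        uint32_t s_feature_ro_compat;
        uint32_t s_feature_incompat;
    };

    static void clear_features(struct sb_model *sb, unsigned long compat,
                               unsigned long ro, unsigned long incompat)
    {
        /* Same idea as the removed helper: AND each on-disk word with the
         * complement of the byte-swapped mask. */
        sb->s_feature_compat    &= ~htonl(compat);
        sb->s_feature_ro_compat &= ~htonl(ro);
        sb->s_feature_incompat  &= ~htonl(incompat);
    }

    int main(void)
    {
        struct sb_model sb = { htonl(0x1), htonl(0x0), htonl(0x7) };
        clear_features(&sb, 0x1, 0, 0x4);   /* drop one compat, one incompat bit */
        printf("incompat now 0x%x\n", (unsigned)ntohl(sb.s_feature_incompat));
        return 0;                           /* prints 0x3 */
    }

Doing the swap on the mask rather than the field means each word is touched exactly once, regardless of host endianness.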
@@ -1973,7 +1633,7 @@ static int journal_init_jbd2_journal_head_cache(void) jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head", sizeof(struct journal_head), 0, /* offset */ - SLAB_TEMPORARY, /* flags */ + 0, /* flags */ NULL); /* ctor */ retval = 0; if (jbd2_journal_head_cache == 0) { @@ -2240,28 +1900,6 @@ static void __exit jbd2_remove_debugfs_entry(void) #endif -#ifdef CONFIG_PROC_FS - -#define JBD2_STATS_PROC_NAME "fs/jbd2" - -static void __init jbd2_create_jbd_stats_proc_entry(void) -{ - proc_jbd2_stats = proc_mkdir(JBD2_STATS_PROC_NAME, NULL); -} - -static void __exit jbd2_remove_jbd_stats_proc_entry(void) -{ - if (proc_jbd2_stats) - remove_proc_entry(JBD2_STATS_PROC_NAME, NULL); -} - -#else - -#define jbd2_create_jbd_stats_proc_entry() do {} while (0) -#define jbd2_remove_jbd_stats_proc_entry() do {} while (0) - -#endif - struct kmem_cache *jbd2_handle_cache; static int __init journal_init_handle_cache(void) @@ -2269,7 +1907,7 @@ static int __init journal_init_handle_cache(void) jbd2_handle_cache = kmem_cache_create("jbd2_journal_handle", sizeof(handle_t), 0, /* offset */ - SLAB_TEMPORARY, /* flags */ + 0, /* flags */ NULL); /* ctor */ if (jbd2_handle_cache == NULL) { printk(KERN_EMERG "JBD: failed to create handle cache\n"); @@ -2317,7 +1955,6 @@ static int __init journal_init(void) if (ret != 0) jbd2_journal_destroy_caches(); jbd2_create_debugfs_entry(); - jbd2_create_jbd_stats_proc_entry(); return ret; } @@ -2329,7 +1966,6 @@ static void __exit journal_exit(void) printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); #endif jbd2_remove_debugfs_entry(); - jbd2_remove_jbd_stats_proc_entry(); jbd2_journal_destroy_caches(); } diff --git a/trunk/fs/jbd2/recovery.c b/trunk/fs/jbd2/recovery.c index 921680663fa2..d0ce627539ef 100644 --- a/trunk/fs/jbd2/recovery.c +++ b/trunk/fs/jbd2/recovery.c @@ -21,7 +21,6 @@ #include #include #include -#include #endif /* @@ -317,37 +316,6 @@ static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag return block; } -/* - * calc_chksums calculates the checksums for the blocks described in the - * descriptor block. - */ -static int calc_chksums(journal_t *journal, struct buffer_head *bh, - unsigned long *next_log_block, __u32 *crc32_sum) -{ - int i, num_blks, err; - unsigned long io_block; - struct buffer_head *obh; - - num_blks = count_tags(journal, bh); - /* Calculate checksum of the descriptor block. 
*/ - *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size); - - for (i = 0; i < num_blks; i++) { - io_block = (*next_log_block)++; - wrap(journal, *next_log_block); - err = jread(&obh, journal, io_block); - if (err) { - printk(KERN_ERR "JBD: IO error %d recovering block " - "%lu in log\n", err, io_block); - return 1; - } else { - *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data, - obh->b_size); - } - } - return 0; -} - static int do_one_pass(journal_t *journal, struct recovery_info *info, enum passtype pass) { @@ -360,7 +328,6 @@ static int do_one_pass(journal_t *journal, unsigned int sequence; int blocktype; int tag_bytes = journal_tag_bytes(journal); - __u32 crc32_sum = ~0; /* Transactional Checksums */ /* Precompute the maximum metadata descriptors in a descriptor block */ int MAX_BLOCKS_PER_DESC; @@ -452,26 +419,12 @@ static int do_one_pass(journal_t *journal, switch(blocktype) { case JBD2_DESCRIPTOR_BLOCK: /* If it is a valid descriptor block, replay it - * in pass REPLAY; if journal_checksums enabled, then - * calculate checksums in PASS_SCAN, otherwise, - * just skip over the blocks it describes. */ + * in pass REPLAY; otherwise, just skip over the + * blocks it describes. */ if (pass != PASS_REPLAY) { - if (pass == PASS_SCAN && - JBD2_HAS_COMPAT_FEATURE(journal, - JBD2_FEATURE_COMPAT_CHECKSUM) && - !info->end_transaction) { - if (calc_chksums(journal, bh, - &next_log_block, - &crc32_sum)) { - put_bh(bh); - break; - } - put_bh(bh); - continue; - } next_log_block += count_tags(journal, bh); wrap(journal, next_log_block); - put_bh(bh); + brelse(bh); continue; } @@ -563,96 +516,9 @@ static int do_one_pass(journal_t *journal, continue; case JBD2_COMMIT_BLOCK: - /* How to differentiate between interrupted commit - * and journal corruption ? - * - * {nth transaction} - * Checksum Verification Failed - * | - * ____________________ - * | | - * async_commit sync_commit - * | | - * | GO TO NEXT "Journal Corruption" - * | TRANSACTION - * | - * {(n+1)th transanction} - * | - * _______|______________ - * | | - * Commit block found Commit block not found - * | | - * "Journal Corruption" | - * _____________|_________ - * | | - * nth trans corrupt OR nth trans - * and (n+1)th interrupted interrupted - * before commit block - * could reach the disk. - * (Cannot find the difference in above - * mentioned conditions. Hence assume - * "Interrupted Commit".) - */ - - /* Found an expected commit block: if checksums - * are present verify them in PASS_SCAN; else not - * much to do other than move on to the next sequence + /* Found an expected commit block: not much to + * do other than move on to the next sequence * number. 
*/ - if (pass == PASS_SCAN && - JBD2_HAS_COMPAT_FEATURE(journal, - JBD2_FEATURE_COMPAT_CHECKSUM)) { - int chksum_err, chksum_seen; - struct commit_header *cbh = - (struct commit_header *)bh->b_data; - unsigned found_chksum = - be32_to_cpu(cbh->h_chksum[0]); - - chksum_err = chksum_seen = 0; - - if (info->end_transaction) { - printk(KERN_ERR "JBD: Transaction %u " - "found to be corrupt.\n", - next_commit_ID - 1); - brelse(bh); - break; - } - - if (crc32_sum == found_chksum && - cbh->h_chksum_type == JBD2_CRC32_CHKSUM && - cbh->h_chksum_size == - JBD2_CRC32_CHKSUM_SIZE) - chksum_seen = 1; - else if (!(cbh->h_chksum_type == 0 && - cbh->h_chksum_size == 0 && - found_chksum == 0 && - !chksum_seen)) - /* - * If fs is mounted using an old kernel and then - * kernel with journal_chksum is used then we - * get a situation where the journal flag has - * checksum flag set but checksums are not - * present i.e chksum = 0, in the individual - * commit blocks. - * Hence to avoid checksum failures, in this - * situation, this extra check is added. - */ - chksum_err = 1; - - if (chksum_err) { - info->end_transaction = next_commit_ID; - - if (!JBD2_HAS_COMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){ - printk(KERN_ERR - "JBD: Transaction %u " - "found to be corrupt.\n", - next_commit_ID); - brelse(bh); - break; - } - } - crc32_sum = ~0; - } brelse(bh); next_commit_ID++; continue; @@ -688,10 +554,9 @@ static int do_one_pass(journal_t *journal, * transaction marks the end of the valid log. */ - if (pass == PASS_SCAN) { - if (!info->end_transaction) - info->end_transaction = next_commit_ID; - } else { + if (pass == PASS_SCAN) + info->end_transaction = next_commit_ID; + else { /* It's really bad news if different passes end up at * different places (but possible due to IO errors). */ if (info->end_transaction != next_commit_ID) { diff --git a/trunk/fs/jbd2/revoke.c b/trunk/fs/jbd2/revoke.c index df36f42e19e1..3595fd432d5b 100644 --- a/trunk/fs/jbd2/revoke.c +++ b/trunk/fs/jbd2/revoke.c @@ -171,15 +171,13 @@ int __init jbd2_journal_init_revoke_caches(void) { jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record", sizeof(struct jbd2_revoke_record_s), - 0, - SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, - NULL); + 0, SLAB_HWCACHE_ALIGN, NULL); if (jbd2_revoke_record_cache == 0) return -ENOMEM; jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table", sizeof(struct jbd2_revoke_table_s), - 0, SLAB_TEMPORARY, NULL); + 0, 0, NULL); if (jbd2_revoke_table_cache == 0) { kmem_cache_destroy(jbd2_revoke_record_cache); jbd2_revoke_record_cache = NULL; diff --git a/trunk/fs/jbd2/transaction.c b/trunk/fs/jbd2/transaction.c index b9b0b6f899b9..b1fcf2b3dca3 100644 --- a/trunk/fs/jbd2/transaction.c +++ b/trunk/fs/jbd2/transaction.c @@ -54,13 +54,11 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) spin_lock_init(&transaction->t_handle_lock); /* Set up the commit timer for the new transaction. 
*/ - journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); + journal->j_commit_timer.expires = transaction->t_expires; add_timer(&journal->j_commit_timer); J_ASSERT(journal->j_running_transaction == NULL); journal->j_running_transaction = transaction; - transaction->t_max_wait = 0; - transaction->t_start = jiffies; return transaction; } @@ -87,7 +85,6 @@ static int start_this_handle(journal_t *journal, handle_t *handle) int nblocks = handle->h_buffer_credits; transaction_t *new_transaction = NULL; int ret = 0; - unsigned long ts = jiffies; if (nblocks > journal->j_max_transaction_buffers) { printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", @@ -220,12 +217,6 @@ static int start_this_handle(journal_t *journal, handle_t *handle) /* OK, account for the buffers that this operation expects to * use and add the handle to the running transaction. */ - if (time_after(transaction->t_start, ts)) { - ts = jbd2_time_diff(ts, transaction->t_start); - if (ts > transaction->t_max_wait) - transaction->t_max_wait = ts; - } - handle->h_transaction = transaction; transaction->t_outstanding_credits += nblocks; transaction->t_updates++; @@ -241,8 +232,6 @@ static int start_this_handle(journal_t *journal, handle_t *handle) return ret; } -static struct lock_class_key jbd2_handle_key; - /* Allocate a new handle. This should probably be in a slab... */ static handle_t *new_handle(int nblocks) { @@ -253,9 +242,6 @@ static handle_t *new_handle(int nblocks) handle->h_buffer_credits = nblocks; handle->h_ref = 1; - lockdep_init_map(&handle->h_lockdep_map, "jbd2_handle", - &jbd2_handle_key, 0); - return handle; } @@ -298,11 +284,7 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks) jbd2_free_handle(handle); current->journal_info = NULL; handle = ERR_PTR(err); - goto out; } - - lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_); -out: return handle; } @@ -1182,7 +1164,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) } /* That test should have eliminated the following case: */ - J_ASSERT_JH(jh, jh->b_frozen_data == NULL); + J_ASSERT_JH(jh, jh->b_frozen_data == 0); JBUFFER_TRACE(jh, "file as BJ_Metadata"); spin_lock(&journal->j_list_lock); @@ -1428,8 +1410,6 @@ int jbd2_journal_stop(handle_t *handle) spin_unlock(&journal->j_state_lock); } - lock_release(&handle->h_lockdep_map, 1, _THIS_IP_); - jbd2_free_handle(handle); return err; } @@ -1532,7 +1512,7 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); if (jh->b_jlist != BJ_None) - J_ASSERT_JH(jh, transaction != NULL); + J_ASSERT_JH(jh, transaction != 0); switch (jh->b_jlist) { case BJ_None: @@ -1601,11 +1581,11 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) if (buffer_locked(bh) || buffer_dirty(bh)) goto out; - if (jh->b_next_transaction != NULL) + if (jh->b_next_transaction != 0) goto out; spin_lock(&journal->j_list_lock); - if (jh->b_transaction != NULL && jh->b_cp_transaction == NULL) { + if (jh->b_transaction != 0 && jh->b_cp_transaction == 0) { if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) { /* A written-back ordered data buffer */ JBUFFER_TRACE(jh, "release data"); @@ -1613,7 +1593,7 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) jbd2_journal_remove_journal_head(bh); __brelse(bh); } - } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { + } else if (jh->b_cp_transaction != 0 && jh->b_transaction == 0) { /* written-back checkpointed metadata 
buffer */ if (jh->b_jlist == BJ_None) { JBUFFER_TRACE(jh, "remove from checkpoint list"); @@ -1973,7 +1953,7 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); J_ASSERT_JH(jh, jh->b_transaction == transaction || - jh->b_transaction == NULL); + jh->b_transaction == 0); if (jh->b_transaction && jh->b_jlist == jlist) return; diff --git a/trunk/fs/ocfs2/cluster/sys.c b/trunk/fs/ocfs2/cluster/sys.c index 0c095ce7723d..a4b07730b2e1 100644 --- a/trunk/fs/ocfs2/cluster/sys.c +++ b/trunk/fs/ocfs2/cluster/sys.c @@ -64,7 +64,7 @@ int o2cb_sys_init(void) { int ret; - o2cb_kset = kset_create_and_add("o2cb", NULL, NULL); + o2cb_kset = kset_create_and_add("o2cb", NULL, fs_kobj); if (!o2cb_kset) return -ENOMEM; diff --git a/trunk/fs/read_write.c b/trunk/fs/read_write.c index 1c177f29e1b7..c4d3d17923f1 100644 --- a/trunk/fs/read_write.c +++ b/trunk/fs/read_write.c @@ -446,7 +446,6 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) } return seg; } -EXPORT_SYMBOL(iov_shorten); ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) diff --git a/trunk/include/asm-arm/bitops.h b/trunk/include/asm-arm/bitops.h index 5c60bfc1a84d..47a6b086eee2 100644 --- a/trunk/include/asm-arm/bitops.h +++ b/trunk/include/asm-arm/bitops.h @@ -310,8 +310,6 @@ static inline int constant_fls(int x) _find_first_zero_bit_le(p,sz) #define ext2_find_next_zero_bit(p,sz,off) \ _find_next_zero_bit_le(p,sz,off) -#define ext2_find_next_bit(p, sz, off) \ - _find_next_bit_le(p, sz, off) /* * Minix is defined to use little-endian byte ordering. diff --git a/trunk/include/asm-generic/bitops/ext2-non-atomic.h b/trunk/include/asm-generic/bitops/ext2-non-atomic.h index 63cf822431a2..1697404afa05 100644 --- a/trunk/include/asm-generic/bitops/ext2-non-atomic.h +++ b/trunk/include/asm-generic/bitops/ext2-non-atomic.h @@ -14,7 +14,5 @@ generic_find_first_zero_le_bit((unsigned long *)(addr), (size)) #define ext2_find_next_zero_bit(addr, size, off) \ generic_find_next_zero_le_bit((unsigned long *)(addr), (size), (off)) -#define ext2_find_next_bit(addr, size, off) \ - generic_find_next_le_bit((unsigned long *)(addr), (size), (off)) #endif /* _ASM_GENERIC_BITOPS_EXT2_NON_ATOMIC_H_ */ diff --git a/trunk/include/asm-generic/bitops/le.h b/trunk/include/asm-generic/bitops/le.h index 80e3bf13b2b9..b9c7e5d2d2ad 100644 --- a/trunk/include/asm-generic/bitops/le.h +++ b/trunk/include/asm-generic/bitops/le.h @@ -20,8 +20,6 @@ #define generic___test_and_clear_le_bit(nr, addr) __test_and_clear_bit(nr, addr) #define generic_find_next_zero_le_bit(addr, size, offset) find_next_zero_bit(addr, size, offset) -#define generic_find_next_le_bit(addr, size, offset) \ - find_next_bit(addr, size, offset) #elif defined(__BIG_ENDIAN) @@ -44,8 +42,6 @@ extern unsigned long generic_find_next_zero_le_bit(const unsigned long *addr, unsigned long size, unsigned long offset); -extern unsigned long generic_find_next_le_bit(const unsigned long *addr, - unsigned long size, unsigned long offset); #else #error "Please fix " diff --git a/trunk/include/asm-m68k/bitops.h b/trunk/include/asm-m68k/bitops.h index 83d1f286230b..2976b5d68e96 100644 --- a/trunk/include/asm-m68k/bitops.h +++ b/trunk/include/asm-m68k/bitops.h @@ -410,8 +410,6 @@ static inline int ext2_find_next_zero_bit(const void *vaddr, unsigned size, res = ext2_find_first_zero_bit (p, size - 32 * (p - addr)); return (p - addr) * 32 + res; } -#define ext2_find_next_bit(addr, 
size, off) \ - generic_find_next_le_bit((unsigned long *)(addr), (size), (off)) #endif /* __KERNEL__ */ diff --git a/trunk/include/asm-m68knommu/bitops.h b/trunk/include/asm-m68knommu/bitops.h index f43afe1fc3b3..f8dfb7ba2e25 100644 --- a/trunk/include/asm-m68knommu/bitops.h +++ b/trunk/include/asm-m68knommu/bitops.h @@ -294,8 +294,6 @@ static __inline__ unsigned long ext2_find_next_zero_bit(void *addr, unsigned lon return result + ffz(__swab32(tmp)); } -#define ext2_find_next_bit(addr, size, off) \ - generic_find_next_le_bit((unsigned long *)(addr), (size), (off)) #include #endif /* __KERNEL__ */ diff --git a/trunk/include/asm-powerpc/bitops.h b/trunk/include/asm-powerpc/bitops.h index 220d9a781ab9..733b4af7f4f1 100644 --- a/trunk/include/asm-powerpc/bitops.h +++ b/trunk/include/asm-powerpc/bitops.h @@ -359,8 +359,6 @@ static __inline__ int test_le_bit(unsigned long nr, unsigned long generic_find_next_zero_le_bit(const unsigned long *addr, unsigned long size, unsigned long offset); -unsigned long generic_find_next_le_bit(const unsigned long *addr, - unsigned long size, unsigned long offset); /* Bitmap functions for the ext2 filesystem */ #define ext2_set_bit(nr,addr) \ @@ -380,8 +378,6 @@ unsigned long generic_find_next_le_bit(const unsigned long *addr, #define ext2_find_next_zero_bit(addr, size, off) \ generic_find_next_zero_le_bit((unsigned long*)addr, size, off) -#define ext2_find_next_bit(addr, size, off) \ - generic_find_next_le_bit((unsigned long *)addr, size, off) /* Bitmap functions for the minix filesystem. */ #define minix_test_and_set_bit(nr,addr) \ diff --git a/trunk/include/asm-s390/bitops.h b/trunk/include/asm-s390/bitops.h index dba6fecad0be..34d9a6357c38 100644 --- a/trunk/include/asm-s390/bitops.h +++ b/trunk/include/asm-s390/bitops.h @@ -772,8 +772,6 @@ static inline int sched_find_first_bit(unsigned long *b) test_and_clear_bit((nr)^(__BITOPS_WORDSIZE - 8), (unsigned long *)addr) #define ext2_test_bit(nr, addr) \ test_bit((nr)^(__BITOPS_WORDSIZE - 8), (unsigned long *)addr) -#define ext2_find_next_bit(addr, size, off) \ - generic_find_next_le_bit((unsigned long *)(addr), (size), (off)) #ifndef __s390x__ diff --git a/trunk/include/linux/buffer_head.h b/trunk/include/linux/buffer_head.h index e98801f06dcc..da0d83fbadc0 100644 --- a/trunk/include/linux/buffer_head.h +++ b/trunk/include/linux/buffer_head.h @@ -192,8 +192,6 @@ int sync_dirty_buffer(struct buffer_head *bh); int submit_bh(int, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); -int bh_uptodate_or_lock(struct buffer_head *bh); -int bh_submit_read(struct buffer_head *bh); extern int buffer_heads_over_limit; diff --git a/trunk/include/linux/ext4_fs.h b/trunk/include/linux/ext4_fs.h index 1852313fc7c7..97dd409d5f4a 100644 --- a/trunk/include/linux/ext4_fs.h +++ b/trunk/include/linux/ext4_fs.h @@ -20,8 +20,6 @@ #include #include -#include - /* * The second extended filesystem constants/structures */ @@ -53,50 +51,6 @@ #define ext4_debug(f, a...) do {} while (0) #endif -#define EXT4_MULTIBLOCK_ALLOCATOR 1 - -/* prefer goal again. 
length */ -#define EXT4_MB_HINT_MERGE 1 -/* blocks already reserved */ -#define EXT4_MB_HINT_RESERVED 2 -/* metadata is being allocated */ -#define EXT4_MB_HINT_METADATA 4 -/* first blocks in the file */ -#define EXT4_MB_HINT_FIRST 8 -/* search for the best chunk */ -#define EXT4_MB_HINT_BEST 16 -/* data is being allocated */ -#define EXT4_MB_HINT_DATA 32 -/* don't preallocate (for tails) */ -#define EXT4_MB_HINT_NOPREALLOC 64 -/* allocate for locality group */ -#define EXT4_MB_HINT_GROUP_ALLOC 128 -/* allocate goal blocks or none */ -#define EXT4_MB_HINT_GOAL_ONLY 256 -/* goal is meaningful */ -#define EXT4_MB_HINT_TRY_GOAL 512 - -struct ext4_allocation_request { - /* target inode for block we're allocating */ - struct inode *inode; - /* logical block in target inode */ - ext4_lblk_t logical; - /* phys. target (a hint) */ - ext4_fsblk_t goal; - /* the closest logical allocated block to the left */ - ext4_lblk_t lleft; - /* phys. block for ^^^ */ - ext4_fsblk_t pleft; - /* the closest logical allocated block to the right */ - ext4_lblk_t lright; - /* phys. block for ^^^ */ - ext4_fsblk_t pright; - /* how many blocks we want to allocate */ - unsigned long len; - /* flags. see above EXT4_MB_HINT_* */ - unsigned long flags; -}; - /* * Special inodes numbers */ @@ -119,8 +73,8 @@ struct ext4_allocation_request { * Macro-instructions used to manage several block sizes */ #define EXT4_MIN_BLOCK_SIZE 1024 -#define EXT4_MAX_BLOCK_SIZE 65536 -#define EXT4_MIN_BLOCK_LOG_SIZE 10 +#define EXT4_MAX_BLOCK_SIZE 4096 +#define EXT4_MIN_BLOCK_LOG_SIZE 10 #ifdef __KERNEL__ # define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) #else @@ -164,11 +118,6 @@ struct ext4_group_desc __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */ __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */ __le32 bg_inode_table_hi; /* Inodes table block MSB */ - __le16 bg_free_blocks_count_hi;/* Free blocks count MSB */ - __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */ - __le16 bg_used_dirs_count_hi; /* Directories count MSB */ - __le16 bg_itable_unused_hi; /* Unused inodes count MSB */ - __u32 bg_reserved2[3]; }; #define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ @@ -229,9 +178,8 @@ struct ext4_group_desc #define EXT4_NOTAIL_FL 0x00008000 /* file tail should not be merged */ #define EXT4_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ -#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ -#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ +#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ #define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ @@ -289,7 +237,6 @@ struct ext4_new_group_data { #endif #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) #define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) -#define EXT4_IOC_MIGRATE _IO('f', 7) /* * ioctl commands in 32 bit emulation @@ -328,18 +275,18 @@ struct ext4_mount_options { struct ext4_inode { __le16 i_mode; /* File mode */ __le16 i_uid; /* Low 16 bits of Owner Uid */ - __le32 i_size_lo; /* Size in bytes */ + __le32 i_size; /* Size in bytes */ __le32 i_atime; /* Access time */ __le32 i_ctime; /* Inode Change time */ __le32 i_mtime; /* Modification time */ __le32 i_dtime; /* Deletion Time */ __le16 i_gid; /* Low 16 bits of Group Id */ __le16 i_links_count; /* Links count */ - __le32 i_blocks_lo; /* Blocks 
count */ + __le32 i_blocks; /* Blocks count */ __le32 i_flags; /* File flags */ union { struct { - __le32 l_i_version; + __u32 l_i_reserved1; } linux1; struct { __u32 h_i_translator; @@ -350,12 +297,12 @@ struct ext4_inode { } osd1; /* OS dependent 1 */ __le32 i_block[EXT4_N_BLOCKS];/* Pointers to blocks */ __le32 i_generation; /* File version (for NFS) */ - __le32 i_file_acl_lo; /* File ACL */ - __le32 i_size_high; + __le32 i_file_acl; /* File ACL */ + __le32 i_dir_acl; /* Directory ACL */ __le32 i_obso_faddr; /* Obsoleted fragment address */ union { struct { - __le16 l_i_blocks_high; /* were l_i_reserved1 */ + __le16 l_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */ __le16 l_i_file_acl_high; __le16 l_i_uid_high; /* these 2 fields */ __le16 l_i_gid_high; /* were reserved2[0] */ @@ -381,9 +328,9 @@ struct ext4_inode { __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ __le32 i_crtime; /* File Creation time */ __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */ - __le32 i_version_hi; /* high 32 bits for 64-bit version */ }; +#define i_size_high i_dir_acl #define EXT4_EPOCH_BITS 2 #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) @@ -455,12 +402,9 @@ do { \ raw_inode->xtime ## _extra); \ } while (0) -#define i_disk_version osd1.linux1.l_i_version - #if defined(__KERNEL__) || defined(__linux__) #define i_reserved1 osd1.linux1.l_i_reserved1 #define i_file_acl_high osd2.linux2.l_i_file_acl_high -#define i_blocks_high osd2.linux2.l_i_blocks_high #define i_uid_low i_uid #define i_gid_low i_gid #define i_uid_high osd2.linux2.l_i_uid_high @@ -517,10 +461,7 @@ do { \ #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ #define EXT4_MOUNT_EXTENTS 0x400000 /* Extents support */ -#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ -#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ -#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ -#define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */ + /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt @@ -540,7 +481,6 @@ do { \ #define ext4_test_bit ext2_test_bit #define ext4_find_first_zero_bit ext2_find_first_zero_bit #define ext4_find_next_zero_bit ext2_find_next_zero_bit -#define ext4_find_next_bit ext2_find_next_bit /* * Maximal mount counts between two filesystem checks @@ -731,7 +671,6 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 #define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 -#define EXT4_FEATURE_RO_COMPAT_HUGE_FILE 0x0008 #define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 @@ -743,7 +682,6 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) #define EXT4_FEATURE_INCOMPAT_META_BG 0x0010 #define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ #define EXT4_FEATURE_INCOMPAT_64BIT 0x0080 -#define EXT4_FEATURE_INCOMPAT_MMP 0x0100 #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 #define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR @@ -758,8 +696,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \ 
EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ - EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\ - EXT4_FEATURE_RO_COMPAT_HUGE_FILE) + EXT4_FEATURE_RO_COMPAT_BTREE_DIR) /* * Default values for user and/or group using reserved blocks @@ -830,26 +767,6 @@ struct ext4_dir_entry_2 { #define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1) #define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \ ~EXT4_DIR_ROUND) -#define EXT4_MAX_REC_LEN ((1<<16)-1) - -static inline unsigned ext4_rec_len_from_disk(__le16 dlen) -{ - unsigned len = le16_to_cpu(dlen); - - if (len == EXT4_MAX_REC_LEN) - return 1 << 16; - return len; -} - -static inline __le16 ext4_rec_len_to_disk(unsigned len) -{ - if (len == (1 << 16)) - return cpu_to_le16(EXT4_MAX_REC_LEN); - else if (len > (1 << 16)) - BUG(); - return cpu_to_le16(len); -} - /* * Hash Tree Directory indexing * (c) Daniel Phillips, 2001 @@ -893,7 +810,7 @@ struct ext4_iloc { struct buffer_head *bh; unsigned long offset; - ext4_group_t block_group; + unsigned long block_group; }; static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc) @@ -918,7 +835,7 @@ struct dir_private_info { /* calculate the first block number of the group */ static inline ext4_fsblk_t -ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no) +ext4_group_first_block_no(struct super_block *sb, unsigned long group_no) { return group_no * (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); @@ -949,24 +866,21 @@ extern unsigned int ext4_block_group(struct super_block *sb, ext4_fsblk_t blocknr); extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb, ext4_fsblk_t blocknr); -extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group); -extern unsigned long ext4_bg_num_gdb(struct super_block *sb, - ext4_group_t group); +extern int ext4_bg_has_super(struct super_block *sb, int group); +extern unsigned long ext4_bg_num_gdb(struct super_block *sb, int group); extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode, ext4_fsblk_t goal, int *errp); extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode, ext4_fsblk_t goal, unsigned long *count, int *errp); -extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, unsigned long *count, int *errp); extern void ext4_free_blocks (handle_t *handle, struct inode *inode, - ext4_fsblk_t block, unsigned long count, int metadata); + ext4_fsblk_t block, unsigned long count); extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, ext4_fsblk_t block, unsigned long count, unsigned long *pdquot_freed_blocks); extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *); extern void ext4_check_blocks_bitmap (struct super_block *); extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, - ext4_group_t block_group, + unsigned int block_group, struct buffer_head ** bh); extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); extern void ext4_init_block_alloc_info(struct inode *); @@ -997,32 +911,15 @@ extern unsigned long ext4_count_dirs (struct super_block *); extern void ext4_check_inodes_bitmap (struct super_block *); extern unsigned long ext4_count_free (struct buffer_head *, unsigned); -/* mballoc.c */ -extern long ext4_mb_stats; -extern long ext4_mb_max_to_scan; -extern int ext4_mb_init(struct super_block *, int); -extern int ext4_mb_release(struct super_block *); -extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, - struct ext4_allocation_request 
*, int *); -extern int ext4_mb_reserve_blocks(struct super_block *, int); -extern void ext4_mb_discard_inode_preallocations(struct inode *); -extern int __init init_ext4_mballoc(void); -extern void exit_ext4_mballoc(void); -extern void ext4_mb_free_blocks(handle_t *, struct inode *, - unsigned long, unsigned long, int, unsigned long *); - /* inode.c */ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, struct buffer_head *bh, ext4_fsblk_t blocknr); -struct buffer_head *ext4_getblk(handle_t *, struct inode *, - ext4_lblk_t, int, int *); -struct buffer_head *ext4_bread(handle_t *, struct inode *, - ext4_lblk_t, int, int *); +struct buffer_head * ext4_getblk (handle_t *, struct inode *, long, int, int *); +struct buffer_head * ext4_bread (handle_t *, struct inode *, int, int, int *); int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, - ext4_lblk_t iblock, unsigned long maxblocks, - struct buffer_head *bh_result, - int create, int extend_disksize); + sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result, + int create, int extend_disksize); extern void ext4_read_inode (struct inode *); extern int ext4_write_inode (struct inode *, int); @@ -1046,9 +943,6 @@ extern int ext4_ioctl (struct inode *, struct file *, unsigned int, unsigned long); extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long); -/* migrate.c */ -extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int, - unsigned long); /* namei.c */ extern int ext4_orphan_add(handle_t *, struct inode *); extern int ext4_orphan_del(handle_t *, struct inode *); @@ -1071,12 +965,6 @@ extern void ext4_abort (struct super_block *, const char *, const char *, ...) extern void ext4_warning (struct super_block *, const char *, const char *, ...) 
__attribute__ ((format (printf, 3, 4))); extern void ext4_update_dynamic_rev (struct super_block *sb); -extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, - __u32 compat); -extern int ext4_update_rocompat_feature(handle_t *handle, - struct super_block *sb, __u32 rocompat); -extern int ext4_update_incompat_feature(handle_t *handle, - struct super_block *sb, __u32 incompat); extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, struct ext4_group_desc *bg); extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, @@ -1129,29 +1017,6 @@ static inline void ext4_r_blocks_count_set(struct ext4_super_block *es, es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32); } -static inline loff_t ext4_isize(struct ext4_inode *raw_inode) -{ - return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) | - le32_to_cpu(raw_inode->i_size_lo); -} - -static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size) -{ - raw_inode->i_size_lo = cpu_to_le32(i_size); - raw_inode->i_size_high = cpu_to_le32(i_size >> 32); -} - -static inline -struct ext4_group_info *ext4_get_group_info(struct super_block *sb, - ext4_group_t group) -{ - struct ext4_group_info ***grp_info; - long indexv, indexh; - grp_info = EXT4_SB(sb)->s_group_info; - indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); - indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); - return grp_info[indexv][indexh]; -} #define ext4_std_error(sb, errno) \ @@ -1183,7 +1048,7 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations; extern int ext4_ext_tree_init(handle_t *handle, struct inode *); extern int ext4_ext_writepage_trans_blocks(struct inode *, int); extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, - ext4_lblk_t iblock, + ext4_fsblk_t iblock, unsigned long max_blocks, struct buffer_head *bh_result, int create, int extend_disksize); extern void ext4_ext_truncate(struct inode *, struct page *); @@ -1191,10 +1056,19 @@ extern void ext4_ext_init(struct super_block *); extern void ext4_ext_release(struct super_block *); extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len); -extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, - sector_t block, unsigned long max_blocks, - struct buffer_head *bh, int create, - int extend_disksize); +static inline int +ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, + unsigned long max_blocks, struct buffer_head *bh, + int create, int extend_disksize) +{ + if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) + return ext4_ext_get_blocks(handle, inode, block, max_blocks, + bh, create, extend_disksize); + return ext4_get_blocks_handle(handle, inode, block, max_blocks, bh, + create, extend_disksize); +} + + #endif /* __KERNEL__ */ #endif /* _LINUX_EXT4_FS_H */ diff --git a/trunk/include/linux/ext4_fs_extents.h b/trunk/include/linux/ext4_fs_extents.h index 697da4bce6c5..d2045a26195d 100644 --- a/trunk/include/linux/ext4_fs_extents.h +++ b/trunk/include/linux/ext4_fs_extents.h @@ -124,6 +124,20 @@ struct ext4_ext_path { #define EXT4_EXT_CACHE_GAP 1 #define EXT4_EXT_CACHE_EXTENT 2 +/* + * to be called by ext4_ext_walk_space() + * negative retcode - error + * positive retcode - signal for ext4_ext_walk_space(), see below + * callback must return valid extent (passed or newly created) + */ +typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, + struct ext4_ext_cache *, + void *); + +#define EXT_CONTINUE 0 +#define EXT_BREAK 1 +#define EXT_REPEAT 2 + #define 
EXT_MAX_BLOCK 0xffffffff @@ -212,8 +226,6 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext) (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN)); } -extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); -extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); extern int ext4_extent_tree_init(handle_t *, struct inode *); extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); extern int ext4_ext_try_to_merge(struct inode *inode, @@ -221,11 +233,8 @@ extern int ext4_ext_try_to_merge(struct inode *inode, struct ext4_extent *); extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); -extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, - struct ext4_ext_path *); -extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, - ext4_lblk_t *, ext4_fsblk_t *); -extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *, - ext4_lblk_t *, ext4_fsblk_t *); +extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *); +extern struct ext4_ext_path * ext4_ext_find_extent(struct inode *, int, struct ext4_ext_path *); + #endif /* _LINUX_EXT4_EXTENTS */ diff --git a/trunk/include/linux/ext4_fs_i.h b/trunk/include/linux/ext4_fs_i.h index d5508d3cf290..86ddfe2089f3 100644 --- a/trunk/include/linux/ext4_fs_i.h +++ b/trunk/include/linux/ext4_fs_i.h @@ -27,12 +27,6 @@ typedef int ext4_grpblk_t; /* data type for filesystem-wide blocks number */ typedef unsigned long long ext4_fsblk_t; -/* data type for file logical block number */ -typedef __u32 ext4_lblk_t; - -/* data type for block group number */ -typedef unsigned long ext4_group_t; - struct ext4_reserve_window { ext4_fsblk_t _rsv_start; /* First byte reserved */ ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */ @@ -54,7 +48,7 @@ struct ext4_block_alloc_info { * most-recently-allocated block in this file. * We use this for detecting linearly ascending allocation requests. */ - ext4_lblk_t last_alloc_logical_block; + __u32 last_alloc_logical_block; /* * Was i_next_alloc_goal in ext4_inode_info * is the *physical* companion to i_next_alloc_block. @@ -73,7 +67,7 @@ struct ext4_block_alloc_info { */ struct ext4_ext_cache { ext4_fsblk_t ec_start; - ext4_lblk_t ec_block; + __u32 ec_block; __u32 ec_len; /* must be 32bit to return holes */ __u32 ec_type; }; @@ -85,6 +79,7 @@ struct ext4_inode_info { __le32 i_data[15]; /* unconverted */ __u32 i_flags; ext4_fsblk_t i_file_acl; + __u32 i_dir_acl; __u32 i_dtime; /* @@ -94,13 +89,13 @@ struct ext4_inode_info { * place a file's data blocks near its inode block, and new inodes * near to their parent directory's inode. */ - ext4_group_t i_block_group; + __u32 i_block_group; __u32 i_state; /* Dynamic state flags for ext4 */ /* block reservation info */ struct ext4_block_alloc_info *i_block_alloc_info; - ext4_lblk_t i_dir_start_lookup; + __u32 i_dir_start_lookup; #ifdef CONFIG_EXT4DEV_FS_XATTR /* * Extended attributes can be read independently of the main file @@ -139,16 +134,16 @@ struct ext4_inode_info { __u16 i_extra_isize; /* - * i_data_sem is for serialising ext4_truncate() against + * truncate_mutex is for serialising ext4_truncate() against * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's * data tree are chopped off during truncate. 
We can't do that in * ext4 because whenever we perform intermediate commits during * truncate, the inode and all the metadata blocks *must* be in a * consistent state which allows truncation of the orphans to restart * during recovery. Hence we must fix the get_block-vs-truncate race - * by other means, so we have i_data_sem. + * by other means, so we have truncate_mutex. */ - struct rw_semaphore i_data_sem; + struct mutex truncate_mutex; struct inode vfs_inode; unsigned long i_ext_generation; @@ -158,10 +153,6 @@ struct ext4_inode_info { * struct timespec i_{a,c,m}time in the generic inode. */ struct timespec i_crtime; - - /* mballoc */ - struct list_head i_prealloc_list; - spinlock_t i_prealloc_lock; }; #endif /* _LINUX_EXT4_FS_I */ diff --git a/trunk/include/linux/ext4_fs_sb.h b/trunk/include/linux/ext4_fs_sb.h index abaae2c8cccf..b40e827cd495 100644 --- a/trunk/include/linux/ext4_fs_sb.h +++ b/trunk/include/linux/ext4_fs_sb.h @@ -35,10 +35,9 @@ struct ext4_sb_info { unsigned long s_itb_per_group; /* Number of inode table blocks per group */ unsigned long s_gdb_count; /* Number of group descriptor blocks */ unsigned long s_desc_per_block; /* Number of group descriptors per block */ - ext4_group_t s_groups_count; /* Number of groups in the fs */ + unsigned long s_groups_count; /* Number of groups in the fs */ unsigned long s_overhead_last; /* Last calculated overhead */ unsigned long s_blocks_last; /* Last seen block count */ - loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ struct buffer_head * s_sbh; /* Buffer containing the super block */ struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */ struct buffer_head ** s_group_desc; @@ -91,58 +90,6 @@ struct ext4_sb_info { unsigned long s_ext_blocks; unsigned long s_ext_extents; #endif - - /* for buddy allocator */ - struct ext4_group_info ***s_group_info; - struct inode *s_buddy_cache; - long s_blocks_reserved; - spinlock_t s_reserve_lock; - struct list_head s_active_transaction; - struct list_head s_closed_transaction; - struct list_head s_committed_transaction; - spinlock_t s_md_lock; - tid_t s_last_transaction; - unsigned short *s_mb_offsets, *s_mb_maxs; - - /* tunables */ - unsigned long s_stripe; - unsigned long s_mb_stream_request; - unsigned long s_mb_max_to_scan; - unsigned long s_mb_min_to_scan; - unsigned long s_mb_stats; - unsigned long s_mb_order2_reqs; - unsigned long s_mb_group_prealloc; - /* where last allocation was done - for stream allocation */ - unsigned long s_mb_last_group; - unsigned long s_mb_last_start; - - /* history to debug policy */ - struct ext4_mb_history *s_mb_history; - int s_mb_history_cur; - int s_mb_history_max; - int s_mb_history_num; - struct proc_dir_entry *s_mb_proc; - spinlock_t s_mb_history_lock; - int s_mb_history_filter; - - /* stats for buddy allocator */ - spinlock_t s_mb_pa_lock; - atomic_t s_bal_reqs; /* number of reqs with len > 1 */ - atomic_t s_bal_success; /* we found long enough chunks */ - atomic_t s_bal_allocated; /* in blocks */ - atomic_t s_bal_ex_scanned; /* total extents scanned */ - atomic_t s_bal_goals; /* goal hits */ - atomic_t s_bal_breaks; /* too long searches */ - atomic_t s_bal_2orders; /* 2^order hits */ - spinlock_t s_bal_lock; - unsigned long s_mb_buddies_generated; - unsigned long long s_mb_generation_time; - atomic_t s_mb_lost_chunks; - atomic_t s_mb_preallocated; - atomic_t s_mb_discarded; - - /* locality groups */ - struct ext4_locality_group *s_locality_groups; }; #endif /* _LINUX_EXT4_FS_SB */ diff --git 
a/trunk/include/linux/fs.h b/trunk/include/linux/fs.h index a516b6716870..21398a5d688d 100644 --- a/trunk/include/linux/fs.h +++ b/trunk/include/linux/fs.h @@ -124,7 +124,6 @@ extern int dir_notify_enable; #define MS_SHARED (1<<20) /* change to shared */ #define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */ #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ -#define MS_I_VERSION (1<<23) /* Update inode I_version field */ #define MS_ACTIVE (1<<30) #define MS_NOUSER (1<<31) @@ -174,7 +173,6 @@ extern int dir_notify_enable; ((inode)->i_flags & (S_SYNC|S_DIRSYNC))) #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK) #define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME) -#define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION) #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) @@ -601,7 +599,7 @@ struct inode { uid_t i_uid; gid_t i_gid; dev_t i_rdev; - u64 i_version; + unsigned long i_version; loff_t i_size; #ifdef __NEED_I_SIZE_ORDERED seqcount_t i_size_seqcount; @@ -1396,21 +1394,6 @@ static inline void inode_dec_link_count(struct inode *inode) mark_inode_dirty(inode); } -/** - * inode_inc_iversion - increments i_version - * @inode: inode that need to be updated - * - * Every time the inode is modified, the i_version field will be incremented. - * The filesystem has to be mounted with i_version flag - */ - -static inline void inode_inc_iversion(struct inode *inode) -{ - spin_lock(&inode->i_lock); - inode->i_version++; - spin_unlock(&inode->i_lock); -} - extern void touch_atime(struct vfsmount *mnt, struct dentry *dentry); static inline void file_accessed(struct file *file) { diff --git a/trunk/include/linux/jbd2.h b/trunk/include/linux/jbd2.h index 2cbf6fdb1799..06ef11457051 100644 --- a/trunk/include/linux/jbd2.h +++ b/trunk/include/linux/jbd2.h @@ -149,28 +149,6 @@ typedef struct journal_header_s __be32 h_sequence; } journal_header_t; -/* - * Checksum types. - */ -#define JBD2_CRC32_CHKSUM 1 -#define JBD2_MD5_CHKSUM 2 -#define JBD2_SHA1_CHKSUM 3 - -#define JBD2_CRC32_CHKSUM_SIZE 4 - -#define JBD2_CHECKSUM_BYTES (32 / sizeof(u32)) -/* - * Commit block header for storing transactional checksums: - */ -struct commit_header { - __be32 h_magic; - __be32 h_blocktype; - __be32 h_sequence; - unsigned char h_chksum_type; - unsigned char h_chksum_size; - unsigned char h_padding[2]; - __be32 h_chksum[JBD2_CHECKSUM_BYTES]; -}; /* * The block tag: used to describe a single buffer in the journal. 
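The commit_header and JBD2_CRC32_CHKSUM definitions deleted in the hunk above carried the transaction checksum that commit computed (see the jbd2_checksum_data() removal earlier) and recovery verified. A rough userspace model of that flow follows; commit_header_model is a hypothetical stand-in, and zlib's crc32() substitutes for the kernel's crc32_be() (different bit order, and the kernel seeded with ~0, so the values differ; only the shape of the computation is shown). Build with -lz.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <zlib.h>          /* crc32(); stand-in for crc32_be() */

    /* Hypothetical model of the removed commit_header checksum fields. */
    struct commit_header_model {
        unsigned char h_chksum_type;   /* JBD2_CRC32_CHKSUM == 1 in the removed code */
        unsigned char h_chksum_size;   /* JBD2_CRC32_CHKSUM_SIZE == 4 */
        uint32_t      h_chksum;        /* running CRC over the transaction's blocks */
    };

    int main(void)
    {
        unsigned char block_a[512], block_b[512];
        memset(block_a, 0xAA, sizeof(block_a));
        memset(block_b, 0x55, sizeof(block_b));

        /* Fold each journal block into one running checksum, as the removed
         * jbd2_checksum_data()/calc_chksums() pair did at commit time and
         * again during recovery. */
        uLong sum = crc32(0L, Z_NULL, 0);
        sum = crc32(sum, block_a, sizeof(block_a));
        sum = crc32(sum, block_b, sizeof(block_b));

        struct commit_header_model ch = { 1, 4, (uint32_t)sum };
        printf("commit checksum: 0x%08x\n", ch.h_chksum);

        /* Recovery recomputes the same running CRC over the blocks named in
         * each descriptor and compares it with h_chksum; a mismatch marks
         * that transaction as the end of the valid log. */
        return 0;
    }

Keeping the CRC cumulative across the whole transaction is what let the removed recovery code distinguish an interrupted commit from corruption with a single stored word.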
@@ -264,25 +242,31 @@ typedef struct journal_superblock_s
 	((j)->j_format_version >= 2 &&					\
 	 ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask))))
 
-#define JBD2_FEATURE_COMPAT_CHECKSUM	0x00000001
-
-#define JBD2_FEATURE_INCOMPAT_REVOKE		0x00000001
-#define JBD2_FEATURE_INCOMPAT_64BIT		0x00000002
-#define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT	0x00000004
+#define JBD2_FEATURE_INCOMPAT_REVOKE	0x00000001
+#define JBD2_FEATURE_INCOMPAT_64BIT	0x00000002
 
 /* Features known to this kernel version: */
-#define JBD2_KNOWN_COMPAT_FEATURES	JBD2_FEATURE_COMPAT_CHECKSUM
+#define JBD2_KNOWN_COMPAT_FEATURES	0
 #define JBD2_KNOWN_ROCOMPAT_FEATURES	0
 #define JBD2_KNOWN_INCOMPAT_FEATURES	(JBD2_FEATURE_INCOMPAT_REVOKE | \
-					 JBD2_FEATURE_INCOMPAT_64BIT | \
-					 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)
+					 JBD2_FEATURE_INCOMPAT_64BIT)
 
 #ifdef __KERNEL__
 
 #include <linux/fs.h>
 #include <linux/sched.h>
 
-#define J_ASSERT(assert)	BUG_ON(!(assert))
+#define JBD2_ASSERTIONS
+#ifdef JBD2_ASSERTIONS
+#define J_ASSERT(assert)						\
+do {									\
+	if (!(assert)) {						\
+		printk (KERN_EMERG					\
+			"Assertion failure in %s() at %s:%d: \"%s\"\n",	\
+			__FUNCTION__, __FILE__, __LINE__, # assert);	\
+		BUG();							\
+	}								\
+} while (0)
 
 #if defined(CONFIG_BUFFER_DEBUG)
 void buffer_assertion_failure(struct buffer_head *bh);
@@ -298,6 +282,10 @@ void buffer_assertion_failure(struct buffer_head *bh);
 #define J_ASSERT_JH(jh, expr)	J_ASSERT(expr)
 #endif
 
+#else
+#define J_ASSERT(assert)	do { } while (0)
+#endif		/* JBD2_ASSERTIONS */
+
 #if defined(JBD2_PARANOID_IOFAIL)
 #define J_EXPECT(expr, why...)		J_ASSERT(expr)
 #define J_EXPECT_BH(bh, expr, why...)	J_ASSERT_BH(bh, expr)
@@ -418,23 +406,9 @@ struct handle_s
 	unsigned int	h_sync:		1;	/* sync-on-close */
 	unsigned int	h_jdata:	1;	/* force data journaling */
 	unsigned int	h_aborted:	1;	/* fatal error on handle */
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lockdep_map	h_lockdep_map;
-#endif
 };
 
-/*
- * Some stats for checkpoint phase
- */
-struct transaction_chp_stats_s {
-	unsigned long		cs_chp_time;
-	unsigned long		cs_forced_to_close;
-	unsigned long		cs_written;
-	unsigned long		cs_dropped;
-};
-
 /* The transaction_t type is the guts of the journaling mechanism.  It
 * tracks a compound transaction through its various states:
 *
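The J_ASSERT hunk above swaps a plain BUG_ON() for an assertion macro that can be compiled out. Below is a self-contained userspace analogue of the same pattern, with DEMO_ASSERTIONS playing the role of the JBD2_ASSERTIONS switch (all demo names are hypothetical):

#include <stdio.h>
#include <stdlib.h>

/* Compile-time switchable assertion in the style of J_ASSERT: report
 * where the check failed, then abort.  Leave DEMO_ASSERTIONS undefined
 * to compile the checks out entirely. */
#define DEMO_ASSERTIONS

#ifdef DEMO_ASSERTIONS
#define DEMO_ASSERT(assert)						\
do {									\
	if (!(assert)) {						\
		fprintf(stderr,						\
			"Assertion failure in %s() at %s:%d: \"%s\"\n",	\
			__func__, __FILE__, __LINE__, #assert);		\
		abort();						\
	}								\
} while (0)
#else
#define DEMO_ASSERT(assert)	do { } while (0)
#endif

int main(void)
{
	int refcount = 1;

	DEMO_ASSERT(refcount > 0);	/* passes silently */
	printf("assertion held\n");
	return 0;
}

The design point is that the failing expression is stringified and printed with its location before the crash, which is far more useful in a log than a bare BUG().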
@@ -482,8 +456,6 @@ struct transaction_s
 	/*
 	 * Transaction's current state
 	 * [no locking - only kjournald2 alters this]
-	 * [j_list_lock] guards transition of a transaction into T_FINISHED
-	 * state and subsequent call of __jbd2_journal_drop_transaction()
 	 * FIXME: needs barriers
 	 * KLUDGE: [use j_state_lock]
 	 */
@@ -571,21 +543,6 @@ struct transaction_s
 	 */
 	spinlock_t		t_handle_lock;
 
-	/*
-	 * Longest time some handle had to wait for running transaction
-	 */
-	unsigned long		t_max_wait;
-
-	/*
-	 * When transaction started
-	 */
-	unsigned long		t_start;
-
-	/*
-	 * Checkpointing stats [j_checkpoint_sem]
-	 */
-	struct transaction_chp_stats_s t_chp_stats;
-
 	/*
 	 * Number of outstanding updates running on this transaction
 	 * [t_handle_lock]
@@ -617,39 +574,6 @@ struct transaction_s
 
 };
 
-struct transaction_run_stats_s {
-	unsigned long		rs_wait;
-	unsigned long		rs_running;
-	unsigned long		rs_locked;
-	unsigned long		rs_flushing;
-	unsigned long		rs_logging;
-
-	unsigned long		rs_handle_count;
-	unsigned long		rs_blocks;
-	unsigned long		rs_blocks_logged;
-};
-
-struct transaction_stats_s {
-	int			ts_type;
-	unsigned long		ts_tid;
-	union {
-		struct transaction_run_stats_s run;
-		struct transaction_chp_stats_s chp;
-	} u;
-};
-
-#define JBD2_STATS_RUN		1
-#define JBD2_STATS_CHECKPOINT	2
-
-static inline unsigned long
-jbd2_time_diff(unsigned long start, unsigned long end)
-{
-	if (end >= start)
-		return end - start;
-
-	return end + (MAX_JIFFY_OFFSET - start);
-}
-
 /**
  * struct journal_s - The journal_s type is the concrete type associated with
  *     journal_t.
@@ -711,12 +635,6 @@ jbd2_time_diff(unsigned long start, unsigned long end)
  * @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the
  *	number that will fit in j_blocksize
  * @j_last_sync_writer: most recent pid which did a synchronous write
- * @j_history: Buffer storing the transactions statistics history
- * @j_history_max: Maximum number of transactions in the statistics history
- * @j_history_cur: Current number of transactions in the statistics history
- * @j_history_lock: Protect the transactions statistics history
- * @j_proc_entry: procfs entry for the jbd statistics directory
- * @j_stats: Overall statistics
 * @j_private: An opaque pointer to fs-private information.
 */
 
@@ -908,19 +826,6 @@ struct journal_s
 
 	pid_t			j_last_sync_writer;
 
-	/*
-	 * Journal statistics
-	 */
-	struct transaction_stats_s *j_history;
-	int			j_history_max;
-	int			j_history_cur;
-	/*
-	 * Protect the transactions statistics history
-	 */
-	spinlock_t		j_history_lock;
-	struct proc_dir_entry	*j_proc_entry;
-	struct transaction_stats_s j_stats;
-
 	/*
 	 * An opaque pointer to fs-private information.  ext3 puts its
 	 * superblock pointer here
@@ -1027,8 +932,6 @@ extern int	   jbd2_journal_check_available_features
 		   (journal_t *, unsigned long, unsigned long, unsigned long);
 extern int	   jbd2_journal_set_features
 		   (journal_t *, unsigned long, unsigned long, unsigned long);
-extern void	   jbd2_journal_clear_features
-		   (journal_t *, unsigned long, unsigned long, unsigned long);
 extern int	   jbd2_journal_create     (journal_t *);
 extern int	   jbd2_journal_load       (journal_t *journal);
 extern void	   jbd2_journal_destroy    (journal_t *);
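Since the hunks above narrow the known-feature masks, the following sketch models the check such masks enable: any on-disk incompat bit outside the set this kernel knows must make the mount fail. The constants and names here are illustrative, not the jbd2 API:

#include <stdint.h>
#include <stdio.h>

/* Feature bits, mirroring the JBD2_FEATURE_INCOMPAT_* style. */
#define DEMO_INCOMPAT_REVOKE	0x00000001u
#define DEMO_INCOMPAT_64BIT	0x00000002u

/* Everything this "kernel" understands. */
#define DEMO_KNOWN_INCOMPAT	(DEMO_INCOMPAT_REVOKE | DEMO_INCOMPAT_64BIT)

/* An incompat feature the on-disk journal uses but we do not know
 * about means we must refuse to mount. */
static int demo_can_mount(uint32_t disk_incompat)
{
	return (disk_incompat & ~DEMO_KNOWN_INCOMPAT) == 0;
}

int main(void)
{
	printf("revoke+64bit: %s\n",
	       demo_can_mount(DEMO_INCOMPAT_REVOKE | DEMO_INCOMPAT_64BIT)
	       ? "mountable" : "refused");
	printf("unknown bit 0x4: %s\n",
	       demo_can_mount(0x4) ? "mountable" : "refused");
	return 0;
}

This is why dropping JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT from the known mask, as the hunk does, makes journals carrying that bit unmountable by this kernel.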
diff --git a/trunk/include/linux/module.h b/trunk/include/linux/module.h
index aedc06be1de8..c97bdb7eb957 100644
--- a/trunk/include/linux/module.h
+++ b/trunk/include/linux/module.h
@@ -446,14 +446,11 @@ static inline void __module_get(struct module *module)
 	__mod ? __mod->name : "kernel";		\
 })
 
-/* For kallsyms to ask for address resolution. namebuf should be at
- * least KSYM_NAME_LEN long: a pointer to namebuf is returned if
- * found, otherwise NULL. */
-char *module_address_lookup(unsigned long addr,
-			    unsigned long *symbolsize,
-			    unsigned long *offset,
-			    char **modname,
-			    char *namebuf);
+/* For kallsyms to ask for address resolution.  NULL means not found. */
+const char *module_address_lookup(unsigned long addr,
+				  unsigned long *symbolsize,
+				  unsigned long *offset,
+				  char **modname);
 
 int lookup_module_symbol_name(unsigned long addr, char *symname);
 int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name);
@@ -519,11 +516,10 @@ static inline void module_put(struct module *module)
 #define module_name(mod) "kernel"
 
 /* For kallsyms to ask for address resolution. NULL means not found. */
-static inline char *module_address_lookup(unsigned long addr,
-					  unsigned long *symbolsize,
-					  unsigned long *offset,
-					  char **modname,
-					  char *namebuf)
+static inline const char *module_address_lookup(unsigned long addr,
+						unsigned long *symbolsize,
+						unsigned long *offset,
+						char **modname)
 {
 	return NULL;
 }
diff --git a/trunk/kernel/extable.c b/trunk/kernel/extable.c
index a26cb2e17023..7fe262855317 100644
--- a/trunk/kernel/extable.c
+++ b/trunk/kernel/extable.c
@@ -46,8 +46,7 @@ int core_kernel_text(unsigned long addr)
 	    addr <= (unsigned long)_etext)
 		return 1;
 
-	if (system_state == SYSTEM_BOOTING &&
-	    addr >= (unsigned long)_sinittext &&
+	if (addr >= (unsigned long)_sinittext &&
 	    addr <= (unsigned long)_einittext)
 		return 1;
 	return 0;
diff --git a/trunk/kernel/kallsyms.c b/trunk/kernel/kallsyms.c
index 7dadc71ce516..2fc25810509e 100644
--- a/trunk/kernel/kallsyms.c
+++ b/trunk/kernel/kallsyms.c
@@ -233,11 +233,10 @@ static unsigned long get_symbol_pos(unsigned long addr,
 int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize,
 				unsigned long *offset)
 {
-	char namebuf[KSYM_NAME_LEN];
 	if (is_ksym_addr(addr))
 		return !!get_symbol_pos(addr, symbolsize, offset);
 
-	return !!module_address_lookup(addr, symbolsize, offset, NULL, namebuf);
+	return !!module_address_lookup(addr, symbolsize, offset, NULL);
 }
 
 /*
@@ -252,6 +251,8 @@ const char *kallsyms_lookup(unsigned long addr,
 			    unsigned long *offset,
 			    char **modname, char *namebuf)
 {
+	const char *msym;
+
 	namebuf[KSYM_NAME_LEN - 1] = 0;
 	namebuf[0] = 0;
 
@@ -267,8 +268,10 @@ const char *kallsyms_lookup(unsigned long addr,
 	}
 
 	/* see if it's in a module */
-	return module_address_lookup(addr, symbolsize, offset, modname,
-				     namebuf);
+	msym = module_address_lookup(addr, symbolsize, offset, modname);
+	if (msym)
+		return strncpy(namebuf, msym, KSYM_NAME_LEN - 1);
+	return NULL;
 }
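The kallsyms changes above alter how a module symbol name is handed back to the caller. As a rough model of what an address lookup does, here is a userspace sketch that resolves an address to "symbol+offset" against a sorted table; it is illustrative only, not the kernel's implementation:

#include <stddef.h>
#include <stdio.h>

/* Find the last symbol that starts at or below addr and report the
 * name plus offset, the way kallsyms reports "symbol+0x..". */
struct demo_sym {
	unsigned long addr;
	const char *name;
};

static const struct demo_sym table[] = {	/* sorted by address */
	{ 0x1000, "demo_start" },
	{ 0x1400, "demo_work" },
	{ 0x2000, "demo_exit" },
};

static const char *demo_lookup(unsigned long addr, unsigned long *offset)
{
	size_t i, best = 0;

	if (addr < table[0].addr)
		return NULL;	/* NULL means not found */
	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (table[i].addr <= addr)
			best = i;
	*offset = addr - table[best].addr;
	return table[best].name;
}

int main(void)
{
	unsigned long off;
	const char *name = demo_lookup(0x1432, &off);

	if (name)
		printf("0x1432 = %s+0x%lx\n", name, off);
	return 0;
}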
diff --git a/trunk/kernel/module.c b/trunk/kernel/module.c
index f6a4e721fd49..1bb4c5e0d56e 100644
--- a/trunk/kernel/module.c
+++ b/trunk/kernel/module.c
@@ -65,9 +65,6 @@ static DEFINE_MUTEX(module_mutex);
 
 static LIST_HEAD(modules);
 
-/* Waiting for a module to finish initializing? */
-static DECLARE_WAIT_QUEUE_HEAD(module_wq);
-
 static BLOCKING_NOTIFIER_HEAD(module_notify_list);
 
 int register_module_notifier(struct notifier_block * nb)
@@ -87,11 +84,8 @@ EXPORT_SYMBOL(unregister_module_notifier);
 static inline int strong_try_module_get(struct module *mod)
 {
 	if (mod && mod->state == MODULE_STATE_COMING)
-		return -EBUSY;
-	if (try_module_get(mod))
 		return 0;
-	else
-		return -ENOENT;
+	return try_module_get(mod);
 }
 
 static inline void add_taint_module(struct module *mod, unsigned flag)
@@ -545,21 +539,11 @@ static int already_uses(struct module *a, struct module *b)
 static int use_module(struct module *a, struct module *b)
 {
 	struct module_use *use;
-	int no_warn, err;
+	int no_warn;
 
 	if (b == NULL || already_uses(a, b)) return 1;
 
-	/* If we're interrupted or time out, we fail. */
-	if (wait_event_interruptible_timeout(
-		    module_wq, (err = strong_try_module_get(b)) != -EBUSY,
-		    30 * HZ) <= 0) {
-		printk("%s: gave up waiting for init of module %s.\n",
-		       a->name, b->name);
-		return 0;
-	}
-
-	/* If strong_try_module_get() returned a different error, we fail. */
-	if (err)
+	if (!strong_try_module_get(b))
 		return 0;
 
 	DEBUGP("Allocating new usage for %s.\n", a->name);
@@ -738,7 +722,7 @@ sys_delete_module(const char __user *name_user, unsigned int flags)
 		mutex_lock(&module_mutex);
 	}
 	/* Store the name of the last unloaded module for diagnostic purposes */
-	strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module));
+	sprintf(last_unloaded_module, mod->name);
 	free_module(mod);
 
 out:
@@ -832,7 +816,7 @@ static inline void module_unload_free(struct module *mod)
 
 static inline int use_module(struct module *a, struct module *b)
 {
-	return strong_try_module_get(b) == 0;
+	return strong_try_module_get(b);
 }
 
 static inline void module_unload_init(struct module *mod)
@@ -1230,7 +1214,6 @@ void module_remove_modinfo_attrs(struct module *mod)
 int mod_sysfs_init(struct module *mod)
 {
 	int err;
-	struct kobject *kobj;
 
 	if (!module_sysfs_initialized) {
 		printk(KERN_ERR "%s: module sysfs not initialized\n",
@@ -1238,15 +1221,6 @@ int mod_sysfs_init(struct module *mod)
 		err = -EINVAL;
 		goto out;
 	}
-
-	kobj = kset_find_obj(module_kset, mod->name);
-	if (kobj) {
-		printk(KERN_ERR "%s: module is already loaded\n", mod->name);
-		kobject_put(kobj);
-		err = -EINVAL;
-		goto out;
-	}
-
 	mod->mkobj.mod = mod;
 	memset(&mod->mkobj.kobj, 0, sizeof(mod->mkobj.kobj));
@@ -1303,17 +1277,6 @@ static void mod_kobject_remove(struct module *mod)
 	kobject_put(&mod->mkobj.kobj);
 }
 
-/*
- * link the module with the whole machine is stopped with interrupts off
- * - this defends against kallsyms not taking locks
- */
-static int __link_module(void *_mod)
-{
-	struct module *mod = _mod;
-	list_add(&mod->list, &modules);
-	return 0;
-}
-
 /*
  * unlink the module with the whole machine is stopped with interrupts off
  * - this defends against kallsyms not taking locks
@@ -1363,7 +1326,7 @@ void *__symbol_get(const char *symbol)
 
 	preempt_disable();
 	value = __find_symbol(symbol, &owner, &crc, 1);
-	if (value && strong_try_module_get(owner) != 0)
+	if (value && !strong_try_module_get(owner))
 		value = 0;
 	preempt_enable();
 
@@ -1926,7 +1889,7 @@ static struct module *load_module(void __user *umod,
 	set_license(mod, get_modinfo(sechdrs, infoindex, "license"));
 
 	if (strcmp(mod->name, "ndiswrapper") == 0)
-		add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
+		add_taint(TAINT_PROPRIETARY_MODULE);
 	if (strcmp(mod->name, "driverloader") == 0)
 		add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
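The deleted block in use_module() waited up to 30*HZ for a module to leave MODULE_STATE_COMING before giving up. A userspace analogue of that wait-with-timeout shape, using a condition variable (all names here are hypothetical):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>

/* Wait up to a timeout for another thread to flip "ready"; give up on
 * timeout the way use_module() gave up waiting for init. */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int ready;

static int wait_for_ready(int seconds)
{
	struct timespec deadline;
	int err = 0;

	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += seconds;

	pthread_mutex_lock(&lock);
	while (!ready && err != ETIMEDOUT)
		err = pthread_cond_timedwait(&cond, &lock, &deadline);
	pthread_mutex_unlock(&lock);
	return err ? -1 : 0;	/* -1: gave up waiting */
}

int main(void)
{
	if (wait_for_ready(1) < 0)
		printf("gave up waiting for init\n");
	return 0;
}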
@@ -2056,11 +2019,6 @@ static struct module *load_module(void __user *umod,
 		printk(KERN_WARNING "%s: Ignoring obsolete parameters\n",
 		       mod->name);
 
-	/* Now sew it into the lists so we can get lockdep and oops
-	 * info during argument parsing.  Noone should access us, since
-	 * strong_try_module_get() will fail. */
-	stop_machine_run(__link_module, mod, NR_CPUS);
-
 	/* Size of section 0 is 0, so this works well if no params */
 	err = parse_args(mod->name, mod->args,
 			 (struct kernel_param *)
@@ -2069,7 +2027,7 @@ static struct module *load_module(void __user *umod,
 			 / sizeof(struct kernel_param),
 			 NULL);
 	if (err < 0)
-		goto unlink;
+		goto arch_cleanup;
 
 	err = mod_sysfs_setup(mod,
 			      (struct kernel_param *)
@@ -2077,7 +2035,7 @@ static struct module *load_module(void __user *umod,
 			      sechdrs[setupindex].sh_size
 			      / sizeof(struct kernel_param));
 	if (err < 0)
-		goto unlink;
+		goto arch_cleanup;
 	add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
 	add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
@@ -2092,8 +2050,7 @@ static struct module *load_module(void __user *umod,
 	/* Done! */
 	return mod;
 
- unlink:
-	stop_machine_run(__unlink_module, mod, NR_CPUS);
+ arch_cleanup:
 	module_arch_cleanup(mod);
 cleanup:
 	kobject_del(&mod->mkobj.kobj);
@@ -2118,6 +2075,17 @@ static struct module *load_module(void __user *umod,
 	goto free_hdr;
 }
 
+/*
+ * link the module with the whole machine is stopped with interrupts off
+ * - this defends against kallsyms not taking locks
+ */
+static int __link_module(void *_mod)
+{
+	struct module *mod = _mod;
+	list_add(&mod->list, &modules);
+	return 0;
+}
+
 /* This is where the real work happens */
 asmlinkage long
 sys_init_module(void __user *umod,
@@ -2142,6 +2110,10 @@ sys_init_module(void __user *umod,
 		return PTR_ERR(mod);
 	}
 
+	/* Now sew it into the lists.  They won't access us, since
+	   strong_try_module_get() will fail. */
+	stop_machine_run(__link_module, mod, NR_CPUS);
+
 	/* Drop lock so they can recurse */
 	mutex_unlock(&module_mutex);
 
@@ -2160,7 +2132,6 @@ sys_init_module(void __user *umod,
 		mutex_lock(&module_mutex);
 		free_module(mod);
 		mutex_unlock(&module_mutex);
-		wake_up(&module_wq);
 
 		return ret;
 	}
@@ -2175,7 +2146,6 @@ sys_init_module(void __user *umod,
 	mod->init_size = 0;
 	mod->init_text_size = 0;
 	mutex_unlock(&module_mutex);
-	wake_up(&module_wq);
 
 	return 0;
 }
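__link_module() runs under stop_machine_run() so that lockless kallsyms readers can never observe a half-linked module. One userspace way to reason about the same requirement, shown purely as an alternative illustration and not what the kernel does, is to publish a fully built node with a C11 release store:

#include <stdatomic.h>
#include <stdio.h>

/* Publish a node into a list that readers walk without locks: fill in
 * the node completely, then make it visible with one release store, so
 * a lockless reader never sees a half-built entry. */
struct demo_module {
	const char *name;
	struct demo_module *next;
};

static _Atomic(struct demo_module *) modules;

static void demo_link_module(struct demo_module *mod)
{
	mod->next = atomic_load_explicit(&modules, memory_order_relaxed);
	/* release: all stores to *mod happen-before it becomes visible */
	atomic_store_explicit(&modules, mod, memory_order_release);
}

int main(void)
{
	static struct demo_module m = { .name = "demo" };
	struct demo_module *p;

	demo_link_module(&m);
	for (p = atomic_load_explicit(&modules, memory_order_acquire);
	     p; p = p->next)
		printf("module: %s\n", p->name);
	return 0;
}

The kernel's stop_machine approach is blunter: it freezes every CPU with interrupts off during the list_add(), so no reader can be mid-traversal at all.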
@@ -2240,13 +2210,14 @@ static const char *get_ksymbol(struct module *mod,
 	return mod->strtab + mod->symtab[best].st_name;
 }
 
-/* For kallsyms to ask for address resolution.  NULL means not found.  Careful
- * not to lock to avoid deadlock on oopses, simply disable preemption. */
-char *module_address_lookup(unsigned long addr,
-			    unsigned long *size,
-			    unsigned long *offset,
-			    char **modname,
-			    char *namebuf)
+/* For kallsyms to ask for address resolution.  NULL means not found.
+   We don't lock, as this is used for oops resolution and races are a
+   lesser concern. */
+/* FIXME: Risky: returns a pointer into a module w/o lock */
+const char *module_address_lookup(unsigned long addr,
+				  unsigned long *size,
+				  unsigned long *offset,
+				  char **modname)
 {
 	struct module *mod;
 	const char *ret = NULL;
@@ -2261,13 +2232,8 @@ char *module_address_lookup(unsigned long addr,
 			break;
 		}
 	}
-	/* Make a copy in here where it's safe */
-	if (ret) {
-		strncpy(namebuf, ret, KSYM_NAME_LEN - 1);
-		ret = namebuf;
-	}
 	preempt_enable();
-	return (char *)ret;
+	return ret;
 }
 
 int lookup_module_symbol_name(unsigned long addr, char *symname)
diff --git a/trunk/kernel/params.c b/trunk/kernel/params.c
index 42fe5e6126c0..67f65ee7211d 100644
--- a/trunk/kernel/params.c
+++ b/trunk/kernel/params.c
@@ -376,6 +376,8 @@ int param_get_string(char *buffer, struct kernel_param *kp)
 
 extern struct kernel_param __start___param[], __stop___param[];
 
+#define MAX_KBUILD_MODNAME KOBJ_NAME_LEN
+
 struct param_attribute
 {
 	struct module_attribute mattr;
@@ -585,7 +587,7 @@ static void __init param_sysfs_builtin(void)
 {
 	struct kernel_param *kp, *kp_begin = NULL;
 	unsigned int i, name_len, count = 0;
-	char modname[MODULE_NAME_LEN + 1] = "";
+	char modname[MAX_KBUILD_MODNAME + 1] = "";
 
 	for (i=0; i < __stop___param - __start___param; i++) {
 		char *dot;
@@ -593,12 +595,12 @@ static void __init param_sysfs_builtin(void)
 		kp = &__start___param[i];
 
 		max_name_len =
-			min_t(size_t, MODULE_NAME_LEN, strlen(kp->name));
+			min_t(size_t, MAX_KBUILD_MODNAME, strlen(kp->name));
 
 		dot = memchr(kp->name, '.', max_name_len);
 		if (!dot) {
 			DEBUGP("couldn't find period in first %d characters "
-			       "of %s\n", MODULE_NAME_LEN, kp->name);
+			       "of %s\n", MAX_KBUILD_MODNAME, kp->name);
 			continue;
 		}
 		name_len = dot - kp->name;
diff --git a/trunk/lib/find_next_bit.c b/trunk/lib/find_next_bit.c
index 78ccd73a8841..bda0d71a2514 100644
--- a/trunk/lib/find_next_bit.c
+++ b/trunk/lib/find_next_bit.c
@@ -178,47 +178,4 @@ unsigned long generic_find_next_zero_le_bit(const unsigned long *addr, unsigned
 EXPORT_SYMBOL(generic_find_next_zero_le_bit);
 
-unsigned long generic_find_next_le_bit(const unsigned long *addr, unsigned
-		long size, unsigned long offset)
-{
-	const unsigned long *p = addr + BITOP_WORD(offset);
-	unsigned long result = offset & ~(BITS_PER_LONG - 1);
-	unsigned long tmp;
-
-	if (offset >= size)
-		return size;
-	size -= result;
-	offset &= (BITS_PER_LONG - 1UL);
-	if (offset) {
-		tmp = ext2_swabp(p++);
-		tmp &= (~0UL << offset);
-		if (size < BITS_PER_LONG)
-			goto found_first;
-		if (tmp)
-			goto found_middle;
-		size -= BITS_PER_LONG;
-		result += BITS_PER_LONG;
-	}
-
-	while (size & ~(BITS_PER_LONG - 1)) {
-		tmp = *(p++);
-		if (tmp)
-			goto found_middle_swap;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-	tmp = ext2_swabp(p);
-found_first:
-	tmp &= (~0UL >> (BITS_PER_LONG - size));
-	if (tmp == 0UL)		/* Are any bits set? */
-		return result + size;	/* Nope. */
-found_middle:
-	return result + __ffs(tmp);
-
-found_middle_swap:
-	return result + __ffs(ext2_swab(tmp));
-}
-EXPORT_SYMBOL(generic_find_next_le_bit);
-
 #endif /* __BIG_ENDIAN */
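The removed generic_find_next_le_bit() scans a bitmap for the next set bit, byte-swapping each word on big-endian hosts so the on-disk bitmap is always read little-endian, as ext2-style filesystems require. A simplified userspace model of the scan itself, without the endian handling (__builtin_ctzl is a GCC/Clang builtin; the names are illustrative):

#include <stdio.h>

#define BITS_PER_LONG (8 * sizeof(unsigned long))

/* Skip whole zero words, then isolate the lowest set bit of the first
 * non-zero word.  Returning "size" means no set bit was found. */
static unsigned long demo_find_next_bit(const unsigned long *addr,
					unsigned long size,
					unsigned long offset)
{
	while (offset < size) {
		unsigned long word = addr[offset / BITS_PER_LONG];
		unsigned long bit = offset % BITS_PER_LONG;

		word >>= bit;
		if (word)
			return offset + __builtin_ctzl(word);
		offset += BITS_PER_LONG - bit;	/* next word boundary */
	}
	return size;
}

int main(void)
{
	unsigned long map[2] = { 0, 1UL << 5 };	/* one bit in word 1 */

	printf("next set bit from 3: %lu\n",
	       demo_find_next_bit(map, 8 * sizeof(map), 3));
	return 0;
}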
diff --git a/trunk/scripts/kernel-doc b/trunk/scripts/kernel-doc
index 1d1401807e95..e4fa8d9568ba 100755
--- a/trunk/scripts/kernel-doc
+++ b/trunk/scripts/kernel-doc
@@ -182,10 +182,10 @@ my $blankline_html = $local_lt . "p" . $local_gt;	# was "<p>"
 my %highlights_xml = ( "([^=])\\\"([^\\\"<]+)\\\"", "\$1<quote>\$2</quote>",
			$type_constant, "<constant>\$1</constant>",
			$type_func, "<function>\$1</function>",
-			$type_struct, "<structname>\$1</structname>",
+			$type_struct_xml, "<structname>\$1</structname>",
			$type_env, "<envar>\$1</envar>",
			$type_param, "<parameter>\$1</parameter>" );
-my $blankline_xml = "</para><para>\n";
+my $blankline_xml = $local_lt . "/para" . $local_gt . $local_lt . "para" . $local_gt . "\n";
 
 # gnome, docbook format
 my %highlights_gnome = ( $type_constant, "<replaceable class=\"option\">\$1</replaceable>",
@@ -394,7 +394,7 @@ sub output_highlight {
 #	confess "output_highlight got called with no args?\n";
 #   }
 
-    if ($output_mode eq "html") {
+    if ($output_mode eq "html" || $output_mode eq "xml") {
	$contents = local_unescape($contents);
	# convert data read & converted thru xml_escape() into &xyz; format:
	$contents =~ s/\\\\\\/&/g;