Skip to content

Commit

Permalink
[PATCH] ext3_fsblk_t: filesystem, group blocks and bug fixes
Browse files Browse the repository at this point in the history
Some of the in-kernel ext3 block variable type are treated as signed 4 bytes
int type, thus limited ext3 filesystem to 8TB (4kblock size based).  While
trying to fix them, it seems quite confusing in the ext3 code where some
blocks are filesystem-wide blocks, some are group relative offsets that need
to be signed value (as -1 has special meaning).  So it seem saner to define
two types of physical blocks: one is filesystem wide blocks, another is
group-relative blocks.  The following patches clarify these two types of
blocks in the ext3 code, and fix the type bugs which limit current 32 bit ext3
filesystem limit to 8TB.

With this series of patches and the percpu counter data type changes in the mm
tree, we are able to extend exts filesystem limit to 16TB.

This work is also a pre-request for the recent >32 bit ext3 work, and makes
the kernel to able to address 48 bit ext3 block a lot easier: Simply redefine
ext3_fsblk_t from unsigned long to sector_t and redefine the format string for
ext3 filesystem block corresponding.

Two RFC with a series patches have been posted to ext2-devel list and have
been reviewed and discussed:
http://marc.theaimsgroup.com/?l=ext2-devel&m=114722190816690&w=2

http://marc.theaimsgroup.com/?l=ext2-devel&m=114784919525942&w=2

Patches are tested on both 32 bit machine and 64 bit machine, <8TB ext3 and
>8TB ext3 filesystem(with the latest to be released e2fsprogs-1.39).  Tests
includes overnight fsx, tiobench, dbench and fsstress.

This patch:

Defines ext3_fsblk_t and ext3_grpblk_t, and the printk format string for
filesystem wide blocks.

This patch classifies all block group relative blocks, and ext3_fsblk_t blocks
occurs in the same function where used to be confusing before.  Also include
kernel bug fixes for filesystem wide in-kernel block variables.  There are
some fileystem wide blocks are treated as int/unsigned int type in the kernel
currently, especially in ext3 block allocation and reservation code.  This
patch fixed those bugs by converting those variables to ext3_fsblk_t(unsigned
long) type.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
  • Loading branch information
Mingming Cao authored and Linus Torvalds committed Jun 25, 2006
1 parent cedfb17 commit 1c2bf37
Show file tree
Hide file tree
Showing 8 changed files with 177 additions and 149 deletions.
215 changes: 112 additions & 103 deletions fs/ext3/balloc.c

Large diffs are not rendered by default.

10 changes: 6 additions & 4 deletions fs/ext3/ialloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -262,9 +262,11 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
int ngroups = sbi->s_groups_count;
int inodes_per_group = EXT3_INODES_PER_GROUP(sb);
int freei, avefreei;
int freeb, avefreeb;
int blocks_per_dir, ndirs;
int max_debt, max_dirs, min_blocks, min_inodes;
ext3_fsblk_t freeb, avefreeb;
ext3_fsblk_t blocks_per_dir;
int ndirs;
int max_debt, max_dirs, min_inodes;
ext3_grpblk_t min_blocks;
int group = -1, i;
struct ext3_group_desc *desc;
struct buffer_head *bh;
Expand Down Expand Up @@ -307,7 +309,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
min_inodes = avefreei - inodes_per_group / 4;
min_blocks = avefreeb - EXT3_BLOCKS_PER_GROUP(sb) / 4;

max_debt = EXT3_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, BLOCK_COST);
max_debt = EXT3_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, (ext3_fsblk_t)BLOCK_COST);
if (max_debt * INODE_COST > inodes_per_group)
max_debt = inodes_per_group / INODE_COST;
if (max_debt > 255)
Expand Down
2 changes: 1 addition & 1 deletion fs/ext3/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ static int ext3_inode_is_fast_symlink(struct inode *inode)
* still needs to be revoked.
*/
int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
struct buffer_head *bh, int blocknr)
struct buffer_head *bh, ext3_fsblk_t blocknr)
{
int err;

Expand Down
43 changes: 24 additions & 19 deletions fs/ext3/resize.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,16 @@ static int verify_group_input(struct super_block *sb,
{
struct ext3_sb_info *sbi = EXT3_SB(sb);
struct ext3_super_block *es = sbi->s_es;
unsigned start = le32_to_cpu(es->s_blocks_count);
unsigned end = start + input->blocks_count;
ext3_fsblk_t start = le32_to_cpu(es->s_blocks_count);
ext3_fsblk_t end = start + input->blocks_count;
unsigned group = input->group;
unsigned itend = input->inode_table + sbi->s_itb_per_group;
ext3_fsblk_t itend = input->inode_table + sbi->s_itb_per_group;
unsigned overhead = ext3_bg_has_super(sb, group) ?
(1 + ext3_bg_num_gdb(sb, group) +
le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
unsigned metaend = start + overhead;
ext3_fsblk_t metaend = start + overhead;
struct buffer_head *bh = NULL;
int free_blocks_count;
ext3_grpblk_t free_blocks_count;
int err = -EINVAL;

input->free_blocks_count = free_blocks_count =
Expand All @@ -64,7 +64,8 @@ static int verify_group_input(struct super_block *sb,
ext3_warning(sb, __FUNCTION__, "Bad blocks count %u",
input->blocks_count);
else if (!(bh = sb_bread(sb, end - 1)))
ext3_warning(sb, __FUNCTION__, "Cannot read last block (%u)",
ext3_warning(sb, __FUNCTION__,
"Cannot read last block ("E3FSBLK")",
end - 1);
else if (outside(input->block_bitmap, start, end))
ext3_warning(sb, __FUNCTION__,
Expand All @@ -77,32 +78,35 @@ static int verify_group_input(struct super_block *sb,
else if (outside(input->inode_table, start, end) ||
outside(itend - 1, start, end))
ext3_warning(sb, __FUNCTION__,
"Inode table not in group (blocks %u-%u)",
"Inode table not in group (blocks %u-"E3FSBLK")",
input->inode_table, itend - 1);
else if (input->inode_bitmap == input->block_bitmap)
ext3_warning(sb, __FUNCTION__,
"Block bitmap same as inode bitmap (%u)",
input->block_bitmap);
else if (inside(input->block_bitmap, input->inode_table, itend))
ext3_warning(sb, __FUNCTION__,
"Block bitmap (%u) in inode table (%u-%u)",
"Block bitmap (%u) in inode table (%u-"E3FSBLK")",
input->block_bitmap, input->inode_table, itend-1);
else if (inside(input->inode_bitmap, input->inode_table, itend))
ext3_warning(sb, __FUNCTION__,
"Inode bitmap (%u) in inode table (%u-%u)",
"Inode bitmap (%u) in inode table (%u-"E3FSBLK")",
input->inode_bitmap, input->inode_table, itend-1);
else if (inside(input->block_bitmap, start, metaend))
ext3_warning(sb, __FUNCTION__,
"Block bitmap (%u) in GDT table (%u-%u)",
"Block bitmap (%u) in GDT table"
" ("E3FSBLK"-"E3FSBLK")",
input->block_bitmap, start, metaend - 1);
else if (inside(input->inode_bitmap, start, metaend))
ext3_warning(sb, __FUNCTION__,
"Inode bitmap (%u) in GDT table (%u-%u)",
"Inode bitmap (%u) in GDT table"
" ("E3FSBLK"-"E3FSBLK")",
input->inode_bitmap, start, metaend - 1);
else if (inside(input->inode_table, start, metaend) ||
inside(itend - 1, start, metaend))
ext3_warning(sb, __FUNCTION__,
"Inode table (%u-%u) overlaps GDT table (%u-%u)",
"Inode table (%u-"E3FSBLK") overlaps"
"GDT table ("E3FSBLK"-"E3FSBLK")",
input->inode_table, itend - 1, start, metaend - 1);
else
err = 0;
Expand Down Expand Up @@ -171,7 +175,7 @@ static int setup_new_group_blocks(struct super_block *sb,
struct buffer_head *bh;
handle_t *handle;
unsigned long block;
int bit;
ext3_grpblk_t bit;
int i;
int err = 0, err2;

Expand Down Expand Up @@ -340,7 +344,7 @@ static int verify_reserved_gdb(struct super_block *sb,
while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) {
if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){
ext3_warning(sb, __FUNCTION__,
"reserved GDT %ld missing grp %d (%ld)",
"reserved GDT %lu missing grp %d (%lu)",
blk, grp,
grp * EXT3_BLOCKS_PER_GROUP(sb) + blk);
return -EINVAL;
Expand Down Expand Up @@ -906,11 +910,12 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
{
unsigned long o_blocks_count;
unsigned long o_groups_count;
unsigned long last;
int add;
ext3_grpblk_t last;
ext3_grpblk_t add;
struct buffer_head * bh;
handle_t *handle;
int err, freed_blocks;
int err;
unsigned long freed_blocks;

/* We don't need to worry about locking wrt other resizers just
* yet: we're going to revalidate es->s_blocks_count after
Expand Down Expand Up @@ -1001,10 +1006,10 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
sb->s_dirt = 1;
unlock_super(sb);
ext3_debug("freeing blocks %ld through %ld\n", o_blocks_count,
ext3_debug("freeing blocks %lu through %lu\n", o_blocks_count,
o_blocks_count + add);
ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
ext3_debug("freed blocks %ld through %ld\n", o_blocks_count,
ext3_debug("freed blocks %lu through %lu\n", o_blocks_count,
o_blocks_count + add);
if ((err = ext3_journal_stop(handle)))
goto exit_put;
Expand Down
2 changes: 1 addition & 1 deletion fs/ext3/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -1841,7 +1841,7 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
struct buffer_head * bh;
journal_t *journal;
int start;
int len;
ext3_fsblk_t len;
int hblock, blocksize;
unsigned long sb_block;
unsigned long offset;
Expand Down
27 changes: 14 additions & 13 deletions fs/ext3/xattr.c
Original file line number Diff line number Diff line change
Expand Up @@ -225,15 +225,15 @@ ext3_xattr_block_get(struct inode *inode, int name_index, const char *name,
error = -ENODATA;
if (!EXT3_I(inode)->i_file_acl)
goto cleanup;
ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl);
ea_idebug(inode, "reading block %u", EXT3_I(inode)->i_file_acl);
bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
if (!bh)
goto cleanup;
ea_bdebug(bh, "b_count=%d, refcount=%d",
atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
if (ext3_xattr_check_block(bh)) {
bad_block: ext3_error(inode->i_sb, __FUNCTION__,
"inode %ld: bad block %d", inode->i_ino,
"inode %ld: bad block %u", inode->i_ino,
EXT3_I(inode)->i_file_acl);
error = -EIO;
goto cleanup;
Expand Down Expand Up @@ -366,7 +366,7 @@ ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
error = 0;
if (!EXT3_I(inode)->i_file_acl)
goto cleanup;
ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl);
ea_idebug(inode, "reading block %u", EXT3_I(inode)->i_file_acl);
bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
error = -EIO;
if (!bh)
Expand All @@ -375,7 +375,7 @@ ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
if (ext3_xattr_check_block(bh)) {
ext3_error(inode->i_sb, __FUNCTION__,
"inode %ld: bad block %d", inode->i_ino,
"inode %ld: bad block %u", inode->i_ino,
EXT3_I(inode)->i_file_acl);
error = -EIO;
goto cleanup;
Expand Down Expand Up @@ -647,7 +647,7 @@ ext3_xattr_block_find(struct inode *inode, struct ext3_xattr_info *i,
le32_to_cpu(BHDR(bs->bh)->h_refcount));
if (ext3_xattr_check_block(bs->bh)) {
ext3_error(sb, __FUNCTION__,
"inode %ld: bad block %d", inode->i_ino,
"inode %ld: bad block %u", inode->i_ino,
EXT3_I(inode)->i_file_acl);
error = -EIO;
goto cleanup;
Expand Down Expand Up @@ -792,11 +792,12 @@ ext3_xattr_block_set(handle_t *handle, struct inode *inode,
get_bh(new_bh);
} else {
/* We need to allocate a new block */
int goal = le32_to_cpu(
ext3_fsblk_t goal = le32_to_cpu(
EXT3_SB(sb)->s_es->s_first_data_block) +
EXT3_I(inode)->i_block_group *
(ext3_fsblk_t)EXT3_I(inode)->i_block_group *
EXT3_BLOCKS_PER_GROUP(sb);
int block = ext3_new_block(handle, inode, goal, &error);
ext3_fsblk_t block = ext3_new_block(handle, inode,
goal, &error);
if (error)
goto cleanup;
ea_idebug(inode, "creating block %d", block);
Expand Down Expand Up @@ -847,7 +848,7 @@ ext3_xattr_block_set(handle_t *handle, struct inode *inode,

bad_block:
ext3_error(inode->i_sb, __FUNCTION__,
"inode %ld: bad block %d", inode->i_ino,
"inode %ld: bad block %u", inode->i_ino,
EXT3_I(inode)->i_file_acl);
goto cleanup;

Expand Down Expand Up @@ -1076,14 +1077,14 @@ ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
if (!bh) {
ext3_error(inode->i_sb, __FUNCTION__,
"inode %ld: block %d read error", inode->i_ino,
"inode %ld: block %u read error", inode->i_ino,
EXT3_I(inode)->i_file_acl);
goto cleanup;
}
if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
BHDR(bh)->h_blocks != cpu_to_le32(1)) {
ext3_error(inode->i_sb, __FUNCTION__,
"inode %ld: bad block %d", inode->i_ino,
"inode %ld: bad block %u", inode->i_ino,
EXT3_I(inode)->i_file_acl);
goto cleanup;
}
Expand Down Expand Up @@ -1210,11 +1211,11 @@ ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header,
bh = sb_bread(inode->i_sb, ce->e_block);
if (!bh) {
ext3_error(inode->i_sb, __FUNCTION__,
"inode %ld: block %ld read error",
"inode %ld: block %lu read error",
inode->i_ino, (unsigned long) ce->e_block);
} else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
EXT3_XATTR_REFCOUNT_MAX) {
ea_idebug(inode, "block %ld refcount %d>=%d",
ea_idebug(inode, "block %lu refcount %d>=%d",
(unsigned long) ce->e_block,
le32_to_cpu(BHDR(bh)->h_refcount),
EXT3_XATTR_REFCOUNT_MAX);
Expand Down
19 changes: 11 additions & 8 deletions include/linux/ext3_fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -730,13 +730,15 @@ struct dir_private_info {
/* balloc.c */
extern int ext3_bg_has_super(struct super_block *sb, int group);
extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
extern int ext3_new_blocks (handle_t *, struct inode *, unsigned long,
unsigned long *, int *);
extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
unsigned long);
extern void ext3_free_blocks_sb (handle_t *, struct super_block *,
unsigned long, unsigned long, int *);
extern ext3_fsblk_t ext3_new_block (handle_t *handle, struct inode *inode,
ext3_fsblk_t goal, int *errp);
extern ext3_fsblk_t ext3_new_blocks (handle_t *handle, struct inode *inode,
ext3_fsblk_t goal, unsigned long *count, int *errp);
extern void ext3_free_blocks (handle_t *handle, struct inode *inode,
ext3_fsblk_t block, unsigned long count);
extern void ext3_free_blocks_sb (handle_t *handle, struct super_block *sb,
ext3_fsblk_t block, unsigned long count,
unsigned long *pdquot_freed_blocks);
extern unsigned long ext3_count_free_blocks (struct super_block *);
extern void ext3_check_blocks_bitmap (struct super_block *);
extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
Expand Down Expand Up @@ -773,7 +775,8 @@ extern unsigned long ext3_count_free (struct buffer_head *, unsigned);


/* inode.c */
int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int);
int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
struct buffer_head *bh, ext3_fsblk_t blocknr);
struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
Expand Down
8 changes: 8 additions & 0 deletions include/linux/ext3_fs_i.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@
#include <linux/seqlock.h>
#include <linux/mutex.h>

/* data type for block offset of block group */
typedef int ext3_grpblk_t;

/* data type for filesystem-wide blocks number */
typedef unsigned long ext3_fsblk_t;

#define E3FSBLK "%lu"

struct ext3_reserve_window {
__u32 _rsv_start; /* First byte reserved */
__u32 _rsv_end; /* Last byte reserved or 0 */
Expand Down

0 comments on commit 1c2bf37

Please sign in to comment.