Skip to content

Commit

Permalink
write support for preallocated blocks
Browse files Browse the repository at this point in the history
This patch adds write support for the uninitialized extents that get
created when a preallocation is done using fallocate(). It takes care of
splitting the extents into multiple (up to three) extents and merging the
new split extents with neighbouring ones, if possible.

Signed-off-by: Amit Arora <aarora@in.ibm.com>
  • Loading branch information
Amit Arora authored and Theodore Ts'o committed Jul 18, 2007
1 parent a2df2a6 commit 56055d3
Show file tree
Hide file tree
Showing 2 changed files with 225 additions and 32 deletions.
254 changes: 222 additions & 32 deletions fs/ext4/extents.c
Original file line number Diff line number Diff line change
Expand Up @@ -1140,6 +1140,53 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
return 0;
}

/*
 * ext4_ext_try_to_merge - fold "ex" together with the extent(s) following it.
 *
 * Merging only ever proceeds towards the right: while "ex" is not the last
 * extent of the leaf and ext4_can_extents_be_merged() accepts the pair
 * (ex, ex + 1), the length of ex + 1 is added to ex, the entries behind it
 * are shifted down by one slot and the entry count in the leaf header is
 * decremented.  To merge towards the left, pass "ex - 1" instead of "ex".
 *
 * The leaf header is read from path[ext_depth(inode)].p_hdr, which must
 * not be NULL.
 *
 * Returns 1 if at least one merge took place, 0 if nothing was merged.
 */
int ext4_ext_try_to_merge(struct inode *inode,
			  struct ext4_ext_path *path,
			  struct ext4_extent *ex)
{
	struct ext4_extent_header *hdr;
	struct ext4_extent *next;
	unsigned int leaf_depth, tail_bytes;
	int merged_len;
	int was_uninit = 0;
	int merged = 0;

	leaf_depth = ext_depth(inode);
	BUG_ON(path[leaf_depth].p_hdr == NULL);
	hdr = path[leaf_depth].p_hdr;

	for (;;) {
		/*
		 * EXT_LAST_EXTENT() is re-evaluated on every use because
		 * the entry count in the header shrinks as we merge.
		 */
		if (ex >= EXT_LAST_EXTENT(hdr))
			break;

		next = ex + 1;
		if (!ext4_can_extents_be_merged(inode, ex, next))
			break;

		/* Once seen, the uninitialized state sticks for later rounds. */
		if (ext4_ext_is_uninitialized(ex))
			was_uninit = 1;

		/* Absorb the neighbour's length into "ex". */
		merged_len = ext4_ext_get_actual_len(ex);
		merged_len += ext4_ext_get_actual_len(next);
		ex->ee_len = cpu_to_le16(merged_len);
		if (was_uninit)
			ext4_ext_mark_uninitialized(ex);

		/* Close the hole left behind by the absorbed extent. */
		if (next < EXT_LAST_EXTENT(hdr)) {
			tail_bytes = (EXT_LAST_EXTENT(hdr) - next)
					* sizeof(struct ext4_extent);
			memmove(next, next + 1, tail_bytes);
		}

		hdr->eh_entries = cpu_to_le16(le16_to_cpu(hdr->eh_entries) - 1);
		merged = 1;

		/* A leaf must never end up empty as the result of a merge. */
		WARN_ON(hdr->eh_entries == 0);
		if (hdr->eh_entries == 0)
			ext4_error(inode->i_sb, "ext4_ext_try_to_merge",
				   "inode#%lu, eh->eh_entries = 0!",
				   inode->i_ino);
	}

	return merged;
}

/*
* check if a portion of the "newext" extent overlaps with an
* existing extent.
Expand Down Expand Up @@ -1328,25 +1375,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,

merge:
/* try to merge extents to the right */
while (nearex < EXT_LAST_EXTENT(eh)) {
if (!ext4_can_extents_be_merged(inode, nearex, nearex + 1))
break;
/* merge with next extent! */
if (ext4_ext_is_uninitialized(nearex))
uninitialized = 1;
nearex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(nearex)
+ ext4_ext_get_actual_len(nearex + 1));
if (uninitialized)
ext4_ext_mark_uninitialized(nearex);

if (nearex + 1 < EXT_LAST_EXTENT(eh)) {
len = (EXT_LAST_EXTENT(eh) - nearex - 1)
* sizeof(struct ext4_extent);
memmove(nearex + 1, nearex + 2, len);
}
eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1);
BUG_ON(eh->eh_entries == 0);
}
ext4_ext_try_to_merge(inode, path, nearex);

/* try to merge extents to the left */

Expand Down Expand Up @@ -2012,15 +2041,158 @@ void ext4_ext_release(struct super_block *sb)
#endif
}

/*
 * This function is called by ext4_ext_get_blocks() if someone tries to write
 * to an uninitialized extent. It may result in splitting the uninitialized
 * extent into multiple extents (up to three - one initialized and two
 * uninitialized).
 * There are three possibilities:
 * a> There is no split required: Entire extent should be initialized
 * b> Splits in two extents: Write is happening at either end of the extent
 * c> Splits in three extents: Someone is writing in middle of the extent
 *
 * Naming used below, for the extent "ex" found at path[depth].p_ext:
 *   ex1 - leading part,  ee_block .. iblock - 1, stays uninitialized
 *   ex2 - written part,  starts at iblock, becomes initialized
 *   ex3 - trailing part, starts at iblock + max_blocks, stays uninitialized
 *
 * @handle:     journal handle passed through to the insert/access/dirty helpers
 * @inode:      inode owning the extent tree
 * @path:       lookup path whose leaf entry is the uninitialized extent
 * @iblock:     first logical block being written
 * @max_blocks: number of blocks the caller wants to write
 *
 * Returns err if any helper reported a non-zero error, otherwise the number
 * of blocks, starting at iblock, covered by the initialized extent (the
 * remainder of the original extent from iblock, capped at max_blocks).
 */
int ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path,
ext4_fsblk_t iblock,
unsigned long max_blocks)
{
struct ext4_extent *ex, newex;
struct ext4_extent *ex1 = NULL;
struct ext4_extent *ex2 = NULL;
struct ext4_extent *ex3 = NULL;
struct ext4_extent_header *eh;
unsigned int allocated, ee_block, ee_len, depth;
ext4_fsblk_t newblock;
int err = 0;
int ret = 0;

depth = ext_depth(inode);
eh = path[depth].p_hdr;
ex = path[depth].p_ext;
ee_block = le32_to_cpu(ex->ee_block);
ee_len = ext4_ext_get_actual_len(ex);
/* blocks left in the extent counting from iblock */
allocated = ee_len - (iblock - ee_block);
/* physical block that corresponds to iblock */
newblock = iblock - ee_block + ext_pblock(ex);
/* until a leading part is found, the written part reuses ex in place */
ex2 = ex;

/* ex1: ee_block to iblock - 1 : uninitialized */
if (iblock > ee_block) {
/*
 * ex is shortened in place to become ex1; the written part then has
 * to be built in newex and inserted as a separate extent.
 */
ex1 = ex;
ex1->ee_len = cpu_to_le16(iblock - ee_block);
ext4_ext_mark_uninitialized(ex1);
ex2 = &newex;
}
/*
 * for sanity, update the length of the ex2 extent before
 * we insert ex3, if ex1 is NULL. This is to avoid temporary
 * overlap of blocks.
 */
if (!ex1 && allocated > max_blocks)
ex2->ee_len = cpu_to_le16(max_blocks);
/* ex3: iblock + max_blocks to ee_block + ee_len - 1 : uninitialized */
if (allocated > max_blocks) {
unsigned int newdepth;
/* newex is used as scratch space for ex3 here and reused for ex2 later */
ex3 = &newex;
ex3->ee_block = cpu_to_le32(iblock + max_blocks);
ext4_ext_store_pblock(ex3, newblock + max_blocks);
ex3->ee_len = cpu_to_le16(allocated - max_blocks);
ext4_ext_mark_uninitialized(ex3);
err = ext4_ext_insert_extent(handle, inode, path, ex3);
/*
 * NOTE(review): on failure we return with the in-place length change
 * made to ex above (ex1 or ex2) still applied; nothing here restores
 * the original length.
 */
if (err)
goto out;
/*
 * The depth, and hence eh & ex might change
 * as part of the insert above.
 */
newdepth = ext_depth(inode);
if (newdepth != depth) {
depth = newdepth;
/*
 * NOTE(review): only the local "path" is replaced; the caller's
 * pointer is not updated and this function never releases the
 * newly looked-up path. Looks like a leak - confirm against the
 * allocation contract of ext4_ext_find_extent().
 */
path = ext4_ext_find_extent(inode, iblock, NULL);
if (IS_ERR(path)) {
err = PTR_ERR(path);
path = NULL;
goto out;
}
eh = path[depth].p_hdr;
ex = path[depth].p_ext;
/* keep ex2 aliased to the re-looked-up extent when it was in place */
if (ex2 != &newex)
ex2 = ex;
}
/* the trailing part is split off, so only max_blocks remain for ex2 */
allocated = max_blocks;
}
/*
 * If there was a change of depth as part of the
 * insertion of ex3 above, we need to update the length
 * of the ex1 extent again here
 * (ex is only reassigned in the depth-change branch, so ex1 != ex
 * means ex1 still points at the extent from the old lookup).
 */
if (ex1 && ex1 != ex) {
ex1 = ex;
ex1->ee_len = cpu_to_le16(iblock - ee_block);
ext4_ext_mark_uninitialized(ex1);
ex2 = &newex;
}
/* ex2: iblock to iblock + allocated - 1 : initialized */
ex2->ee_block = cpu_to_le32(iblock);
/*
 * NOTE(review): ee_start is stored here and ext4_ext_store_pblock() is
 * called with the same block right after; presumably the helper already
 * sets ee_start, which would make this store redundant - confirm.
 */
ex2->ee_start = cpu_to_le32(newblock);
ext4_ext_store_pblock(ex2, newblock);
/* length is stored without the uninitialized mark */
ex2->ee_len = cpu_to_le16(allocated);
/* ex2 lives in newex (a leading ex1 exists): insert it as a new extent */
if (ex2 != ex)
goto insert;
/*
 * NOTE(review): ex2 == ex was already modified in place above, before
 * journal access is requested here - confirm this ordering is intended.
 */
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
goto out;
/*
 * New (initialized) extent starts from the first block
 * in the current extent. i.e., ex2 == ex
 * We have to see if it can be merged with the extent
 * on the left.
 */
if (ex2 > EXT_FIRST_EXTENT(eh)) {
/*
 * To merge left, pass "ex2 - 1" to try_to_merge(),
 * since it merges towards right _only_.
 */
ret = ext4_ext_try_to_merge(inode, path, ex2 - 1);
if (ret) {
err = ext4_ext_correct_indexes(handle, inode, path);
if (err)
goto out;
depth = ext_depth(inode);
/* ex2 was absorbed into its left neighbour; follow it */
ex2--;
}
}
/*
 * Try to Merge towards right. This might be required
 * only when the whole extent is being written to.
 * i.e. ex2 == ex and ex3 == NULL.
 */
if (!ex3) {
ret = ext4_ext_try_to_merge(inode, path, ex2);
if (ret) {
err = ext4_ext_correct_indexes(handle, inode, path);
if (err)
goto out;
}
}
/* Mark modified extent as dirty */
err = ext4_ext_dirty(handle, inode, path + depth);
goto out;
insert:
/* newex now describes ex2 (any ex3 contents were already inserted above) */
err = ext4_ext_insert_extent(handle, inode, path, &newex);
out:
/* non-zero err wins; otherwise report how many blocks were initialized */
return err ? err : allocated;
}

int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t iblock,
unsigned long max_blocks, struct buffer_head *bh_result,
int create, int extend_disksize)
{
struct ext4_ext_path *path = NULL;
struct ext4_extent_header *eh;
struct ext4_extent newex, *ex;
ext4_fsblk_t goal, newblock;
int err = 0, depth;
int err = 0, depth, ret;
unsigned long allocated = 0;

__clear_bit(BH_New, &bh_result->b_state);
Expand All @@ -2033,8 +2205,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
if (goal) {
if (goal == EXT4_EXT_CACHE_GAP) {
if (!create) {
/* block isn't allocated yet and
* user doesn't want to allocate it */
/*
* block isn't allocated yet and
* user doesn't want to allocate it
*/
goto out2;
}
/* we should allocate requested block */
Expand Down Expand Up @@ -2068,6 +2242,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
* this is why assert can't be put in ext4_ext_find_extent()
*/
BUG_ON(path[depth].p_ext == NULL && depth != 0);
eh = path[depth].p_hdr;

ex = path[depth].p_ext;
if (ex) {
Expand All @@ -2076,13 +2251,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
unsigned short ee_len;

/*
* Allow future support for preallocated extents to be added
* as an RO_COMPAT feature:
* Uninitialized extents are treated as holes, except that
* we avoid (fail) allocating new blocks during a write.
* we split out initialized portions during a write.
*/
if (le16_to_cpu(ex->ee_len) > EXT_MAX_LEN)
goto out2;
ee_len = ext4_ext_get_actual_len(ex);
/* if found extent covers block, simply return it */
if (iblock >= ee_block && iblock < ee_block + ee_len) {
Expand All @@ -2091,12 +2262,27 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
allocated = ee_len - (iblock - ee_block);
ext_debug("%d fit into %lu:%d -> %llu\n", (int) iblock,
ee_block, ee_len, newblock);

/* Do not put uninitialized extent in the cache */
if (!ext4_ext_is_uninitialized(ex))
if (!ext4_ext_is_uninitialized(ex)) {
ext4_ext_put_in_cache(inode, ee_block,
ee_len, ee_start,
EXT4_EXT_CACHE_EXTENT);
goto out;
goto out;
}
if (create == EXT4_CREATE_UNINITIALIZED_EXT)
goto out;
if (!create)
goto out2;

ret = ext4_ext_convert_to_initialized(handle, inode,
path, iblock,
max_blocks);
if (ret <= 0)
goto out2;
else
allocated = ret;
goto outnew;
}
}

Expand All @@ -2105,8 +2291,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
* we couldn't try to create block if create flag is zero
*/
if (!create) {
/* put just found gap into cache to speed up
* subsequent requests */
/*
* put just found gap into cache to speed up
* subsequent requests
*/
ext4_ext_put_gap_in_cache(inode, path, iblock);
goto out2;
}
Expand Down Expand Up @@ -2152,6 +2340,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,

/* previous routine could use block we allocated */
newblock = ext_pblock(&newex);
outnew:
__set_bit(BH_New, &bh_result->b_state);

/* Cache only when it is _not_ an uninitialized extent */
Expand Down Expand Up @@ -2221,7 +2410,8 @@ void ext4_ext_truncate(struct inode * inode, struct page *page)
err = ext4_ext_remove_space(inode, last_block);

/* In a multi-transaction truncate, we only make the final
* transaction synchronous. */
* transaction synchronous.
*/
if (IS_SYNC(inode))
handle->h_sync = 1;

Expand Down
3 changes: 3 additions & 0 deletions include/linux/ext4_fs_extents.h
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,9 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)

extern int ext4_extent_tree_init(handle_t *, struct inode *);
extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
extern int ext4_ext_try_to_merge(struct inode *inode,
struct ext4_ext_path *path,
struct ext4_extent *);
extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *);
Expand Down

0 comments on commit 56055d3

Please sign in to comment.