Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 367995
b: refs/heads/master
c: 27dd438
h: refs/heads/master
i:
  367993: bd4ce4c
  367991: 3eb653e
v: v3
  • Loading branch information
Lukas Czerner authored and Theodore Ts'o committed Apr 10, 2013
1 parent 6e90f4b commit cffbae7
Show file tree
Hide file tree
Showing 7 changed files with 142 additions and 24 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: f45a5ef91bef7e02149a216ed6dc3fcdd8b38268
refs/heads/master: 27dd43854227bb0e6ab70129bd21b60d396db2e7
11 changes: 11 additions & 0 deletions trunk/Documentation/filesystems/ext4.txt
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,17 @@ Files in /sys/fs/ext4/<devname>
session_write_kbytes This file is read-only and shows the number of
kilobytes of data that have been written to this
filesystem since it was mounted.

reserved_clusters This is RW file and contains number of reserved
clusters in the file system which will be used
in the specific situations to avoid costly
zeroout, unexpected ENOSPC, or possible data
loss. The default is 2% or 4096 clusters,
whichever is smaller and this can be changed
however it can never exceed number of clusters
in the file system. If there is not enough space
for the reserved space when mounting the file
mount will _not_ fail.
..............................................................................

Ioctls
Expand Down
18 changes: 13 additions & 5 deletions trunk/fs/ext4/balloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -499,36 +499,44 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
s64 nclusters, unsigned int flags)
{
s64 free_clusters, dirty_clusters, root_clusters;
s64 free_clusters, dirty_clusters, rsv, resv_clusters;
struct percpu_counter *fcc = &sbi->s_freeclusters_counter;
struct percpu_counter *dcc = &sbi->s_dirtyclusters_counter;

free_clusters = percpu_counter_read_positive(fcc);
dirty_clusters = percpu_counter_read_positive(dcc);
resv_clusters = atomic64_read(&sbi->s_resv_clusters);

/*
* r_blocks_count should always be multiple of the cluster ratio so
* we are safe to do a plane bit shift only.
*/
root_clusters = ext4_r_blocks_count(sbi->s_es) >> sbi->s_cluster_bits;
rsv = (ext4_r_blocks_count(sbi->s_es) >> sbi->s_cluster_bits) +
resv_clusters;

if (free_clusters - (nclusters + root_clusters + dirty_clusters) <
if (free_clusters - (nclusters + rsv + dirty_clusters) <
EXT4_FREECLUSTERS_WATERMARK) {
free_clusters = percpu_counter_sum_positive(fcc);
dirty_clusters = percpu_counter_sum_positive(dcc);
}
/* Check whether we have space after accounting for current
* dirty clusters & root reserved clusters.
*/
if (free_clusters >= ((root_clusters + nclusters) + dirty_clusters))
if (free_clusters >= (rsv + nclusters + dirty_clusters))
return 1;

/* Hm, nope. Are (enough) root reserved clusters available? */
if (uid_eq(sbi->s_resuid, current_fsuid()) ||
(!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) && in_group_p(sbi->s_resgid)) ||
capable(CAP_SYS_RESOURCE) ||
(flags & EXT4_MB_USE_ROOT_BLOCKS)) {
(flags & EXT4_MB_USE_ROOT_BLOCKS)) {

if (free_clusters >= (nclusters + dirty_clusters +
resv_clusters))
return 1;
}
/* No free blocks. Let's see if we can dip into reserved pool */
if (flags & EXT4_MB_USE_RESERVED) {
if (free_clusters >= (nclusters + dirty_clusters))
return 1;
}
Expand Down
13 changes: 8 additions & 5 deletions trunk/fs/ext4/ext4.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ typedef unsigned int ext4_group_t;
#define EXT4_MB_STREAM_ALLOC 0x0800
/* Use reserved root blocks if needed */
#define EXT4_MB_USE_ROOT_BLOCKS 0x1000
/* Use blocks from reserved pool */
#define EXT4_MB_USE_RESERVED 0x2000

struct ext4_allocation_request {
/* target inode for block we're allocating */
Expand Down Expand Up @@ -557,9 +559,8 @@ enum {
#define EXT4_GET_BLOCKS_UNINIT_EXT 0x0002
#define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT (EXT4_GET_BLOCKS_UNINIT_EXT|\
EXT4_GET_BLOCKS_CREATE)
/* Caller is from the delayed allocation writeout path,
so set the magic i_delalloc_reserve_flag after taking the
inode allocation semaphore for */
/* Caller is from the delayed allocation writeout path
* finally doing the actual allocation of delayed blocks */
#define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004
/* caller is from the direct IO path, request to creation of an
unitialized extents if not allocated, split the uninitialized
Expand All @@ -571,8 +572,9 @@ enum {
/* Convert extent to initialized after IO complete */
#define EXT4_GET_BLOCKS_IO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\
EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
/* Punch out blocks of an extent */
#define EXT4_GET_BLOCKS_PUNCH_OUT_EXT 0x0020
/* Eventual metadata allocation (due to growing extent tree)
* should not fail, so try to use reserved blocks for that.*/
#define EXT4_GET_BLOCKS_METADATA_NOFAIL 0x0020
/* Don't normalize allocation size (used for fallocate) */
#define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040
/* Request will not result in inode size update (user for fallocate) */
Expand Down Expand Up @@ -1188,6 +1190,7 @@ struct ext4_sb_info {
unsigned int s_mount_flags;
unsigned int s_def_mount_opt;
ext4_fsblk_t s_sb_block;
atomic64_t s_resv_clusters;
kuid_t s_resuid;
kgid_t s_resgid;
unsigned short s_mount_state;
Expand Down
27 changes: 18 additions & 9 deletions trunk/fs/ext4/extents.c
Original file line number Diff line number Diff line change
Expand Up @@ -1942,8 +1942,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
* There is no free space in the found leaf.
* We're gonna add a new leaf in the tree.
*/
if (flag & EXT4_GET_BLOCKS_PUNCH_OUT_EXT)
flags = EXT4_MB_USE_ROOT_BLOCKS;
if (flag & EXT4_GET_BLOCKS_METADATA_NOFAIL)
flags = EXT4_MB_USE_RESERVED;
err = ext4_ext_create_new_leaf(handle, inode, flags, path, newext);
if (err)
goto cleanup;
Expand Down Expand Up @@ -2729,12 +2729,14 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,

/*
* Split the extent in two so that 'end' is the last
* block in the first new extent
* block in the first new extent. Also we should not
* fail removing space due to ENOSPC so try to use
* reserved block if that happens.
*/
err = ext4_split_extent_at(handle, inode, path,
end + 1, split_flag,
EXT4_GET_BLOCKS_PRE_IO |
EXT4_GET_BLOCKS_PUNCH_OUT_EXT);
end + 1, split_flag,
EXT4_GET_BLOCKS_PRE_IO |
EXT4_GET_BLOCKS_METADATA_NOFAIL);

if (err < 0)
goto out;
Expand Down Expand Up @@ -3209,7 +3211,8 @@ static int ext4_split_extent(handle_t *handle,
static int ext4_ext_convert_to_initialized(handle_t *handle,
struct inode *inode,
struct ext4_map_blocks *map,
struct ext4_ext_path *path)
struct ext4_ext_path *path,
int flags)
{
struct ext4_sb_info *sbi;
struct ext4_extent_header *eh;
Expand Down Expand Up @@ -3435,7 +3438,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
}

allocated = ext4_split_extent(handle, inode, path,
&split_map, split_flag, 0);
&split_map, split_flag, flags);
if (allocated < 0)
err = allocated;

Expand Down Expand Up @@ -3755,6 +3758,12 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
flags, allocated);
ext4_ext_show_leaf(inode, path);

/*
* When writing into uninitialized space, we should not fail to
* allocate metadata blocks for the new extent block if needed.
*/
flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL;

trace_ext4_ext_handle_uninitialized_extents(inode, map, flags,
allocated, newblock);

Expand Down Expand Up @@ -3818,7 +3827,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
}

/* buffered write, writepage time, convert*/
ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
ret = ext4_ext_convert_to_initialized(handle, inode, map, path, flags);
if (ret >= 0)
ext4_update_inode_fsync_trans(handle, inode, 1);
out:
Expand Down
11 changes: 10 additions & 1 deletion trunk/fs/ext4/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -1688,12 +1688,21 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
*/
map.m_lblk = next;
map.m_len = max_blocks;
get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
/*
* We're in delalloc path and it is possible that we're going to
* need more metadata blocks than previously reserved. However
* we must not fail because we're in writeback and there is
* nothing we can do about it so it might result in data loss.
* So use reserved blocks to allocate metadata if possible.
*/
get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
EXT4_GET_BLOCKS_METADATA_NOFAIL;
if (ext4_should_dioread_nolock(mpd->inode))
get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
if (mpd->b_state & (1 << BH_Delay))
get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;


blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags);
if (blks < 0) {
struct super_block *sb = mpd->inode->i_sb;
Expand Down
84 changes: 81 additions & 3 deletions trunk/fs/ext4/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly);
static void ext4_destroy_lazyinit_thread(void);
static void ext4_unregister_li_request(struct super_block *sb);
static void ext4_clear_request_list(void);
static int ext4_reserve_clusters(struct ext4_sb_info *, ext4_fsblk_t);

#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static struct file_system_type ext2_fs_type = {
Expand Down Expand Up @@ -2382,6 +2383,17 @@ struct ext4_attr {
int offset;
};

static int parse_strtoull(const char *buf,
unsigned long long max, unsigned long long *value)
{
int ret;

ret = kstrtoull(skip_spaces(buf), 0, value);
if (!ret && *value > max)
ret = -EINVAL;
return ret;
}

static int parse_strtoul(const char *buf,
unsigned long max, unsigned long *value)
{
Expand Down Expand Up @@ -2466,6 +2478,27 @@ static ssize_t sbi_ui_store(struct ext4_attr *a,
return count;
}

static ssize_t reserved_clusters_show(struct ext4_attr *a,
struct ext4_sb_info *sbi, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%llu\n",
(unsigned long long) atomic64_read(&sbi->s_resv_clusters));
}

static ssize_t reserved_clusters_store(struct ext4_attr *a,
struct ext4_sb_info *sbi,
const char *buf, size_t count)
{
unsigned long long val;
int ret;

if (parse_strtoull(buf, -1ULL, &val))
return -EINVAL;
ret = ext4_reserve_clusters(sbi, val);

return ret ? ret : count;
}

static ssize_t trigger_test_error(struct ext4_attr *a,
struct ext4_sb_info *sbi,
const char *buf, size_t count)
Expand Down Expand Up @@ -2503,6 +2536,7 @@ static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
EXT4_RO_ATTR(delayed_allocation_blocks);
EXT4_RO_ATTR(session_write_kbytes);
EXT4_RO_ATTR(lifetime_write_kbytes);
EXT4_RW_ATTR(reserved_clusters);
EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
inode_readahead_blks_store, s_inode_readahead_blks);
EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
Expand All @@ -2520,6 +2554,7 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(delayed_allocation_blocks),
ATTR_LIST(session_write_kbytes),
ATTR_LIST(lifetime_write_kbytes),
ATTR_LIST(reserved_clusters),
ATTR_LIST(inode_readahead_blks),
ATTR_LIST(inode_goal),
ATTR_LIST(mb_stats),
Expand Down Expand Up @@ -3195,6 +3230,40 @@ int ext4_calculate_overhead(struct super_block *sb)
return 0;
}


static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi)
{
ext4_fsblk_t resv_clusters;

/*
* By default we reserve 2% or 4096 clusters, whichever is smaller.
* This should cover the situations where we can not afford to run
* out of space like for example punch hole, or converting
* uninitialized extents in delalloc path. In most cases such
* allocation would require 1, or 2 blocks, higher numbers are
* very rare.
*/
resv_clusters = ext4_blocks_count(sbi->s_es) >> sbi->s_cluster_bits;

do_div(resv_clusters, 50);
resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);

return resv_clusters;
}


static int ext4_reserve_clusters(struct ext4_sb_info *sbi, ext4_fsblk_t count)
{
ext4_fsblk_t clusters = ext4_blocks_count(sbi->s_es) >>
sbi->s_cluster_bits;

if (count >= clusters)
return -EINVAL;

atomic64_set(&sbi->s_resv_clusters, count);
return 0;
}

static int ext4_fill_super(struct super_block *sb, void *data, int silent)
{
char *orig_data = kstrdup(data, GFP_KERNEL);
Expand Down Expand Up @@ -3918,6 +3987,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"available");
}

err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sbi));
if (err) {
ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
"reserved pool", ext4_calculate_resv_clusters(sbi));
goto failed_mount4a;
}

err = ext4_setup_system_zone(sb);
if (err) {
ext4_msg(sb, KERN_ERR, "failed to initialize system "
Expand Down Expand Up @@ -4750,9 +4826,10 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
struct super_block *sb = dentry->d_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
ext4_fsblk_t overhead = 0;
ext4_fsblk_t overhead = 0, resv_blocks;
u64 fsid;
s64 bfree;
resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));

if (!test_opt(sb, MINIX_DF))
overhead = sbi->s_overhead;
Expand All @@ -4764,8 +4841,9 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
/* prevent underflow in case that few free space is available */
buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
if (buf->f_bfree < ext4_r_blocks_count(es))
buf->f_bavail = buf->f_bfree -
(ext4_r_blocks_count(es) + resv_blocks);
if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))
buf->f_bavail = 0;
buf->f_files = le32_to_cpu(es->s_inodes_count);
buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
Expand Down

0 comments on commit cffbae7

Please sign in to comment.