Skip to content

Commit

Permalink
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
Browse files Browse the repository at this point in the history
* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
  Btrfs: try committing transaction before returning ENOSPC
  Btrfs: add better -ENOSPC handling
  • Loading branch information
Linus Torvalds committed Feb 26, 2009
2 parents babb29b + 4e06bdd commit 64e7130
Show file tree
Hide file tree
Showing 6 changed files with 308 additions and 76 deletions.
8 changes: 8 additions & 0 deletions fs/btrfs/btrfs_inode.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ struct btrfs_inode {
*/
struct list_head delalloc_inodes;

/* the space_info for where this inode's data allocations are done */
struct btrfs_space_info *space_info;

/* full 64 bit generation number, struct vfs_inode doesn't have a big
* enough field for this.
*/
Expand Down Expand Up @@ -94,6 +97,11 @@ struct btrfs_inode {
*/
u64 delalloc_bytes;

/* total number of bytes that may be used for this inode for
* delalloc
*/
u64 reserved_bytes;

/*
* the size of the file stored in the metadata on disk. data=ordered
* means the in-memory i_size might be larger than the size on disk
Expand Down
40 changes: 31 additions & 9 deletions fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -596,13 +596,27 @@ struct btrfs_block_group_item {

struct btrfs_space_info {
u64 flags;
u64 total_bytes;
u64 bytes_used;
u64 bytes_pinned;
u64 bytes_reserved;
u64 bytes_readonly;
int full;
int force_alloc;

u64 total_bytes; /* total bytes in the space */
u64 bytes_used; /* total bytes used on disk */
u64 bytes_pinned; /* total bytes pinned, will be freed when the
transaction finishes */
u64 bytes_reserved; /* total bytes the allocator has reserved for
current allocations */
u64 bytes_readonly; /* total bytes that are read only */

/* delalloc accounting */
u64 bytes_delalloc; /* number of bytes reserved for allocation,
this space is not necessarily reserved yet
by the allocator */
u64 bytes_may_use; /* number of bytes that may be used for
delalloc */

int full; /* indicates that we cannot allocate any more
chunks for this space */
int force_alloc; /* set if we need to force a chunk alloc for
this space */

struct list_head list;

/* for block groups in our same type */
Expand Down Expand Up @@ -1782,6 +1796,16 @@ int btrfs_add_dead_reloc_root(struct btrfs_root *root);
int btrfs_cleanup_reloc_trees(struct btrfs_root *root);
int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
int btrfs_check_metadata_free_space(struct btrfs_root *root);
int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
void btrfs_free_reserved_data_space(struct btrfs_root *root,
struct inode *inode, u64 bytes);
void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
/* ctree.c */
int btrfs_previous_item(struct btrfs_root *root,
struct btrfs_path *path, u64 min_objectid,
Expand Down Expand Up @@ -2027,8 +2051,6 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
unsigned long btrfs_force_ra(struct address_space *mapping,
struct file_ra_state *ra, struct file *file,
pgoff_t offset, pgoff_t last_index);
int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
int for_del);
int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page);
int btrfs_readpage(struct file *file, struct page *page);
void btrfs_delete_inode(struct inode *inode);
Expand Down
252 changes: 237 additions & 15 deletions fs/btrfs/extent-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ static int update_block_group(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, int alloc,
int mark_free);

static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes,
u64 flags, int force);

static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
return (cache->flags & bits) == bits;
Expand Down Expand Up @@ -1909,6 +1913,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->bytes_pinned = 0;
found->bytes_reserved = 0;
found->bytes_readonly = 0;
found->bytes_delalloc = 0;
found->full = 0;
found->force_alloc = 0;
*space_info = found;
Expand Down Expand Up @@ -1972,6 +1977,233 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
return flags;
}

static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data)
{
struct btrfs_fs_info *info = root->fs_info;
u64 alloc_profile;

if (data) {
alloc_profile = info->avail_data_alloc_bits &
info->data_alloc_profile;
data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
} else if (root == root->fs_info->chunk_root) {
alloc_profile = info->avail_system_alloc_bits &
info->system_alloc_profile;
data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
} else {
alloc_profile = info->avail_metadata_alloc_bits &
info->metadata_alloc_profile;
data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
}

return btrfs_reduce_alloc_profile(root, data);
}

void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
{
u64 alloc_target;

alloc_target = btrfs_get_alloc_profile(root, 1);
BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
alloc_target);
}

/*
* for now this just makes sure we have at least 5% of our metadata space free
* for use.
*/
int btrfs_check_metadata_free_space(struct btrfs_root *root)
{
struct btrfs_fs_info *info = root->fs_info;
struct btrfs_space_info *meta_sinfo;
u64 alloc_target, thresh;
int committed = 0, ret;

/* get the space info for where the metadata will live */
alloc_target = btrfs_get_alloc_profile(root, 0);
meta_sinfo = __find_space_info(info, alloc_target);

again:
spin_lock(&meta_sinfo->lock);
if (!meta_sinfo->full)
thresh = meta_sinfo->total_bytes * 80;
else
thresh = meta_sinfo->total_bytes * 95;

do_div(thresh, 100);

if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) {
struct btrfs_trans_handle *trans;
if (!meta_sinfo->full) {
meta_sinfo->force_alloc = 1;
spin_unlock(&meta_sinfo->lock);

trans = btrfs_start_transaction(root, 1);
if (!trans)
return -ENOMEM;

ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2 * 1024 * 1024, alloc_target, 0);
btrfs_end_transaction(trans, root);
goto again;
}
spin_unlock(&meta_sinfo->lock);

if (!committed) {
committed = 1;
trans = btrfs_join_transaction(root, 1);
if (!trans)
return -ENOMEM;
ret = btrfs_commit_transaction(trans, root);
if (ret)
return ret;
goto again;
}
return -ENOSPC;
}
spin_unlock(&meta_sinfo->lock);

return 0;
}

/*
* This will check the space that the inode allocates from to make sure we have
* enough space for bytes.
*/
int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes)
{
struct btrfs_space_info *data_sinfo;
int ret = 0, committed = 0;

/* make sure bytes are sectorsize aligned */
bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);

data_sinfo = BTRFS_I(inode)->space_info;
again:
/* make sure we have enough space to handle the data first */
spin_lock(&data_sinfo->lock);
if (data_sinfo->total_bytes - data_sinfo->bytes_used -
data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
data_sinfo->bytes_may_use < bytes) {
struct btrfs_trans_handle *trans;

/*
* if we don't have enough free bytes in this space then we need
* to alloc a new chunk.
*/
if (!data_sinfo->full) {
u64 alloc_target;

data_sinfo->force_alloc = 1;
spin_unlock(&data_sinfo->lock);

alloc_target = btrfs_get_alloc_profile(root, 1);
trans = btrfs_start_transaction(root, 1);
if (!trans)
return -ENOMEM;

ret = do_chunk_alloc(trans, root->fs_info->extent_root,
bytes + 2 * 1024 * 1024,
alloc_target, 0);
btrfs_end_transaction(trans, root);
if (ret)
return ret;
goto again;
}
spin_unlock(&data_sinfo->lock);

/* commit the current transaction and try again */
if (!committed) {
committed = 1;
trans = btrfs_join_transaction(root, 1);
if (!trans)
return -ENOMEM;
ret = btrfs_commit_transaction(trans, root);
if (ret)
return ret;
goto again;
}

printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
", %llu bytes_used, %llu bytes_reserved, "
"%llu bytes_pinned, %llu bytes_readonly, %llu may use"
"%llu total\n", bytes, data_sinfo->bytes_delalloc,
data_sinfo->bytes_used, data_sinfo->bytes_reserved,
data_sinfo->bytes_pinned, data_sinfo->bytes_readonly,
data_sinfo->bytes_may_use, data_sinfo->total_bytes);
return -ENOSPC;
}
data_sinfo->bytes_may_use += bytes;
BTRFS_I(inode)->reserved_bytes += bytes;
spin_unlock(&data_sinfo->lock);

return btrfs_check_metadata_free_space(root);
}

/*
* if there was an error for whatever reason after calling
* btrfs_check_data_free_space, call this so we can cleanup the counters.
*/
void btrfs_free_reserved_data_space(struct btrfs_root *root,
struct inode *inode, u64 bytes)
{
struct btrfs_space_info *data_sinfo;

/* make sure bytes are sectorsize aligned */
bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);

data_sinfo = BTRFS_I(inode)->space_info;
spin_lock(&data_sinfo->lock);
data_sinfo->bytes_may_use -= bytes;
BTRFS_I(inode)->reserved_bytes -= bytes;
spin_unlock(&data_sinfo->lock);
}

/* called when we are adding a delalloc extent to the inode's io_tree */
void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
u64 bytes)
{
struct btrfs_space_info *data_sinfo;

/* get the space info for where this inode will be storing its data */
data_sinfo = BTRFS_I(inode)->space_info;

/* make sure we have enough space to handle the data first */
spin_lock(&data_sinfo->lock);
data_sinfo->bytes_delalloc += bytes;

/*
* we are adding a delalloc extent without calling
* btrfs_check_data_free_space first. This happens on a weird
* writepage condition, but shouldn't hurt our accounting
*/
if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) {
data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes;
BTRFS_I(inode)->reserved_bytes = 0;
} else {
data_sinfo->bytes_may_use -= bytes;
BTRFS_I(inode)->reserved_bytes -= bytes;
}

spin_unlock(&data_sinfo->lock);
}

/* called when we are clearing an delalloc extent from the inode's io_tree */
void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes)
{
struct btrfs_space_info *info;

info = BTRFS_I(inode)->space_info;

spin_lock(&info->lock);
info->bytes_delalloc -= bytes;
spin_unlock(&info->lock);
}

static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes,
u64 flags, int force)
Expand Down Expand Up @@ -3105,6 +3337,10 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
(unsigned long long)(info->total_bytes - info->bytes_used -
info->bytes_pinned - info->bytes_reserved),
(info->full) ? "" : "not ");
printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
" may_use=%llu, used=%llu\n", info->total_bytes,
info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use,
info->bytes_used);

down_read(&info->groups_sem);
list_for_each_entry(cache, &info->block_groups, list) {
Expand All @@ -3131,24 +3367,10 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
{
int ret;
u64 search_start = 0;
u64 alloc_profile;
struct btrfs_fs_info *info = root->fs_info;

if (data) {
alloc_profile = info->avail_data_alloc_bits &
info->data_alloc_profile;
data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
} else if (root == root->fs_info->chunk_root) {
alloc_profile = info->avail_system_alloc_bits &
info->system_alloc_profile;
data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
} else {
alloc_profile = info->avail_metadata_alloc_bits &
info->metadata_alloc_profile;
data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
}
data = btrfs_get_alloc_profile(root, data);
again:
data = btrfs_reduce_alloc_profile(root, data);
/*
* the only place that sets empty_size is btrfs_realloc_node, which
* is not called recursively on allocations
Expand Down
Loading

0 comments on commit 64e7130

Please sign in to comment.