diff --git a/[refs] b/[refs]
index b6ff7d475b00..cbf27453f166 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: cd1cfc49153ba2bef247e500d8bd4d135193ece9
+refs/heads/master: 362a20c5e27614739c46707d1c5f55c214d164ce
diff --git a/trunk/fs/btrfs/Makefile b/trunk/fs/btrfs/Makefile
index 0bc4d3a10a5f..0c4fa2befae7 100644
--- a/trunk/fs/btrfs/Makefile
+++ b/trunk/fs/btrfs/Makefile
@@ -8,7 +8,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
	   export.o tree-log.o free-space-cache.o zlib.o lzo.o \
	   compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
-	   reada.o backref.o ulist.o qgroup.o
+	   reada.o backref.o ulist.o
 
 btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
 btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
diff --git a/trunk/fs/btrfs/async-thread.c b/trunk/fs/btrfs/async-thread.c
index 58b7d14b08ee..42704149b723 100644
--- a/trunk/fs/btrfs/async-thread.c
+++ b/trunk/fs/btrfs/async-thread.c
@@ -206,17 +206,10 @@ static noinline void run_ordered_completions(struct btrfs_workers *workers,
 
 		work->ordered_func(work);
 
-		/* now take the lock again and drop our item from the list */
+		/* now take the lock again and call the freeing code */
 		spin_lock(&workers->order_lock);
 		list_del(&work->order_list);
-		spin_unlock(&workers->order_lock);
-
-		/*
-		 * we don't want to call the ordered free functions
-		 * with the lock held though
-		 */
 		work->ordered_free(work);
-		spin_lock(&workers->order_lock);
 	}
 
 	spin_unlock(&workers->order_lock);
diff --git a/trunk/fs/btrfs/backref.c b/trunk/fs/btrfs/backref.c
index 7d80ddd8f544..a383c18e74e8 100644
--- a/trunk/fs/btrfs/backref.c
+++ b/trunk/fs/btrfs/backref.c
@@ -773,8 +773,9 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
  */
 static int find_parent_nodes(struct btrfs_trans_handle *trans,
			     struct btrfs_fs_info *fs_info, u64 bytenr,
-			     u64 time_seq, struct ulist *refs,
-			     struct ulist *roots, const u64 *extent_item_pos)
+			     u64 delayed_ref_seq, u64 time_seq,
+			     struct ulist *refs, struct ulist *roots,
+			     const u64 *extent_item_pos)
 {
 	struct btrfs_key key;
 	struct btrfs_path *path;
@@ -836,7 +837,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 				btrfs_put_delayed_ref(&head->node);
 				goto again;
 			}
-			ret = __add_delayed_refs(head, time_seq,
+			ret = __add_delayed_refs(head, delayed_ref_seq,
 						 &prefs_delayed);
 			mutex_unlock(&head->mutex);
 			if (ret) {
@@ -980,7 +981,8 @@ static void free_leaf_list(struct ulist *blocks)
  */
 static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
				struct btrfs_fs_info *fs_info, u64 bytenr,
-				u64 time_seq, struct ulist **leafs,
+				u64 delayed_ref_seq, u64 time_seq,
+				struct ulist **leafs,
				const u64 *extent_item_pos)
 {
 	struct ulist *tmp;
@@ -995,7 +997,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
 		return -ENOMEM;
 	}
 
-	ret = find_parent_nodes(trans, fs_info, bytenr,
+	ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq,
				time_seq, *leafs, tmp, extent_item_pos);
 	ulist_free(tmp);
 
@@ -1022,7 +1024,8 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
  */
 int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info, u64 bytenr,
-			 u64 time_seq, struct ulist **roots)
+			 u64 delayed_ref_seq, u64 time_seq,
+			 struct ulist **roots)
 {
 	struct ulist *tmp;
 	struct ulist_node *node = NULL;
@@ -1040,7 +1043,7 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
 
 	ULIST_ITER_INIT(&uiter);
 	while (1) {
-		ret = find_parent_nodes(trans, fs_info, bytenr,
+		ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq,
					time_seq, tmp, *roots, NULL);
 		if (ret < 0 && ret != -ENOENT) {
 			ulist_free(tmp);
@@ -1373,9 +1376,11 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 	struct ulist *roots = NULL;
 	struct ulist_node *ref_node = NULL;
 	struct ulist_node *root_node = NULL;
+	struct seq_list seq_elem = {};
 	struct seq_list tree_mod_seq_elem = {};
 	struct ulist_iterator ref_uiter;
 	struct ulist_iterator root_uiter;
+	struct btrfs_delayed_ref_root *delayed_refs = NULL;
 
 	pr_debug("resolving all inodes for extent %llu\n",
			extent_item_objectid);
@@ -1386,11 +1391,16 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 		trans = btrfs_join_transaction(fs_info->extent_root);
 		if (IS_ERR(trans))
 			return PTR_ERR(trans);
+
+		delayed_refs = &trans->transaction->delayed_refs;
+		spin_lock(&delayed_refs->lock);
+		btrfs_get_delayed_seq(delayed_refs, &seq_elem);
+		spin_unlock(&delayed_refs->lock);
 		btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
 	}
 
 	ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
-				   tree_mod_seq_elem.seq, &refs,
+				   seq_elem.seq, tree_mod_seq_elem.seq, &refs,
				   &extent_item_pos);
 	if (ret)
 		goto out;
@@ -1398,7 +1408,8 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 	ULIST_ITER_INIT(&ref_uiter);
 	while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
 		ret = btrfs_find_all_roots(trans, fs_info, ref_node->val,
-					   tree_mod_seq_elem.seq, &roots);
+					   seq_elem.seq,
+					   tree_mod_seq_elem.seq, &roots);
 		if (ret)
 			break;
 		ULIST_ITER_INIT(&root_uiter);
@@ -1420,6 +1431,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 out:
 	if (!search_commit_root) {
 		btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+		btrfs_put_delayed_seq(delayed_refs, &seq_elem);
 		btrfs_end_transaction(trans, fs_info->extent_root);
 	}
 
diff --git a/trunk/fs/btrfs/backref.h b/trunk/fs/btrfs/backref.h
index 3a1ad3e2dcb0..c18d8ac7b795 100644
--- a/trunk/fs/btrfs/backref.h
+++ b/trunk/fs/btrfs/backref.h
@@ -58,7 +58,8 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
 
 int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info, u64 bytenr,
-			 u64 time_seq, struct ulist **roots);
+			 u64 delayed_ref_seq, u64 time_seq,
+			 struct ulist **roots);
 
 struct btrfs_data_container *init_data_container(u32 total_bytes);
 struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
diff --git a/trunk/fs/btrfs/btrfs_inode.h b/trunk/fs/btrfs/btrfs_inode.h
index 5b2ad6bc4fe7..12394a90d60f 100644
--- a/trunk/fs/btrfs/btrfs_inode.h
+++ b/trunk/fs/btrfs/btrfs_inode.h
@@ -87,6 +87,9 @@ struct btrfs_inode {
 	/* node for the red-black tree that links inodes in subvolume root */
 	struct rb_node rb_node;
 
+	/* the space_info for where this inode's data allocations are done */
+	struct btrfs_space_info *space_info;
+
 	unsigned long runtime_flags;
 
 	/* full 64 bit generation number, struct vfs_inode doesn't have a big
@@ -188,14 +191,11 @@ static inline void btrfs_i_size_write(struct inode *inode, u64 size)
 	BTRFS_I(inode)->disk_i_size = size;
 }
 
-static inline bool btrfs_is_free_space_inode(struct inode *inode)
+static inline bool btrfs_is_free_space_inode(struct btrfs_root *root,
+					     struct inode *inode)
 {
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-
-	if (root == root->fs_info->tree_root &&
-	    btrfs_ino(inode) != BTRFS_BTREE_INODE_OBJECTID)
-		return true;
-	if (BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
+	if (root == root->fs_info->tree_root ||
+	    BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
 		return true;
 	return false;
 }
diff --git a/trunk/fs/btrfs/ctree.c b/trunk/fs/btrfs/ctree.c
index fb21431fe4e0..8206b3900587 100644
--- a/trunk/fs/btrfs/ctree.c
+++ b/trunk/fs/btrfs/ctree.c
@@ -321,7 +321,7 @@ struct tree_mod_root {
 struct tree_mod_elem {
 	struct rb_node node;
 	u64 index;		/* shifted logical */
-	u64 seq;
+	struct seq_list elem;
 	enum mod_log_op op;
 
 	/* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */
@@ -341,50 +341,20 @@ struct tree_mod_elem {
 	struct tree_mod_root old_root;
 };
 
-static inline void tree_mod_log_read_lock(struct btrfs_fs_info *fs_info)
-{
-	read_lock(&fs_info->tree_mod_log_lock);
-}
-
-static inline void tree_mod_log_read_unlock(struct btrfs_fs_info *fs_info)
-{
-	read_unlock(&fs_info->tree_mod_log_lock);
-}
-
-static inline void tree_mod_log_write_lock(struct btrfs_fs_info *fs_info)
-{
-	write_lock(&fs_info->tree_mod_log_lock);
-}
-
-static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
+static inline void
+__get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem)
 {
-	write_unlock(&fs_info->tree_mod_log_lock);
+	elem->seq = atomic_inc_return(&fs_info->tree_mod_seq);
+	list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
 }
 
-/*
- * This adds a new blocker to the tree mod log's blocker list if the @elem
- * passed does not already have a sequence number set. So when a caller expects
- * to record tree modifications, it should ensure to set elem->seq to zero
- * before calling btrfs_get_tree_mod_seq.
- * Returns a fresh, unused tree log modification sequence number, even if no new
- * blocker was added.
- */
-u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
-			   struct seq_list *elem)
+void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
+			    struct seq_list *elem)
 {
-	u64 seq;
-
-	tree_mod_log_write_lock(fs_info);
+	elem->flags = 1;
 	spin_lock(&fs_info->tree_mod_seq_lock);
-	if (!elem->seq) {
-		elem->seq = btrfs_inc_tree_mod_seq(fs_info);
-		list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
-	}
-	seq = btrfs_inc_tree_mod_seq(fs_info);
+	__get_tree_mod_seq(fs_info, elem);
 	spin_unlock(&fs_info->tree_mod_seq_lock);
-	tree_mod_log_write_unlock(fs_info);
-
-	return seq;
 }
 
 void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
@@ -401,46 +371,41 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
 	if (!seq_putting)
 		return;
 
+	BUG_ON(!(elem->flags & 1));
 	spin_lock(&fs_info->tree_mod_seq_lock);
 	list_del(&elem->list);
-	elem->seq = 0;
 
 	list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) {
-		if (cur_elem->seq < min_seq) {
+		if ((cur_elem->flags & 1) && cur_elem->seq < min_seq) {
 			if (seq_putting > cur_elem->seq) {
 				/*
				 * blocker with lower sequence number exists, we
				 * cannot remove anything from the log
				 */
-				spin_unlock(&fs_info->tree_mod_seq_lock);
-				return;
+				goto out;
 			}
 			min_seq = cur_elem->seq;
 		}
 	}
-	spin_unlock(&fs_info->tree_mod_seq_lock);
-
-	/*
-	 * we removed the lowest blocker from the blocker list, so there may be
-	 * more processible delayed refs.
-	 */
-	wake_up(&fs_info->tree_mod_seq_wait);
 
 	/*
	 * anything that's lower than the lowest existing (read: blocked)
	 * sequence number can be removed from the tree.
	 */
-	tree_mod_log_write_lock(fs_info);
+	write_lock(&fs_info->tree_mod_log_lock);
 	tm_root = &fs_info->tree_mod_log;
 	for (node = rb_first(tm_root); node; node = next) {
 		next = rb_next(node);
 		tm = container_of(node, struct tree_mod_elem, node);
-		if (tm->seq > min_seq)
+		if (tm->elem.seq > min_seq)
 			continue;
 		rb_erase(node, tm_root);
+		list_del(&tm->elem.list);
 		kfree(tm);
 	}
-	tree_mod_log_write_unlock(fs_info);
+	write_unlock(&fs_info->tree_mod_log_lock);
+out:
+	spin_unlock(&fs_info->tree_mod_seq_lock);
 }
 
 /*
@@ -458,9 +423,11 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
 	struct rb_node **new;
 	struct rb_node *parent = NULL;
 	struct tree_mod_elem *cur;
+	int ret = 0;
 
-	BUG_ON(!tm || !tm->seq);
+	BUG_ON(!tm || !tm->elem.seq);
 
+	write_lock(&fs_info->tree_mod_log_lock);
 	tm_root = &fs_info->tree_mod_log;
 	new = &tm_root->rb_node;
 	while (*new) {
@@ -470,81 +437,88 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
 			new = &((*new)->rb_left);
 		else if (cur->index > tm->index)
 			new = &((*new)->rb_right);
-		else if (cur->seq < tm->seq)
+		else if (cur->elem.seq < tm->elem.seq)
 			new = &((*new)->rb_left);
-		else if (cur->seq > tm->seq)
+		else if (cur->elem.seq > tm->elem.seq)
 			new = &((*new)->rb_right);
 		else {
 			kfree(tm);
-			return -EEXIST;
+			ret = -EEXIST;
+			goto unlock;
 		}
 	}
 
 	rb_link_node(&tm->node, parent, new);
 	rb_insert_color(&tm->node, tm_root);
-	return 0;
+unlock:
+	write_unlock(&fs_info->tree_mod_log_lock);
+	return ret;
 }
 
-/*
- * Determines if logging can be omitted. Returns 1 if it can. Otherwise, it
- * returns zero with the tree_mod_log_lock acquired. The caller must hold
- * this until all tree mod log insertions are recorded in the rb tree and then
- * call tree_mod_log_write_unlock() to release.
- */
 static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
				    struct extent_buffer *eb) {
 	smp_mb();
 	if (list_empty(&(fs_info)->tree_mod_seq_list))
 		return 1;
-	if (eb && btrfs_header_level(eb) == 0)
-		return 1;
-
-	tree_mod_log_write_lock(fs_info);
-	if (list_empty(&fs_info->tree_mod_seq_list)) {
-		/*
-		 * someone emptied the list while we were waiting for the lock.
-		 * we must not add to the list when no blocker exists.
-		 */
-		tree_mod_log_write_unlock(fs_info);
+	if (!eb)
+		return 0;
+	if (btrfs_header_level(eb) == 0)
 		return 1;
-	}
-
 	return 0;
 }
 
 /*
- * This allocates memory and gets a tree modification sequence number.
+ * This allocates memory and gets a tree modification sequence number when
+ * needed.
  *
- * Returns <0 on error.
- * Returns >0 (the added sequence number) on success.
+ * Returns 0 when no sequence number is needed, < 0 on error.
+ * Returns 1 when a sequence number was added. In this case,
+ * fs_info->tree_mod_seq_lock was acquired and must be released by the caller
+ * after inserting into the rb tree.
  */
 static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
				 struct tree_mod_elem **tm_ret)
 {
 	struct tree_mod_elem *tm;
+	int seq;
 
-	/*
-	 * once we switch from spin locks to something different, we should
-	 * honor the flags parameter here.
-	 */
-	tm = *tm_ret = kzalloc(sizeof(*tm), GFP_ATOMIC);
+	if (tree_mod_dont_log(fs_info, NULL))
+		return 0;
+
+	tm = *tm_ret = kzalloc(sizeof(*tm), flags);
 	if (!tm)
 		return -ENOMEM;
 
-	tm->seq = btrfs_inc_tree_mod_seq(fs_info);
-	return tm->seq;
+	tm->elem.flags = 0;
+	spin_lock(&fs_info->tree_mod_seq_lock);
+	if (list_empty(&fs_info->tree_mod_seq_list)) {
+		/*
+		 * someone emptied the list while we were waiting for the lock.
+		 * we must not add to the list, because no blocker exists. items
+		 * are removed from the list only when the existing blocker is
+		 * removed from the list.
+		 */
+		kfree(tm);
+		seq = 0;
+		spin_unlock(&fs_info->tree_mod_seq_lock);
+	} else {
+		__get_tree_mod_seq(fs_info, &tm->elem);
+		seq = tm->elem.seq;
+	}
+
+	return seq;
 }
 
-static inline int
-__tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
-			  struct extent_buffer *eb, int slot,
-			  enum mod_log_op op, gfp_t flags)
+static noinline int
+tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
+			     struct extent_buffer *eb, int slot,
+			     enum mod_log_op op, gfp_t flags)
 {
-	int ret;
 	struct tree_mod_elem *tm;
+	int ret;
 
 	ret = tree_mod_alloc(fs_info, flags, &tm);
-	if (ret < 0)
+	if (ret <= 0)
 		return ret;
 
 	tm->index = eb->start >> PAGE_CACHE_SHIFT;
@@ -556,22 +530,8 @@ __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
 	tm->slot = slot;
 	tm->generation = btrfs_node_ptr_generation(eb, slot);
 
-	return __tree_mod_log_insert(fs_info, tm);
-}
-
-static noinline int
-tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
-			     struct extent_buffer *eb, int slot,
-			     enum mod_log_op op, gfp_t flags)
-{
-	int ret;
-
-	if (tree_mod_dont_log(fs_info, eb))
-		return 0;
-
-	ret = __tree_mod_log_insert_key(fs_info, eb, slot, op, flags);
-
-	tree_mod_log_write_unlock(fs_info);
+	ret = __tree_mod_log_insert(fs_info, tm);
+	spin_unlock(&fs_info->tree_mod_seq_lock);
 
 	return ret;
 }
@@ -582,14 +542,6 @@ tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
 	return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS);
 }
 
-static noinline int
-tree_mod_log_insert_key_locked(struct btrfs_fs_info *fs_info,
-			       struct extent_buffer *eb, int slot,
-			       enum mod_log_op op)
-{
-	return __tree_mod_log_insert_key(fs_info, eb, slot, op, GFP_NOFS);
-}
-
 static noinline int
 tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
			 struct extent_buffer *eb, int dst_slot, int src_slot,
@@ -603,14 +555,14 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
 		return 0;
 
 	for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
-		ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot,
+		ret = tree_mod_log_insert_key(fs_info, eb, i + dst_slot,
					      MOD_LOG_KEY_REMOVE_WHILE_MOVING);
 		BUG_ON(ret < 0);
 	}
 
 	ret = tree_mod_alloc(fs_info, flags, &tm);
-	if (ret < 0)
-		goto out;
+	if (ret <= 0)
+		return ret;
 
 	tm->index = eb->start >> PAGE_CACHE_SHIFT;
 	tm->slot = src_slot;
@@ -619,26 +571,10 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
 	tm->op = MOD_LOG_MOVE_KEYS;
 
 	ret = __tree_mod_log_insert(fs_info, tm);
-out:
-	tree_mod_log_write_unlock(fs_info);
+	spin_unlock(&fs_info->tree_mod_seq_lock);
 	return ret;
 }
 
-static inline void
-__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
-{
-	int i;
-	u32 nritems;
-	int ret;
-
-	nritems = btrfs_header_nritems(eb);
-	for (i = nritems - 1; i >= 0; i--) {
-		ret = tree_mod_log_insert_key_locked(fs_info, eb, i,
-				MOD_LOG_KEY_REMOVE_WHILE_FREEING);
-		BUG_ON(ret < 0);
-	}
-}
-
 static noinline int
 tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
			 struct extent_buffer *old_root,
@@ -647,14 +583,9 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
 	struct tree_mod_elem *tm;
 	int ret;
 
-	if (tree_mod_dont_log(fs_info, NULL))
-		return 0;
-
-	__tree_mod_log_free_eb(fs_info, old_root);
-
 	ret = tree_mod_alloc(fs_info, flags, &tm);
-	if (ret < 0)
-		goto out;
+	if (ret <= 0)
+		return ret;
 
 	tm->index = new_root->start >> PAGE_CACHE_SHIFT;
 	tm->old_root.logical = old_root->start;
@@ -663,8 +594,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
 	tm->op = MOD_LOG_ROOT_REPLACE;
 
 	ret = __tree_mod_log_insert(fs_info, tm);
-out:
-	tree_mod_log_write_unlock(fs_info);
+	spin_unlock(&fs_info->tree_mod_seq_lock);
 	return ret;
 }
 
@@ -678,7 +608,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
 	struct tree_mod_elem *found = NULL;
 	u64 index = start >> PAGE_CACHE_SHIFT;
 
-	tree_mod_log_read_lock(fs_info);
+	read_lock(&fs_info->tree_mod_log_lock);
 	tm_root = &fs_info->tree_mod_log;
 	node = tm_root->rb_node;
 	while (node) {
@@ -687,18 +617,18 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
 			node = node->rb_left;
 		} else if (cur->index > index) {
 			node = node->rb_right;
-		} else if (cur->seq < min_seq) {
+		} else if (cur->elem.seq < min_seq) {
 			node = node->rb_left;
 		} else if (!smallest) {
 			/* we want the node with the highest seq */
 			if (found)
-				BUG_ON(found->seq > cur->seq);
+				BUG_ON(found->elem.seq > cur->elem.seq);
 			found = cur;
 			node = node->rb_left;
-		} else if (cur->seq > min_seq) {
+		} else if (cur->elem.seq > min_seq) {
 			/* we want the node with the smallest seq */
 			if (found)
-				BUG_ON(found->seq < cur->seq);
+				BUG_ON(found->elem.seq < cur->elem.seq);
 			found = cur;
 			node = node->rb_right;
 		} else {
@@ -706,7 +636,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
 			break;
 		}
 	}
-	tree_mod_log_read_unlock(fs_info);
+	read_unlock(&fs_info->tree_mod_log_lock);
 
 	return found;
 }
@@ -734,7 +664,7 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
 	return __tree_mod_log_search(fs_info, start, min_seq, 0);
 }
 
-static noinline void
+static inline void
 tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
		     struct extent_buffer *src, unsigned long dst_offset,
		     unsigned long src_offset, int nr_items)
@@ -745,23 +675,18 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
 	if (tree_mod_dont_log(fs_info, NULL))
 		return;
 
-	if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) {
-		tree_mod_log_write_unlock(fs_info);
+	if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
 		return;
-	}
 
+	/* speed this up by single seq for all operations? */
 	for (i = 0; i < nr_items; i++) {
-		ret = tree_mod_log_insert_key_locked(fs_info, src,
-						     i + src_offset,
-						     MOD_LOG_KEY_REMOVE);
+		ret = tree_mod_log_insert_key(fs_info, src, i + src_offset,
+					      MOD_LOG_KEY_REMOVE);
 		BUG_ON(ret < 0);
-		ret = tree_mod_log_insert_key_locked(fs_info, dst,
-						     i + dst_offset,
-						     MOD_LOG_KEY_ADD);
+		ret = tree_mod_log_insert_key(fs_info, dst, i + dst_offset,
+					      MOD_LOG_KEY_ADD);
 		BUG_ON(ret < 0);
 	}
-
-	tree_mod_log_write_unlock(fs_info);
 }
 
 static inline void
@@ -774,7 +699,7 @@ tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
 	BUG_ON(ret < 0);
 }
 
-static noinline void
+static inline void
 tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
			  struct extent_buffer *eb,
			  struct btrfs_disk_key *disk_key, int slot, int atomic)
@@ -787,22 +712,30 @@ tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
 	BUG_ON(ret < 0);
 }
 
-static noinline void
-tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
+static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
+				 struct extent_buffer *eb)
 {
+	int i;
+	int ret;
+	u32 nritems;
+
 	if (tree_mod_dont_log(fs_info, eb))
 		return;
 
-	__tree_mod_log_free_eb(fs_info, eb);
-
-	tree_mod_log_write_unlock(fs_info);
+	nritems = btrfs_header_nritems(eb);
+	for (i = nritems - 1; i >= 0; i--) {
+		ret = tree_mod_log_insert_key(fs_info, eb, i,
+					      MOD_LOG_KEY_REMOVE_WHILE_FREEING);
+		BUG_ON(ret < 0);
+	}
 }
 
-static noinline void
+static inline void
 tree_mod_log_set_root_pointer(struct btrfs_root *root,
			      struct extent_buffer *new_root_node)
 {
 	int ret;
+	tree_mod_log_free_eb(root->fs_info, root->node);
 	ret = tree_mod_log_insert_root(root->fs_info, root->node,
				       new_root_node, GFP_NOFS);
 	BUG_ON(ret < 0);
@@ -1136,7 +1069,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
 	unsigned long p_size = sizeof(struct btrfs_key_ptr);
 
 	n = btrfs_header_nritems(eb);
-	while (tm && tm->seq >= time_seq) {
+	while (tm && tm->elem.seq >= time_seq) {
 		/*
		 * all the operations are recorded with the operator used for
		 * the modification. as we're going backwards, we do the
@@ -2788,78 +2721,6 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
 	return ret;
 }
 
-/*
- * helper to use instead of search slot if no exact match is needed but
- * instead the next or previous item should be returned.
- * When find_higher is true, the next higher item is returned, the next lower
- * otherwise.
- * When return_any and find_higher are both true, and no higher item is found,
- * return the next lower instead.
- * When return_any is true and find_higher is false, and no lower item is found,
- * return the next higher instead.
- * It returns 0 if any item is found, 1 if none is found (tree empty), and
- * < 0 on error
- */
-int btrfs_search_slot_for_read(struct btrfs_root *root,
-			       struct btrfs_key *key, struct btrfs_path *p,
-			       int find_higher, int return_any)
-{
-	int ret;
-	struct extent_buffer *leaf;
-
-again:
-	ret = btrfs_search_slot(NULL, root, key, p, 0, 0);
-	if (ret <= 0)
-		return ret;
-	/*
-	 * a return value of 1 means the path is at the position where the
-	 * item should be inserted. Normally this is the next bigger item,
-	 * but in case the previous item is the last in a leaf, path points
-	 * to the first free slot in the previous leaf, i.e. at an invalid
-	 * item.
-	 */
-	leaf = p->nodes[0];
-
-	if (find_higher) {
-		if (p->slots[0] >= btrfs_header_nritems(leaf)) {
-			ret = btrfs_next_leaf(root, p);
-			if (ret <= 0)
-				return ret;
-			if (!return_any)
-				return 1;
-			/*
-			 * no higher item found, return the next
-			 * lower instead
-			 */
-			return_any = 0;
-			find_higher = 0;
-			btrfs_release_path(p);
-			goto again;
-		}
-	} else {
-		if (p->slots[0] >= btrfs_header_nritems(leaf)) {
-			/* we're sitting on an invalid slot */
-			if (p->slots[0] == 0) {
-				ret = btrfs_prev_leaf(root, p);
-				if (ret <= 0)
-					return ret;
-				if (!return_any)
-					return 1;
-				/*
-				 * no lower item found, return the next
-				 * higher instead
-				 */
-				return_any = 0;
-				find_higher = 1;
-				btrfs_release_path(p);
-				goto again;
-			}
-			--p->slots[0];
-		}
-	}
-	return 0;
-}
-
 /*
  * adjust the pointers going up the tree, starting at level
  * making sure the right key of each node is points to 'key'.
@@ -5266,7 +5127,6 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
				 * locked. To solve this situation, we give up
				 * on our lock and cycle.
				 */
-				free_extent_buffer(next);
				btrfs_release_path(path);
				cond_resched();
				goto again;
diff --git a/trunk/fs/btrfs/ctree.h b/trunk/fs/btrfs/ctree.h
index 0f369da5cd97..fa5c45b39075 100644
--- a/trunk/fs/btrfs/ctree.h
+++ b/trunk/fs/btrfs/ctree.h
@@ -91,9 +91,6 @@ struct btrfs_ordered_sum;
 /* for storing balance parameters in the root tree */
 #define BTRFS_BALANCE_OBJECTID -4ULL
 
-/* holds quota configuration and tracking */
-#define BTRFS_QUOTA_TREE_OBJECTID 8ULL
-
 /* orhpan objectid for tracking unlinked/truncated files */
 #define BTRFS_ORPHAN_OBJECTID -5ULL
 
@@ -886,72 +883,6 @@ struct btrfs_block_group_item {
 	__le64 flags;
 } __attribute__ ((__packed__));
 
-/*
- * is subvolume quota turned on?
- */
-#define BTRFS_QGROUP_STATUS_FLAG_ON (1ULL << 0)
-/*
- * SCANNING is set during the initialization phase
- */
-#define BTRFS_QGROUP_STATUS_FLAG_SCANNING (1ULL << 1)
-/*
- * Some qgroup entries are known to be out of date,
- * either because the configuration has changed in a way that
- * makes a rescan necessary, or because the fs has been mounted
- * with a non-qgroup-aware version.
- * Turning qouta off and on again makes it inconsistent, too.
- */
-#define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT (1ULL << 2)
-
-#define BTRFS_QGROUP_STATUS_VERSION 1
-
-struct btrfs_qgroup_status_item {
-	__le64 version;
-	/*
-	 * the generation is updated during every commit. As older
-	 * versions of btrfs are not aware of qgroups, it will be
-	 * possible to detect inconsistencies by checking the
-	 * generation on mount time
-	 */
-	__le64 generation;
-
-	/* flag definitions see above */
-	__le64 flags;
-
-	/*
-	 * only used during scanning to record the progress
-	 * of the scan. It contains a logical address
-	 */
-	__le64 scan;
-} __attribute__ ((__packed__));
-
-struct btrfs_qgroup_info_item {
-	__le64 generation;
-	__le64 rfer;
-	__le64 rfer_cmpr;
-	__le64 excl;
-	__le64 excl_cmpr;
-} __attribute__ ((__packed__));
-
-/* flags definition for qgroup limits */
-#define BTRFS_QGROUP_LIMIT_MAX_RFER	(1ULL << 0)
-#define BTRFS_QGROUP_LIMIT_MAX_EXCL	(1ULL << 1)
-#define BTRFS_QGROUP_LIMIT_RSV_RFER	(1ULL << 2)
-#define BTRFS_QGROUP_LIMIT_RSV_EXCL	(1ULL << 3)
-#define BTRFS_QGROUP_LIMIT_RFER_CMPR	(1ULL << 4)
-#define BTRFS_QGROUP_LIMIT_EXCL_CMPR	(1ULL << 5)
-
-struct btrfs_qgroup_limit_item {
-	/*
-	 * only updated when any of the other values change
-	 */
-	__le64 flags;
-	__le64 max_rfer;
-	__le64 max_excl;
-	__le64 rsv_rfer;
-	__le64 rsv_excl;
-} __attribute__ ((__packed__));
-
 struct btrfs_space_info {
 	u64 flags;
 
@@ -1099,13 +1030,6 @@ struct btrfs_block_group_cache {
 	struct list_head cluster_list;
 };
 
-/* delayed seq elem */
-struct seq_list {
-	struct list_head list;
-	u64 seq;
-};
-
-/* fs_info */
 struct reloc_control;
 struct btrfs_device;
 struct btrfs_fs_devices;
@@ -1120,7 +1044,6 @@ struct btrfs_fs_info {
 	struct btrfs_root *dev_root;
 	struct btrfs_root *fs_root;
 	struct btrfs_root *csum_root;
-	struct btrfs_root *quota_root;
 
 	/* the log root tree is a directory of all the other log roots */
 	struct btrfs_root *log_root_tree;
@@ -1221,8 +1144,6 @@ struct btrfs_fs_info {
 	spinlock_t tree_mod_seq_lock;
 	atomic_t tree_mod_seq;
 	struct list_head tree_mod_seq_list;
-	struct seq_list tree_mod_seq_elem;
-	wait_queue_head_t tree_mod_seq_wait;
 
 	/* this protects tree_mod_log */
 	rwlock_t tree_mod_log_lock;
@@ -1319,8 +1240,6 @@ struct btrfs_fs_info {
	 */
 	struct list_head space_info;
 
-	struct btrfs_space_info *data_sinfo;
-
 	struct reloc_control *reloc_ctl;
 
 	spinlock_t delalloc_lock;
@@ -1377,29 +1296,6 @@ struct btrfs_fs_info {
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
 	u32 check_integrity_print_mask;
 #endif
-	/*
-	 * quota information
-	 */
-	unsigned int quota_enabled:1;
-
-	/*
-	 * quota_enabled only changes state after a commit. This holds the
-	 * next state.
-	 */
-	unsigned int pending_quota_state:1;
-
-	/* is qgroup tracking in a consistent state? */
-	u64 qgroup_flags;
-
-	/* holds configuration and tracking. Protected by qgroup_lock */
-	struct rb_root qgroup_tree;
-	spinlock_t qgroup_lock;
-
-	/* list of dirty qgroups to be written at next commit */
-	struct list_head dirty_qgroups;
-
-	/* used by btrfs_qgroup_record_ref for an efficient tree traversal */
-	u64 qgroup_seq;
 
 	/* filesystem state */
 	u64 fs_state;
@@ -1629,30 +1525,6 @@ struct btrfs_ioctl_defrag_range_args {
 #define BTRFS_DEV_ITEM_KEY	216
 #define BTRFS_CHUNK_ITEM_KEY	228
 
-/*
- * Records the overall state of the qgroups.
- * There's only one instance of this key present,
- * (0, BTRFS_QGROUP_STATUS_KEY, 0)
- */
-#define BTRFS_QGROUP_STATUS_KEY	240
-/*
- * Records the currently used space of the qgroup.
- * One key per qgroup, (0, BTRFS_QGROUP_INFO_KEY, qgroupid).
- */
-#define BTRFS_QGROUP_INFO_KEY	242
-/*
- * Contains the user configured limits for the qgroup.
- * One key per qgroup, (0, BTRFS_QGROUP_LIMIT_KEY, qgroupid).
- */
-#define BTRFS_QGROUP_LIMIT_KEY	244
-/*
- * Records the child-parent relationship of qgroups. For
- * each relation, 2 keys are present:
- * (childid, BTRFS_QGROUP_RELATION_KEY, parentid)
- * (parentid, BTRFS_QGROUP_RELATION_KEY, childid)
- */
-#define BTRFS_QGROUP_RELATION_KEY	246
-
 #define BTRFS_BALANCE_ITEM_KEY	248
 
 /*
@@ -1749,54 +1621,13 @@ static inline void btrfs_init_map_token (struct btrfs_map_token *token)
			   offsetof(type, member),			\
			   sizeof(((type *)0)->member)))
 
-#define DECLARE_BTRFS_SETGET_BITS(bits)					\
-u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr,	\
-			       unsigned long off,			\
-			       struct btrfs_map_token *token);		\
-void btrfs_set_token_##bits(struct extent_buffer *eb, void *ptr,	\
-			    unsigned long off, u##bits val,		\
-			    struct btrfs_map_token *token);		\
-static inline u##bits btrfs_get_##bits(struct extent_buffer *eb, void *ptr, \
-				       unsigned long off)		\
-{									\
-	return btrfs_get_token_##bits(eb, ptr, off, NULL);		\
-}									\
-static inline void btrfs_set_##bits(struct extent_buffer *eb, void *ptr, \
-				    unsigned long off, u##bits val)	\
-{									\
-	btrfs_set_token_##bits(eb, ptr, off, val, NULL);		\
-}
-
-DECLARE_BTRFS_SETGET_BITS(8)
-DECLARE_BTRFS_SETGET_BITS(16)
-DECLARE_BTRFS_SETGET_BITS(32)
-DECLARE_BTRFS_SETGET_BITS(64)
-
+#ifndef BTRFS_SETGET_FUNCS
 #define BTRFS_SETGET_FUNCS(name, type, member, bits)			\
-static inline u##bits btrfs_##name(struct extent_buffer *eb, type *s)	\
-{									\
-	BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member);	\
-	return btrfs_get_##bits(eb, s, offsetof(type, member));	\
-}									\
-static inline void btrfs_set_##name(struct extent_buffer *eb, type *s,	\
-				    u##bits val)			\
-{									\
-	BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member);	\
-	btrfs_set_##bits(eb, s, offsetof(type, member), val);		\
-}									\
-static inline u##bits btrfs_token_##name(struct extent_buffer *eb, type *s, \
-					 struct btrfs_map_token *token)	\
-{									\
-	BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member);	\
-	return btrfs_get_token_##bits(eb, s, offsetof(type, member), token); \
-}									\
-static inline void btrfs_set_token_##name(struct extent_buffer *eb,	\
-					  type *s, u##bits val,		\
-					  struct btrfs_map_token *token)	\
-{									\
-	BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member);	\
-	btrfs_set_token_##bits(eb, s, offsetof(type, member), val, token); \
-}
+u##bits btrfs_##name(struct extent_buffer *eb, type *s);		\
+u##bits btrfs_token_##name(struct extent_buffer *eb, type *s, struct btrfs_map_token *token);		\
+void btrfs_set_token_##name(struct extent_buffer *eb, type *s, u##bits val, struct btrfs_map_token *token);\
+void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val);
+#endif
 
 #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits)		\
 static inline u##bits btrfs_##name(struct extent_buffer *eb)		\
@@ -2634,49 +2465,6 @@ static inline void btrfs_set_dev_stats_value(struct extent_buffer *eb,
		      sizeof(val));
 }
 
-/* btrfs_qgroup_status_item */
-BTRFS_SETGET_FUNCS(qgroup_status_generation, struct btrfs_qgroup_status_item,
-		   generation, 64);
-BTRFS_SETGET_FUNCS(qgroup_status_version, struct btrfs_qgroup_status_item,
-		   version, 64);
-BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item,
-		   flags, 64);
-BTRFS_SETGET_FUNCS(qgroup_status_scan, struct btrfs_qgroup_status_item,
-		   scan, 64);
-
-/* btrfs_qgroup_info_item */
-BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item,
-		   generation, 64);
-BTRFS_SETGET_FUNCS(qgroup_info_rfer, struct btrfs_qgroup_info_item, rfer, 64);
-BTRFS_SETGET_FUNCS(qgroup_info_rfer_cmpr, struct btrfs_qgroup_info_item,
-		   rfer_cmpr, 64);
-BTRFS_SETGET_FUNCS(qgroup_info_excl, struct btrfs_qgroup_info_item, excl, 64);
-BTRFS_SETGET_FUNCS(qgroup_info_excl_cmpr, struct btrfs_qgroup_info_item,
-		   excl_cmpr, 64);
-
-BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_generation,
-			 struct btrfs_qgroup_info_item, generation, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_rfer, struct btrfs_qgroup_info_item,
-			 rfer, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_rfer_cmpr,
-			 struct btrfs_qgroup_info_item, rfer_cmpr, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_excl, struct btrfs_qgroup_info_item,
-			 excl, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_excl_cmpr,
-			 struct btrfs_qgroup_info_item, excl_cmpr, 64);
-
-/* btrfs_qgroup_limit_item */
-BTRFS_SETGET_FUNCS(qgroup_limit_flags, struct btrfs_qgroup_limit_item,
-		   flags, 64);
-BTRFS_SETGET_FUNCS(qgroup_limit_max_rfer, struct btrfs_qgroup_limit_item,
-		   max_rfer, 64);
-BTRFS_SETGET_FUNCS(qgroup_limit_max_excl, struct btrfs_qgroup_limit_item,
-		   max_excl, 64);
-BTRFS_SETGET_FUNCS(qgroup_limit_rsv_rfer, struct btrfs_qgroup_limit_item,
-		   rsv_rfer, 64);
-BTRFS_SETGET_FUNCS(qgroup_limit_rsv_excl, struct btrfs_qgroup_limit_item,
-		   rsv_excl, 64);
-
 static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
 {
 	return sb->s_fs_info;
@@ -2819,6 +2607,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 group_start);
 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
 u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
+void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
 void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
 int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
@@ -2872,8 +2661,6 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
 int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range);
 
 int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
-int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
-					 struct btrfs_fs_info *fs_info);
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
		     int level, int *slot);
@@ -2924,9 +2711,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
		      ins_len, int cow);
 int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
			  struct btrfs_path *p, u64 time_seq);
-int btrfs_search_slot_for_read(struct btrfs_root *root,
-			       struct btrfs_key *key, struct btrfs_path *p,
-			       int find_higher, int return_any);
 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct extent_buffer *parent,
		       int start_slot, int cache_only, u64 *last_ret,
@@ -3009,22 +2793,11 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
 	kfree(fs_info->chunk_root);
 	kfree(fs_info->dev_root);
 	kfree(fs_info->csum_root);
-	kfree(fs_info->quota_root);
 	kfree(fs_info->super_copy);
 	kfree(fs_info->super_for_commit);
 	kfree(fs_info);
 }
 
-/* tree mod log functions from ctree.c */
-u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
-			   struct seq_list *elem);
-void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
-			    struct seq_list *elem);
-static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
-{
-	return atomic_inc_return(&fs_info->tree_mod_seq);
-}
-
 /* root-item.c */
 int btrfs_find_root_ref(struct btrfs_root *tree_root,
			struct btrfs_path *path,
@@ -3288,23 +3061,6 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root, const char *function,
			       unsigned int line, int errno);
 
-#define btrfs_set_fs_incompat(__fs_info, opt) \
-	__btrfs_set_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt)
-
-static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info,
-					   u64 flag)
-{
-	struct btrfs_super_block *disk_super;
-	u64 features;
-
-	disk_super = fs_info->super_copy;
-	features = btrfs_super_incompat_flags(disk_super);
-	if (!(features & flag)) {
-		features |= flag;
-		btrfs_set_super_incompat_flags(disk_super, features);
-	}
-}
-
 #define btrfs_abort_transaction(trans, root, errno)		\
 do {								\
	__btrfs_abort_transaction(trans, root, __func__,	\
@@ -3400,49 +3156,17 @@ void btrfs_reada_detach(void *handle);
 int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
			 u64 start, int err);
 
-/* qgroup.c */
-struct qgroup_update {
+/* delayed seq elem */
+struct seq_list {
 	struct list_head list;
-	struct btrfs_delayed_ref_node *node;
-	struct btrfs_delayed_extent_op *extent_op;
+	u64 seq;
+	u32 flags;
 };
 
-int btrfs_quota_enable(struct btrfs_trans_handle *trans,
-		       struct btrfs_fs_info *fs_info);
-int btrfs_quota_disable(struct btrfs_trans_handle *trans,
-			struct btrfs_fs_info *fs_info);
-int btrfs_quota_rescan(struct btrfs_fs_info *fs_info);
-int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
-			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
-int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
-			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
-int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
-			struct btrfs_fs_info *fs_info, u64 qgroupid,
-			char *name);
-int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
-			struct btrfs_fs_info *fs_info, u64 qgroupid);
-int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
-		       struct btrfs_fs_info *fs_info, u64 qgroupid,
-		       struct btrfs_qgroup_limit *limit);
-int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
-void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
-struct btrfs_delayed_extent_op;
-int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
-			    struct btrfs_delayed_ref_node *node,
-			    struct btrfs_delayed_extent_op *extent_op);
-int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
-			     struct btrfs_fs_info *fs_info,
-			     struct btrfs_delayed_ref_node *node,
-			     struct btrfs_delayed_extent_op *extent_op);
-int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
-		      struct btrfs_fs_info *fs_info);
-int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
-			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
-			 struct btrfs_qgroup_inherit *inherit);
-int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
-void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);
-
-void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
+void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
+			    struct seq_list *elem);
+void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
+			    struct seq_list *elem);
 
 static inline int is_fstree(u64 rootid)
 {
diff --git a/trunk/fs/btrfs/delayed-inode.c b/trunk/fs/btrfs/delayed-inode.c
index 335605c8ceab..2399f4086915 100644
--- a/trunk/fs/btrfs/delayed-inode.c
+++ b/trunk/fs/btrfs/delayed-inode.c
@@ -62,7 +62,6 @@ static inline void btrfs_init_delayed_node(
 	INIT_LIST_HEAD(&delayed_node->n_list);
 	INIT_LIST_HEAD(&delayed_node->p_list);
 	delayed_node->bytes_reserved = 0;
-	memset(&delayed_node->inode_item, 0, sizeof(delayed_node->inode_item));
 }
 
 static inline int btrfs_is_continuous_delayed_item(
@@ -1114,8 +1113,8 @@ static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
  * Returns < 0 on error and returns with an aborted transaction with any
  * outstanding delayed items cleaned up.
  */
-static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
-				     struct btrfs_root *root, int nr)
+int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root)
 {
 	struct btrfs_root *curr_root = root;
 	struct btrfs_delayed_root *delayed_root;
@@ -1123,7 +1122,6 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
 	struct btrfs_path *path;
 	struct btrfs_block_rsv *block_rsv;
 	int ret = 0;
-	bool count = (nr > 0);
 
 	if (trans->aborted)
 		return -EIO;
@@ -1139,7 +1137,7 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
 	delayed_root = btrfs_get_delayed_root(root);
 
 	curr_node = btrfs_first_delayed_node(delayed_root);
-	while (curr_node && (!count || (count && nr--))) {
+	while (curr_node) {
 		curr_root = curr_node->root;
 		ret = btrfs_insert_delayed_items(trans, path, curr_root,
						 curr_node);
@@ -1151,7 +1149,6 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
						   path, curr_node);
 		if (ret) {
 			btrfs_release_delayed_node(curr_node);
-			curr_node = NULL;
 			btrfs_abort_transaction(trans, root, ret);
 			break;
 		}
@@ -1161,26 +1158,12 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
 		btrfs_release_delayed_node(prev_node);
 	}
 
-	if (curr_node)
-		btrfs_release_delayed_node(curr_node);
 	btrfs_free_path(path);
 	trans->block_rsv = block_rsv;
 
 	return ret;
 }
 
-int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root)
-{
-	return __btrfs_run_delayed_items(trans, root, -1);
-}
-
-int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
-			       struct btrfs_root *root, int nr)
-{
-	return __btrfs_run_delayed_items(trans, root, nr);
-}
-
 static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
					      struct btrfs_delayed_node *node)
 {
diff --git a/trunk/fs/btrfs/delayed-inode.h b/trunk/fs/btrfs/delayed-inode.h
index 4f808e1baeed..f5aa4023d3e1 100644
--- a/trunk/fs/btrfs/delayed-inode.h
+++ b/trunk/fs/btrfs/delayed-inode.h
@@ -107,8 +107,6 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode);
 
 int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root);
-int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
-			       struct btrfs_root *root, int nr);
 
 void btrfs_balance_delayed_items(struct btrfs_root *root);
 
diff --git a/trunk/fs/btrfs/delayed-ref.c b/trunk/fs/btrfs/delayed-ref.c
index da7419ed01bb..13ae7b04790e 100644
--- a/trunk/fs/btrfs/delayed-ref.c
+++ b/trunk/fs/btrfs/delayed-ref.c
@@ -233,26 +233,22 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
-int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
-			    struct btrfs_delayed_ref_root *delayed_refs,
+int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
			    u64 seq)
 {
 	struct seq_list *elem;
-	int ret = 0;
-
-	spin_lock(&fs_info->tree_mod_seq_lock);
-	if (!list_empty(&fs_info->tree_mod_seq_list)) {
-		elem = list_first_entry(&fs_info->tree_mod_seq_list,
-					struct seq_list, list);
-		if (seq >= elem->seq) {
-			pr_debug("holding back delayed_ref %llu, lowest is "
-				 "%llu (%p)\n", seq, elem->seq, delayed_refs);
-			ret = 1;
-		}
-	}
-	spin_unlock(&fs_info->tree_mod_seq_lock);
-	return ret;
+	assert_spin_locked(&delayed_refs->lock);
+	if (list_empty(&delayed_refs->seq_head))
+		return 0;
+
+	elem = list_first_entry(&delayed_refs->seq_head, struct seq_list, list);
+	if (seq >= elem->seq) {
+		pr_debug("holding back delayed_ref %llu, lowest is %llu (%p)\n",
+			 seq, elem->seq, delayed_refs);
+		return 1;
+	}
+	return 0;
 }
 
 int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
@@ -529,8 +525,8 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 	ref->is_head = 0;
 	ref->in_tree = 1;
 
-	if (need_ref_seq(for_cow, ref_root))
-		seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
+	if (is_fstree(ref_root))
+		seq = inc_delayed_seq(delayed_refs);
 	ref->seq = seq;
 
 	full_ref = btrfs_delayed_node_to_tree_ref(ref);
@@ -588,8 +584,8 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 	ref->is_head = 0;
 	ref->in_tree = 1;
 
-	if (need_ref_seq(for_cow, ref_root))
-		seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
+	if (is_fstree(ref_root))
+		seq = inc_delayed_seq(delayed_refs);
 	ref->seq = seq;
 
 	full_ref = btrfs_delayed_node_to_data_ref(ref);
@@ -662,12 +658,10 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 	add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
			     num_bytes, parent, ref_root, level, action,
			     for_cow);
-	if (!need_ref_seq(for_cow, ref_root) &&
-	    waitqueue_active(&fs_info->tree_mod_seq_wait))
-		wake_up(&fs_info->tree_mod_seq_wait);
+	if (!is_fstree(ref_root) &&
+	    waitqueue_active(&delayed_refs->seq_wait))
+		wake_up(&delayed_refs->seq_wait);
 	spin_unlock(&delayed_refs->lock);
-	if (need_ref_seq(for_cow, ref_root))
-		btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
 
 	return 0;
 }
@@ -713,12 +707,10 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 	add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
			     num_bytes, parent, ref_root, owner, offset,
			     action, for_cow);
-	if (!need_ref_seq(for_cow, ref_root) &&
-	    waitqueue_active(&fs_info->tree_mod_seq_wait))
-		wake_up(&fs_info->tree_mod_seq_wait);
+	if (!is_fstree(ref_root) &&
+	    waitqueue_active(&delayed_refs->seq_wait))
+		wake_up(&delayed_refs->seq_wait);
 	spin_unlock(&delayed_refs->lock);
-	if (need_ref_seq(for_cow, ref_root))
-		btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
 
 	return 0;
 }
@@ -744,8 +736,8 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
				   num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
				   extent_op->is_data);
 
-	if (waitqueue_active(&fs_info->tree_mod_seq_wait))
-		wake_up(&fs_info->tree_mod_seq_wait);
+	if (waitqueue_active(&delayed_refs->seq_wait))
+		wake_up(&delayed_refs->seq_wait);
 	spin_unlock(&delayed_refs->lock);
 	return 0;
 }
diff --git a/trunk/fs/btrfs/delayed-ref.h b/trunk/fs/btrfs/delayed-ref.h
index 0d7c90c366b6..413927fb9957 100644
--- a/trunk/fs/btrfs/delayed-ref.h
+++ b/trunk/fs/btrfs/delayed-ref.h
@@ -139,6 +139,26 @@ struct btrfs_delayed_ref_root {
 	int flushing;
 
 	u64 run_delayed_start;
+
+	/*
+	 * seq number of delayed refs. We need to know if a backref was being
+	 * added before the currently processed ref or afterwards.
+	 */
+	u64 seq;
+
+	/*
+	 * seq_list holds a list of all seq numbers that are currently being
+	 * added to the list. While walking backrefs (btrfs_find_all_roots,
+	 * qgroups), which might take some time, no newer ref must be processed,
+	 * as it might influence the outcome of the walk.
+	 */
+	struct list_head seq_head;
+
+	/*
+	 * when the only refs we have in the list must not be processed, we want
+	 * to wait for more refs to show up or for the end of backref walking.
+	 */
+	wait_queue_head_t seq_wait;
 };
 
 static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
@@ -175,29 +195,35 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
 int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
			   struct list_head *cluster, u64 search_start);
 
-int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
-			    struct btrfs_delayed_ref_root *delayed_refs,
-			    u64 seq);
-
-/*
- * delayed refs with a ref_seq > 0 must be held back during backref walking.
- * this only applies to items in one of the fs-trees. for_cow items never need
- * to be held back, so they won't get a ref_seq number.
- */
-static inline int need_ref_seq(int for_cow, u64 rootid)
+static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs)
 {
-	if (for_cow)
-		return 0;
-
-	if (rootid == BTRFS_FS_TREE_OBJECTID)
-		return 1;
+	assert_spin_locked(&delayed_refs->lock);
+	++delayed_refs->seq;
+	return delayed_refs->seq;
+}
 
-	if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID)
-		return 1;
+static inline void
+btrfs_get_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
+		      struct seq_list *elem)
+{
+	assert_spin_locked(&delayed_refs->lock);
+	elem->seq = delayed_refs->seq;
+	list_add_tail(&elem->list, &delayed_refs->seq_head);
+}
 
-	return 0;
+static inline void
+btrfs_put_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
+		      struct seq_list *elem)
+{
+	spin_lock(&delayed_refs->lock);
+	list_del(&elem->list);
+	wake_up(&delayed_refs->seq_wait);
+	spin_unlock(&delayed_refs->lock);
 }
 
+int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
+			    u64 seq);
+
 /*
  * a node might live in a head or a regular ref, this lets you
  * test for the proper type to use.
diff --git a/trunk/fs/btrfs/disk-io.c b/trunk/fs/btrfs/disk-io.c
index 05f4fb6e0607..2936ca49b3b4 100644
--- a/trunk/fs/btrfs/disk-io.c
+++ b/trunk/fs/btrfs/disk-io.c
@@ -407,7 +407,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
 			break;
 	}
 
-	if (failed && !ret && failed_mirror)
+	if (failed && !ret)
 		repair_eb_io_failure(root, eb, failed_mirror);
 
 	return ret;
@@ -1225,82 +1225,6 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info)
 	return root;
 }
 
-struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
-				     struct btrfs_fs_info *fs_info,
-				     u64 objectid)
-{
-	struct extent_buffer *leaf;
-	struct btrfs_root *tree_root = fs_info->tree_root;
-	struct btrfs_root *root;
-	struct btrfs_key key;
-	int ret = 0;
-	u64 bytenr;
-
-	root = btrfs_alloc_root(fs_info);
-	if (!root)
-		return ERR_PTR(-ENOMEM);
-
-	__setup_root(tree_root->nodesize, tree_root->leafsize,
-		     tree_root->sectorsize, tree_root->stripesize,
-		     root, fs_info, objectid);
-	root->root_key.objectid = objectid;
-	root->root_key.type = BTRFS_ROOT_ITEM_KEY;
-	root->root_key.offset = 0;
-
-	leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
-				      0, objectid, NULL, 0, 0, 0);
-	if (IS_ERR(leaf)) {
-		ret = PTR_ERR(leaf);
-		goto fail;
-	}
-
-	bytenr = leaf->start;
-	memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
-	btrfs_set_header_bytenr(leaf, leaf->start);
-	btrfs_set_header_generation(leaf, trans->transid);
-	btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
-	btrfs_set_header_owner(leaf, objectid);
-	root->node = leaf;
-
-	write_extent_buffer(leaf, fs_info->fsid,
-			    (unsigned long)btrfs_header_fsid(leaf),
-			    BTRFS_FSID_SIZE);
-	write_extent_buffer(leaf, fs_info->chunk_tree_uuid,
-			    (unsigned long)btrfs_header_chunk_tree_uuid(leaf),
-			    BTRFS_UUID_SIZE);
-	btrfs_mark_buffer_dirty(leaf);
-
-	root->commit_root = btrfs_root_node(root);
-	root->track_dirty = 1;
-
-
-	root->root_item.flags = 0;
-	root->root_item.byte_limit = 0;
-	btrfs_set_root_bytenr(&root->root_item, leaf->start);
-	btrfs_set_root_generation(&root->root_item, trans->transid);
-	btrfs_set_root_level(&root->root_item, 0);
-	btrfs_set_root_refs(&root->root_item, 1);
-	btrfs_set_root_used(&root->root_item, leaf->len);
-	btrfs_set_root_last_snapshot(&root->root_item, 0);
-	btrfs_set_root_dirid(&root->root_item, 0);
-	root->root_item.drop_level = 0;
-
-	key.objectid = objectid;
-	key.type = BTRFS_ROOT_ITEM_KEY;
-	key.offset = 0;
-	ret = btrfs_insert_root(trans, tree_root, &key, &root->root_item);
-	if (ret)
-		goto fail;
-
-	btrfs_tree_unlock(leaf);
-
-fail:
-	if (ret)
-		return ERR_PTR(ret);
-
-	return root;
-}
-
 static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
					 struct btrfs_fs_info *fs_info)
 {
@@ -1472,9 +1396,6 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
 		return fs_info->dev_root;
 	if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
 		return fs_info->csum_root;
-	if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID)
-		return fs_info->quota_root ? fs_info->quota_root :
-					     ERR_PTR(-ENOENT);
 again:
 	spin_lock(&fs_info->fs_roots_radix_lock);
 	root = radix_tree_lookup(&fs_info->fs_roots_radix,
@@ -1902,10 +1823,6 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
 	free_extent_buffer(info->extent_root->commit_root);
 	free_extent_buffer(info->csum_root->node);
 	free_extent_buffer(info->csum_root->commit_root);
-	if (info->quota_root) {
-		free_extent_buffer(info->quota_root->node);
-		free_extent_buffer(info->quota_root->commit_root);
-	}
 
 	info->tree_root->node = NULL;
 	info->tree_root->commit_root = NULL;
@@ -1915,10 +1832,6 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
 	info->extent_root->commit_root = NULL;
 	info->csum_root->node = NULL;
 	info->csum_root->commit_root = NULL;
-	if (info->quota_root) {
-		info->quota_root->node = NULL;
-		info->quota_root->commit_root = NULL;
-	}
 
 	if (chunk_root) {
 		free_extent_buffer(info->chunk_root->node);
@@ -1949,7 +1862,6 @@ int open_ctree(struct super_block *sb,
 	struct btrfs_root *csum_root;
 	struct btrfs_root *chunk_root;
 	struct btrfs_root *dev_root;
-	struct btrfs_root *quota_root;
 	struct btrfs_root *log_tree_root;
 	int ret;
 	int err = -EINVAL;
@@ -1961,10 +1873,9 @@ int open_ctree(struct super_block *sb,
 	csum_root = fs_info->csum_root = btrfs_alloc_root(fs_info);
 	chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
 	dev_root = fs_info->dev_root = btrfs_alloc_root(fs_info);
-	quota_root = fs_info->quota_root = btrfs_alloc_root(fs_info);
 
 	if (!tree_root || !extent_root || !csum_root ||
-	    !chunk_root || !dev_root || !quota_root) {
+	    !chunk_root || !dev_root) {
 		err = -ENOMEM;
 		goto fail;
 	}
@@ -2033,8 +1944,6 @@ int open_ctree(struct super_block *sb,
 	fs_info->free_chunk_space = 0;
 	fs_info->tree_mod_log = RB_ROOT;
 
-	init_waitqueue_head(&fs_info->tree_mod_seq_wait);
-
 	/* readahead state */
 	INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
 	spin_lock_init(&fs_info->reada_lock);
@@ -2123,13 +2032,6 @@ int open_ctree(struct super_block *sb,
 	init_rwsem(&fs_info->cleanup_work_sem);
 	init_rwsem(&fs_info->subvol_sem);
 
-	spin_lock_init(&fs_info->qgroup_lock);
-	fs_info->qgroup_tree = RB_ROOT;
-	INIT_LIST_HEAD(&fs_info->dirty_qgroups);
-	fs_info->qgroup_seq = 1;
-	fs_info->quota_enabled = 0;
-	fs_info->pending_quota_state = 0;
-
 	btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
 	btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
 
@@ -2342,7 +2244,7 @@ int open_ctree(struct super_block *sb,
 	ret |= btrfs_start_workers(&fs_info->caching_workers);
 	ret |= btrfs_start_workers(&fs_info->readahead_workers);
 	if (ret) {
-		err = -ENOMEM;
+		ret = -ENOMEM;
 		goto fail_sb_buffer;
 	}
 
@@ -2454,17 +2356,6 @@ int open_ctree(struct super_block *sb,
 		goto recovery_tree_root;
 	csum_root->track_dirty = 1;
 
-	ret = find_and_setup_root(tree_root, fs_info,
-				  BTRFS_QUOTA_TREE_OBJECTID, quota_root);
-	if (ret) {
-		kfree(quota_root);
-		quota_root = fs_info->quota_root = NULL;
-	} else {
-		quota_root->track_dirty = 1;
-		fs_info->quota_enabled = 1;
-		fs_info->pending_quota_state = 1;
-	}
-
 	fs_info->generation = generation;
 	fs_info->last_trans_committed = generation;
 
@@ -2524,9 +2415,6 @@ int open_ctree(struct super_block *sb,
		       " integrity check module %s\n", sb->s_id);
 	}
 #endif
-	ret = btrfs_read_qgroup_config(fs_info);
-	if (ret)
-		goto fail_trans_kthread;
 
 	/* do not make disk changes in broken FS */
 	if (btrfs_super_log_root(disk_super) != 0 &&
@@ -2537,7 +2425,7 @@ int open_ctree(struct super_block *sb,
 			printk(KERN_WARNING "Btrfs log replay required "
			       "on RO media\n");
 			err = -EIO;
-			goto fail_qgroup;
+			goto fail_trans_kthread;
 		}
 		blocksize =
		     btrfs_level_size(tree_root,
@@ -2546,7 +2434,7 @@ int open_ctree(struct super_block *sb,
 		log_tree_root = btrfs_alloc_root(fs_info);
 		if (!log_tree_root) {
 			err = -ENOMEM;
-			goto fail_qgroup;
+			goto fail_trans_kthread;
 		}
 
 		__setup_root(nodesize, leafsize, sectorsize, stripesize,
@@ -2578,15 +2466,15 @@ int open_ctree(struct super_block *sb,
 
 	if (!(sb->s_flags & MS_RDONLY)) {
 		ret = btrfs_cleanup_fs_roots(fs_info);
-		if (ret)
-			goto fail_trans_kthread;
+		if (ret) {
+		}
 
 		ret = btrfs_recover_relocation(tree_root);
 		if (ret < 0) {
 			printk(KERN_WARNING
			       "btrfs: failed to recover relocation\n");
 			err = -EINVAL;
-			goto fail_qgroup;
+			goto fail_trans_kthread;
 		}
 	}
 
@@ -2596,10 +2484,10 @@ int open_ctree(struct super_block *sb,
 
 	fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
 	if (!fs_info->fs_root)
-		goto fail_qgroup;
+		goto fail_trans_kthread;
 	if (IS_ERR(fs_info->fs_root)) {
 		err = PTR_ERR(fs_info->fs_root);
-		goto fail_qgroup;
+		goto fail_trans_kthread;
 	}
 
 	if (sb->s_flags & MS_RDONLY)
@@ -2623,8 +2511,6 @@ int open_ctree(struct super_block *sb,
 
 	return 0;
 
-fail_qgroup:
-	btrfs_free_qgroup_config(fs_info);
 fail_trans_kthread:
 	kthread_stop(fs_info->transaction_kthread);
 fail_cleaner:
@@ -3223,8 +3109,6 @@ int close_ctree(struct btrfs_root *root)
 	fs_info->closing = 2;
 	smp_mb();
 
-	btrfs_free_qgroup_config(root->fs_info);
-
 	if (fs_info->delalloc_bytes) {
 		printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
		       (unsigned long long)fs_info->delalloc_bytes);
@@ -3244,10 +3128,6 @@ int close_ctree(struct btrfs_root *root)
 	free_extent_buffer(fs_info->dev_root->commit_root);
 	free_extent_buffer(fs_info->csum_root->node);
 	free_extent_buffer(fs_info->csum_root->commit_root);
-	if (fs_info->quota_root) {
-		free_extent_buffer(fs_info->quota_root->node);
-		free_extent_buffer(fs_info->quota_root->commit_root);
-	}
 
 	btrfs_free_block_groups(fs_info);
 
@@ -3378,7 +3258,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
 	return btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
 }
 
-int btree_lock_page_hook(struct page *page, void *data,
+static int btree_lock_page_hook(struct page *page, void *data,
				void (*flush_fn)(void *))
 {
 	struct inode *inode = page->mapping->host;
diff --git a/trunk/fs/btrfs/disk-io.h b/trunk/fs/btrfs/disk-io.h
a/trunk/fs/btrfs/disk-io.h b/trunk/fs/btrfs/disk-io.h index 95e147eea239..05b3fab39f7e 100644 --- a/trunk/fs/btrfs/disk-io.h +++ b/trunk/fs/btrfs/disk-io.h @@ -89,12 +89,6 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, int btrfs_cleanup_transaction(struct btrfs_root *root); void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans, struct btrfs_root *root); -void btrfs_abort_devices(struct btrfs_root *root); -struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, - u64 objectid); -int btree_lock_page_hook(struct page *page, void *data, - void (*flush_fn)(void *)); #ifdef CONFIG_DEBUG_LOCK_ALLOC void btrfs_init_lockdep(void); diff --git a/trunk/fs/btrfs/extent-tree.c b/trunk/fs/btrfs/extent-tree.c index 4e1b153b7c47..6e1d36702ff7 100644 --- a/trunk/fs/btrfs/extent-tree.c +++ b/trunk/fs/btrfs/extent-tree.c @@ -34,8 +34,6 @@ #include "locking.h" #include "free-space-cache.h" -#undef SCRAMBLE_DELAYED_REFS - /* * control flags for do_chunk_alloc's force field * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk @@ -2219,7 +2217,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_node *ref; struct btrfs_delayed_ref_head *locked_ref = NULL; struct btrfs_delayed_extent_op *extent_op; - struct btrfs_fs_info *fs_info = root->fs_info; int ret; int count = 0; int must_insert_reserved = 0; @@ -2258,7 +2255,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, ref = select_delayed_ref(locked_ref); if (ref && ref->seq && - btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) { + btrfs_check_delayed_seq(delayed_refs, ref->seq)) { /* * there are still refs with lower seq numbers in the * process of being added. Don't run this ref yet. @@ -2340,7 +2337,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, } next: - do_chunk_alloc(trans, fs_info->extent_root, + do_chunk_alloc(trans, root->fs_info->extent_root, 2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0), CHUNK_ALLOC_NO_FORCE); @@ -2350,99 +2347,21 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, return count; } -static void wait_for_more_refs(struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_root *delayed_refs, +static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs, unsigned long num_refs, struct list_head *first_seq) { spin_unlock(&delayed_refs->lock); pr_debug("waiting for more refs (num %ld, first %p)\n", num_refs, first_seq); - wait_event(fs_info->tree_mod_seq_wait, + wait_event(delayed_refs->seq_wait, num_refs != delayed_refs->num_entries || - fs_info->tree_mod_seq_list.next != first_seq); + delayed_refs->seq_head.next != first_seq); pr_debug("done waiting for more refs (num %ld, first %p)\n", - delayed_refs->num_entries, fs_info->tree_mod_seq_list.next); + delayed_refs->num_entries, delayed_refs->seq_head.next); spin_lock(&delayed_refs->lock); } -#ifdef SCRAMBLE_DELAYED_REFS -/* - * Normally delayed refs get processed in ascending bytenr order. This - * correlates in most cases to the order added. 
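- * (the correlation is incidental, never an ordering guarantee callers may rely on)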
To expose dependencies on this - * order, we start to process the tree in the middle instead of the beginning - */ -static u64 find_middle(struct rb_root *root) -{ - struct rb_node *n = root->rb_node; - struct btrfs_delayed_ref_node *entry; - int alt = 1; - u64 middle; - u64 first = 0, last = 0; - - n = rb_first(root); - if (n) { - entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); - first = entry->bytenr; - } - n = rb_last(root); - if (n) { - entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); - last = entry->bytenr; - } - n = root->rb_node; - - while (n) { - entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); - WARN_ON(!entry->in_tree); - - middle = entry->bytenr; - - if (alt) - n = n->rb_left; - else - n = n->rb_right; - - alt = 1 - alt; - } - return middle; -} -#endif - -int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) -{ - struct qgroup_update *qgroup_update; - int ret = 0; - - if (list_empty(&trans->qgroup_ref_list) != - !trans->delayed_ref_elem.seq) { - /* list without seq or seq without list */ - printk(KERN_ERR "btrfs: qgroup accounting update error, list is%s empty, seq is %llu\n", - list_empty(&trans->qgroup_ref_list) ? "" : " not", - trans->delayed_ref_elem.seq); - BUG(); - } - - if (!trans->delayed_ref_elem.seq) - return 0; - - while (!list_empty(&trans->qgroup_ref_list)) { - qgroup_update = list_first_entry(&trans->qgroup_ref_list, - struct qgroup_update, list); - list_del(&qgroup_update->list); - if (!ret) - ret = btrfs_qgroup_account_ref( - trans, fs_info, qgroup_update->node, - qgroup_update->extent_op); - kfree(qgroup_update); - } - - btrfs_put_tree_mod_seq(fs_info, &trans->delayed_ref_elem); - - return ret; -} - /* * this starts processing the delayed reference count updates and * extent insertions we have queued up so far. count can be @@ -2479,18 +2398,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0), CHUNK_ALLOC_NO_FORCE); - btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); - delayed_refs = &trans->transaction->delayed_refs; INIT_LIST_HEAD(&cluster); again: consider_waiting = 0; spin_lock(&delayed_refs->lock); - -#ifdef SCRAMBLE_DELAYED_REFS - delayed_refs->run_delayed_start = find_middle(&delayed_refs->root); -#endif - if (count == 0) { count = delayed_refs->num_entries * 2; run_most = 1; @@ -2525,7 +2437,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, num_refs = delayed_refs->num_entries; first_seq = root->fs_info->tree_mod_seq_list.next; } else { - wait_for_more_refs(root->fs_info, delayed_refs, + wait_for_more_refs(delayed_refs, num_refs, first_seq); /* * after waiting, things have changed. 
we @@ -2590,7 +2502,6 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, } out: spin_unlock(&delayed_refs->lock); - assert_qgroups_uptodate(trans); return 0; } @@ -2670,10 +2581,8 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, node = rb_prev(node); if (node) { - int seq = ref->seq; - ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); - if (ref->bytenr == bytenr && ref->seq == seq) + if (ref->bytenr == bytenr) goto out_unlock; } @@ -2994,13 +2903,8 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group, } spin_lock(&block_group->lock); - if (block_group->cached != BTRFS_CACHE_FINISHED || - !btrfs_test_opt(root, SPACE_CACHE)) { - /* - * don't bother trying to write stuff out _if_ - * a) we're not cached, - * b) we're with nospace_cache mount option. - */ + if (block_group->cached != BTRFS_CACHE_FINISHED) { + /* We're not cached, don't bother trying to write stuff out */ dcs = BTRFS_DC_WRITTEN; spin_unlock(&block_group->lock); goto out_put; @@ -3230,8 +3134,6 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, init_waitqueue_head(&found->wait); *space_info = found; list_add_rcu(&found->list, &info->space_info); - if (flags & BTRFS_BLOCK_GROUP_DATA) - info->data_sinfo = found; return 0; } @@ -3361,6 +3263,12 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) return get_alloc_profile(root, flags); } +void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) +{ + BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, + BTRFS_BLOCK_GROUP_DATA); +} + /* * This will check the space that the inode allocates from to make sure we have * enough space for bytes. @@ -3369,7 +3277,6 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) { struct btrfs_space_info *data_sinfo; struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_fs_info *fs_info = root->fs_info; u64 used; int ret = 0, committed = 0, alloc_chunk = 1; @@ -3382,7 +3289,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) committed = 1; } - data_sinfo = fs_info->data_sinfo; + data_sinfo = BTRFS_I(inode)->space_info; if (!data_sinfo) goto alloc; @@ -3423,9 +3330,10 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) goto commit_trans; } - if (!data_sinfo) - data_sinfo = fs_info->data_sinfo; - + if (!data_sinfo) { + btrfs_set_inode_space_info(root, inode); + data_sinfo = BTRFS_I(inode)->space_info; + } goto again; } @@ -3472,7 +3380,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) /* make sure bytes are sectorsize aligned */ bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); - data_sinfo = root->fs_info->data_sinfo; + data_sinfo = BTRFS_I(inode)->space_info; spin_lock(&data_sinfo->lock); data_sinfo->bytes_may_use -= bytes; trace_btrfs_space_reservation(root->fs_info, "space_info", @@ -3678,58 +3586,89 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, /* * shrink metadata reservation for delalloc */ -static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, - bool wait_ordered) +static int shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, + bool wait_ordered) { struct btrfs_block_rsv *block_rsv; struct btrfs_space_info *space_info; struct btrfs_trans_handle *trans; - u64 delalloc_bytes; + u64 reserved; u64 max_reclaim; + u64 reclaimed = 0; long time_left; unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; int loops = 0; + unsigned long progress; trans = (struct 
btrfs_trans_handle *)current->journal_info; block_rsv = &root->fs_info->delalloc_block_rsv; space_info = block_rsv->space_info; smp_mb(); - delalloc_bytes = root->fs_info->delalloc_bytes; - if (delalloc_bytes == 0) { + reserved = space_info->bytes_may_use; + progress = space_info->reservation_progress; + + if (reserved == 0) + return 0; + + smp_mb(); + if (root->fs_info->delalloc_bytes == 0) { if (trans) - return; + return 0; btrfs_wait_ordered_extents(root, 0, 0); - return; + return 0; } - while (delalloc_bytes && loops < 3) { - max_reclaim = min(delalloc_bytes, to_reclaim); - nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; + max_reclaim = min(reserved, to_reclaim); + nr_pages = max_t(unsigned long, nr_pages, + max_reclaim >> PAGE_CACHE_SHIFT); + while (loops < 1024) { + /* have the flusher threads jump in and do some IO */ + smp_mb(); + nr_pages = min_t(unsigned long, nr_pages, + root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT); writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages, - WB_REASON_FS_FREE_SPACE); + WB_REASON_FS_FREE_SPACE); spin_lock(&space_info->lock); - if (space_info->bytes_used + space_info->bytes_reserved + - space_info->bytes_pinned + space_info->bytes_readonly + - space_info->bytes_may_use + orig <= - space_info->total_bytes) { - spin_unlock(&space_info->lock); - break; - } + if (reserved > space_info->bytes_may_use) + reclaimed += reserved - space_info->bytes_may_use; + reserved = space_info->bytes_may_use; spin_unlock(&space_info->lock); loops++; + + if (reserved == 0 || reclaimed >= max_reclaim) + break; + + if (trans && trans->transaction->blocked) + return -EAGAIN; + if (wait_ordered && !trans) { btrfs_wait_ordered_extents(root, 0, 0); } else { - time_left = schedule_timeout_killable(1); + time_left = schedule_timeout_interruptible(1); + + /* We were interrupted, exit */ if (time_left) break; } - smp_mb(); - delalloc_bytes = root->fs_info->delalloc_bytes; + + /* we've kicked the IO a few times, if anything has been freed, + * exit. 
There is no sense in looping here for a long time + * when we really need to commit the transaction, or there are + * just too many writers without enough free space + */ + + if (loops > 3) { + smp_mb(); + if (progress != space_info->reservation_progress) + break; + } + } + + return reclaimed >= to_reclaim; } /** @@ -3789,58 +3728,6 @@ static int may_commit_transaction(struct btrfs_root *root, return btrfs_commit_transaction(trans, root); } -enum flush_state { - FLUSH_DELALLOC = 1, - FLUSH_DELALLOC_WAIT = 2, - FLUSH_DELAYED_ITEMS_NR = 3, - FLUSH_DELAYED_ITEMS = 4, - COMMIT_TRANS = 5, -}; - -static int flush_space(struct btrfs_root *root, - struct btrfs_space_info *space_info, u64 num_bytes, - u64 orig_bytes, int state) -{ - struct btrfs_trans_handle *trans; - int nr; - int ret = 0; - - switch (state) { - case FLUSH_DELALLOC: - case FLUSH_DELALLOC_WAIT: - shrink_delalloc(root, num_bytes, orig_bytes, - state == FLUSH_DELALLOC_WAIT); - break; - case FLUSH_DELAYED_ITEMS_NR: - case FLUSH_DELAYED_ITEMS: - if (state == FLUSH_DELAYED_ITEMS_NR) { - u64 bytes = btrfs_calc_trans_metadata_size(root, 1); - - nr = (int)div64_u64(num_bytes, bytes); - if (!nr) - nr = 1; - nr *= 2; - } else { - nr = -1; - } - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - break; - } - ret = btrfs_run_delayed_items_nr(trans, root, nr); - btrfs_end_transaction(trans, root); - break; - case COMMIT_TRANS: - ret = may_commit_transaction(root, space_info, orig_bytes, 0); - break; - default: - ret = -ENOSPC; - break; - } - - return ret; -} /** * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space * @root - the root we're allocating for @@ -3862,10 +3749,11 @@ static int reserve_metadata_bytes(struct btrfs_root *root, struct btrfs_space_info *space_info = block_rsv->space_info; u64 used; u64 num_bytes = orig_bytes; - int flush_state = FLUSH_DELALLOC; + int retries = 0; int ret = 0; - bool flushing = false; bool committed = false; + bool flushing = false; + bool wait_ordered = false; again: ret = 0; @@ -3924,8 +3812,9 @@ static int reserve_metadata_bytes(struct btrfs_root *root, * amount plus the amount of bytes that we need for this * reservation. */ + wait_ordered = true; num_bytes = used - space_info->total_bytes + - (orig_bytes * 2); + (orig_bytes * (retries + 1)); } if (ret) { @@ -3978,6 +3867,8 @@ static int reserve_metadata_bytes(struct btrfs_root *root, trace_btrfs_space_reservation(root->fs_info, "space_info", space_info->flags, orig_bytes, 1); ret = 0; + } else { + wait_ordered = true; } } @@ -3996,13 +3887,36 @@ static int reserve_metadata_bytes(struct btrfs_root *root, if (!ret || !flush) goto out; - ret = flush_space(root, space_info, num_bytes, orig_bytes, - flush_state); - flush_state++; - if (!ret) + /* + * We do synchronous shrinking since we don't actually unreserve + * metadata until after the IO is completed. + */ + ret = shrink_delalloc(root, num_bytes, wait_ordered); + if (ret < 0) + goto out; + + ret = 0; + + /* + * So if we were overcommitted it's possible that somebody else flushed + * out enough space and we simply didn't have enough space to reclaim, + * so go back around and try again. 
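+ * (bounded: after two passes the code falls through to may_commit_transaction() below)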
+ */ + if (retries < 2) { + wait_ordered = true; + retries++; goto again; - else if (flush_state <= COMMIT_TRANS) + } + + ret = -ENOSPC; + if (committed) + goto out; + + ret = may_commit_transaction(root, space_info, orig_bytes, 0); + if (!ret) { + committed = true; goto again; + } out: if (flushing) { @@ -4020,10 +3934,7 @@ static struct btrfs_block_rsv *get_block_rsv( { struct btrfs_block_rsv *block_rsv = NULL; - if (root->ref_cows) - block_rsv = trans->block_rsv; - - if (root == root->fs_info->csum_root && trans->adding_csums) + if (root->ref_cows || root == root->fs_info->csum_root) block_rsv = trans->block_rsv; if (!block_rsv) @@ -4375,9 +4286,6 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info) void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - if (!trans->block_rsv) - return; - if (!trans->bytes_reserved) return; @@ -4536,7 +4444,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) int ret; /* Need to be holding the i_mutex here if we aren't free space cache */ - if (btrfs_is_free_space_inode(inode)) + if (btrfs_is_free_space_inode(root, inode)) flush = 0; if (flush && btrfs_transaction_in_commit(root->fs_info)) @@ -4568,13 +4476,6 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) csum_bytes = BTRFS_I(inode)->csum_bytes; spin_unlock(&BTRFS_I(inode)->lock); - if (root->fs_info->quota_enabled) { - ret = btrfs_qgroup_reserve(root, num_bytes + - nr_extents * root->leafsize); - if (ret) - return ret; - } - ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); if (ret) { u64 to_free = 0; @@ -4653,11 +4554,6 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) trace_btrfs_space_reservation(root->fs_info, "delalloc", btrfs_ino(inode), to_free, 0); - if (root->fs_info->quota_enabled) { - btrfs_qgroup_free(root, num_bytes + - dropped * root->leafsize); - } - btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, to_free); } @@ -5294,9 +5190,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, rb_erase(&head->node.rb_node, &delayed_refs->root); delayed_refs->num_entries--; - smp_mb(); - if (waitqueue_active(&root->fs_info->tree_mod_seq_wait)) - wake_up(&root->fs_info->tree_mod_seq_wait); + if (waitqueue_active(&delayed_refs->seq_wait)) + wake_up(&delayed_refs->seq_wait); /* * we don't take a ref on the node because we're removing it from the @@ -5853,11 +5748,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, ret = do_chunk_alloc(trans, root, num_bytes + 2 * 1024 * 1024, data, CHUNK_ALLOC_LIMITED); - /* - * Do not bail out on ENOSPC since we - * can do more things. - */ - if (ret < 0 && ret != -ENOSPC) { + if (ret < 0) { btrfs_abort_transaction(trans, root, ret); goto out; @@ -5925,13 +5816,13 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, again: list_for_each_entry(cache, &info->block_groups[index], list) { spin_lock(&cache->lock); - printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n", + printk(KERN_INFO "block group %llu has %llu bytes, %llu used " + "%llu pinned %llu reserved\n", (unsigned long long)cache->key.objectid, (unsigned long long)cache->key.offset, (unsigned long long)btrfs_block_group_used(&cache->item), (unsigned long long)cache->pinned, - (unsigned long long)cache->reserved, - cache->ro ? 
"[readonly]" : ""); + (unsigned long long)cache->reserved); btrfs_dump_free_space(cache, bytes); spin_unlock(&cache->lock); } @@ -7719,21 +7610,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) INIT_LIST_HEAD(&cache->list); INIT_LIST_HEAD(&cache->cluster_list); - if (need_clear) { - /* - * When we mount with old space cache, we need to - * set BTRFS_DC_CLEAR and set dirty flag. - * - * a) Setting 'BTRFS_DC_CLEAR' makes sure that we - * truncate the old free space cache inode and - * setup a new one. - * b) Setting 'dirty flag' makes sure that we flush - * the new space cache info onto disk. - */ + if (need_clear) cache->disk_cache_state = BTRFS_DC_CLEAR; - if (btrfs_test_opt(root, SPACE_CACHE)) - cache->dirty = 1; - } read_extent_buffer(leaf, &cache->item, btrfs_item_ptr_offset(leaf, path->slots[0]), diff --git a/trunk/fs/btrfs/extent_io.c b/trunk/fs/btrfs/extent_io.c index 3e7c9ed6505b..01c21b6c6d43 100644 --- a/trunk/fs/btrfs/extent_io.c +++ b/trunk/fs/btrfs/extent_io.c @@ -1918,7 +1918,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, return -EIO; } - printk_ratelimited_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu " + printk_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu " "(dev %s sector %llu)\n", page->mapping->host->i_ino, start, rcu_str_deref(dev->name), sector); @@ -3077,15 +3077,8 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb, } } - /* - * We need to do this to prevent races in people who check if the eb is - * under IO since we can end up having no IO bits set for a short period - * of time. - */ - spin_lock(&eb->refs_lock); if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); - spin_unlock(&eb->refs_lock); btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); spin_lock(&fs_info->delalloc_lock); if (fs_info->dirty_metadata_bytes >= eb->len) @@ -3094,8 +3087,6 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb, WARN_ON(1); spin_unlock(&fs_info->delalloc_lock); ret = 1; - } else { - spin_unlock(&eb->refs_lock); } btrfs_tree_unlock(eb); @@ -3566,38 +3557,19 @@ int extent_readpages(struct extent_io_tree *tree, struct bio *bio = NULL; unsigned page_idx; unsigned long bio_flags = 0; - struct page *pagepool[16]; - struct page *page; - int i = 0; - int nr = 0; for (page_idx = 0; page_idx < nr_pages; page_idx++) { - page = list_entry(pages->prev, struct page, lru); + struct page *page = list_entry(pages->prev, struct page, lru); prefetchw(&page->flags); list_del(&page->lru); - if (add_to_page_cache_lru(page, mapping, + if (!add_to_page_cache_lru(page, mapping, page->index, GFP_NOFS)) { - page_cache_release(page); - continue; - } - - pagepool[nr++] = page; - if (nr < ARRAY_SIZE(pagepool)) - continue; - for (i = 0; i < nr; i++) { - __extent_read_full_page(tree, pagepool[i], get_extent, - &bio, 0, &bio_flags); - page_cache_release(pagepool[i]); + __extent_read_full_page(tree, page, get_extent, + &bio, 0, &bio_flags); } - nr = 0; - } - for (i = 0; i < nr; i++) { - __extent_read_full_page(tree, pagepool[i], get_extent, - &bio, 0, &bio_flags); - page_cache_release(pagepool[i]); + page_cache_release(page); } - BUG_ON(!list_empty(pages)); if (bio) return submit_one_bio(READ, bio, 0, bio_flags); @@ -4151,10 +4123,11 @@ static void check_buffer_tree_ref(struct extent_buffer *eb) * So bump the ref count first, then set the bit. If someone * beat us to it, drop the ref we added. 
*/ - spin_lock(&eb->refs_lock); - if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) + if (!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) { atomic_inc(&eb->refs); - spin_unlock(&eb->refs_lock); + if (test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) + atomic_dec(&eb->refs); + } } static void mark_extent_buffer_accessed(struct extent_buffer *eb) @@ -4266,7 +4239,9 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, goto free_eb; } /* add one reference for the tree */ + spin_lock(&eb->refs_lock); check_buffer_tree_ref(eb); + spin_unlock(&eb->refs_lock); spin_unlock(&tree->buffer_lock); radix_tree_preload_end(); @@ -4325,7 +4300,7 @@ static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head) } /* Expects to have eb->eb_lock already held */ -static int release_extent_buffer(struct extent_buffer *eb, gfp_t mask) +static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask) { WARN_ON(atomic_read(&eb->refs) == 0); if (atomic_dec_and_test(&eb->refs)) { @@ -4346,11 +4321,9 @@ static int release_extent_buffer(struct extent_buffer *eb, gfp_t mask) btrfs_release_extent_buffer_page(eb, 0); call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); - return 1; + return; } spin_unlock(&eb->refs_lock); - - return 0; } void free_extent_buffer(struct extent_buffer *eb) @@ -4989,6 +4962,7 @@ int try_release_extent_buffer(struct page *page, gfp_t mask) spin_unlock(&eb->refs_lock); return 0; } + release_extent_buffer(eb, mask); - return release_extent_buffer(eb, mask); + return 1; } diff --git a/trunk/fs/btrfs/file-item.c b/trunk/fs/btrfs/file-item.c index b45b9de0c21d..5d158d320233 100644 --- a/trunk/fs/btrfs/file-item.c +++ b/trunk/fs/btrfs/file-item.c @@ -183,7 +183,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, * read from the commit root and sidestep a nasty deadlock * between reading the free space cache and updating the csum tree. 
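* (the commit root only changes at transaction commit, which is also why skip_locking is safe here)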
*/ - if (btrfs_is_free_space_inode(inode)) { + if (btrfs_is_free_space_inode(root, inode)) { path->search_commit_root = 1; path->skip_locking = 1; } @@ -690,7 +690,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, return -ENOMEM; sector_sum = sums->sums; - trans->adding_csums = 1; again: next_offset = (u64)-1; found_next = 0; @@ -854,7 +853,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, goto again; } out: - trans->adding_csums = 0; btrfs_free_path(path); return ret; diff --git a/trunk/fs/btrfs/free-space-cache.c b/trunk/fs/btrfs/free-space-cache.c index 6b10acfc2f5c..6c4e2baa9290 100644 --- a/trunk/fs/btrfs/free-space-cache.c +++ b/trunk/fs/btrfs/free-space-cache.c @@ -1968,7 +1968,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { info = rb_entry(n, struct btrfs_free_space, offset_index); - if (info->bytes >= bytes && !block_group->ro) + if (info->bytes >= bytes) count++; printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n", (unsigned long long)info->offset, diff --git a/trunk/fs/btrfs/inode.c b/trunk/fs/btrfs/inode.c index 144f4642b2a9..a7d1921ac76b 100644 --- a/trunk/fs/btrfs/inode.c +++ b/trunk/fs/btrfs/inode.c @@ -825,7 +825,7 @@ static noinline int cow_file_range(struct inode *inode, struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; int ret = 0; - BUG_ON(btrfs_is_free_space_inode(inode)); + BUG_ON(btrfs_is_free_space_inode(root, inode)); trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { extent_clear_unlock_delalloc(inode, @@ -1010,7 +1010,7 @@ static noinline void async_cow_submit(struct btrfs_work *work) atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages); if (atomic_read(&root->fs_info->async_delalloc_pages) < - 5 * 1024 * 1024 && + 5 * 1042 * 1024 && waitqueue_active(&root->fs_info->async_submit_wait)) wake_up(&root->fs_info->async_submit_wait); @@ -1035,7 +1035,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, struct btrfs_root *root = BTRFS_I(inode)->root; unsigned long nr_pages; u64 cur_end; - int limit = 10 * 1024 * 1024; + int limit = 10 * 1024 * 1042; clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS); @@ -1153,7 +1153,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, return -ENOMEM; } - nolock = btrfs_is_free_space_inode(inode); + nolock = btrfs_is_free_space_inode(root, inode); if (nolock) trans = btrfs_join_transaction_nolock(root); @@ -1466,7 +1466,7 @@ static void btrfs_set_bit_hook(struct inode *inode, if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 len = state->end + 1 - state->start; - bool do_list = !btrfs_is_free_space_inode(inode); + bool do_list = !btrfs_is_free_space_inode(root, inode); if (*bits & EXTENT_FIRST_DELALLOC) { *bits &= ~EXTENT_FIRST_DELALLOC; @@ -1501,7 +1501,7 @@ static void btrfs_clear_bit_hook(struct inode *inode, if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 len = state->end + 1 - state->start; - bool do_list = !btrfs_is_free_space_inode(inode); + bool do_list = !btrfs_is_free_space_inode(root, inode); if (*bits & EXTENT_FIRST_DELALLOC) { *bits &= ~EXTENT_FIRST_DELALLOC; @@ -1612,7 +1612,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; - if 
(btrfs_is_free_space_inode(inode)) + if (btrfs_is_free_space_inode(root, inode)) metadata = 2; if (!(rw & REQ_WRITE)) { @@ -1869,7 +1869,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) int ret; bool nolock; - nolock = btrfs_is_free_space_inode(inode); + nolock = btrfs_is_free_space_inode(root, inode); if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) { ret = -EIO; @@ -2007,7 +2007,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, ordered_extent->work.func = finish_ordered_fn; ordered_extent->work.flags = 0; - if (btrfs_is_free_space_inode(inode)) + if (btrfs_is_free_space_inode(root, inode)) workers = &root->fs_info->endio_freespace_worker; else workers = &root->fs_info->endio_write_workers; @@ -2732,7 +2732,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, * The data relocation inode should also be directly updated * without delay */ - if (!btrfs_is_free_space_inode(inode) + if (!btrfs_is_free_space_inode(root, inode) && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { ret = btrfs_delayed_update_inode(trans, root, inode); if (!ret) @@ -2833,7 +2833,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, inode_inc_iversion(inode); inode_inc_iversion(dir); inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; - ret = btrfs_update_inode(trans, root, dir); + btrfs_update_inode(trans, root, dir); out: return ret; } @@ -3743,7 +3743,7 @@ void btrfs_evict_inode(struct inode *inode) truncate_inode_pages(&inode->i_data, 0); if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || - btrfs_is_free_space_inode(inode))) + btrfs_is_free_space_inode(root, inode))) goto no_delete; if (is_bad_inode(inode)) { @@ -4082,6 +4082,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) struct btrfs_iget_args *args = p; inode->i_ino = args->ino; BTRFS_I(inode)->root = args->root; + btrfs_set_inode_space_info(args->root, inode); return 0; } @@ -4456,7 +4457,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags)) return 0; - if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(inode)) + if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode)) nolock = true; if (wbc->sync_mode == WB_SYNC_ALL) { @@ -4517,11 +4518,6 @@ int btrfs_dirty_inode(struct inode *inode) static int btrfs_update_time(struct inode *inode, struct timespec *now, int flags) { - struct btrfs_root *root = BTRFS_I(inode)->root; - - if (btrfs_root_readonly(root)) - return -EROFS; - if (flags & S_VERSION) inode_inc_iversion(inode); if (flags & S_CTIME) @@ -4666,6 +4662,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, BTRFS_I(inode)->root = root; BTRFS_I(inode)->generation = trans->transid; inode->i_generation = BTRFS_I(inode)->generation; + btrfs_set_inode_space_info(root, inode); if (S_ISDIR(mode)) owner = 0; @@ -4693,8 +4690,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_item); - memset_extent_buffer(path->nodes[0], 0, (unsigned long)inode_item, - sizeof(*inode_item)); fill_inode_item(trans, path->nodes[0], inode_item, inode); ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, @@ -6944,6 +6939,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) return NULL; ei->root = 
NULL; + ei->space_info = NULL; ei->generation = 0; ei->last_trans = 0; ei->last_sub_trans = 0; @@ -7050,7 +7046,7 @@ int btrfs_drop_inode(struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; if (btrfs_root_refs(&root->root_item) == 0 && - !btrfs_is_free_space_inode(inode)) + !btrfs_is_free_space_inode(root, inode)) return 1; else return generic_drop_inode(inode); diff --git a/trunk/fs/btrfs/ioctl.c b/trunk/fs/btrfs/ioctl.c index 3f3cbe928a1a..7011871c45b8 100644 --- a/trunk/fs/btrfs/ioctl.c +++ b/trunk/fs/btrfs/ioctl.c @@ -336,8 +336,7 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) static noinline int create_subvol(struct btrfs_root *root, struct dentry *dentry, char *name, int namelen, - u64 *async_transid, - struct btrfs_qgroup_inherit **inherit) + u64 *async_transid) { struct btrfs_trans_handle *trans; struct btrfs_key key; @@ -369,11 +368,6 @@ static noinline int create_subvol(struct btrfs_root *root, if (IS_ERR(trans)) return PTR_ERR(trans); - ret = btrfs_qgroup_inherit(trans, root->fs_info, 0, objectid, - inherit ? *inherit : NULL); - if (ret) - goto fail; - leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, objectid, NULL, 0, 0, 0); if (IS_ERR(leaf)) { @@ -490,7 +484,7 @@ static noinline int create_subvol(struct btrfs_root *root, static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, char *name, int namelen, u64 *async_transid, - bool readonly, struct btrfs_qgroup_inherit **inherit) + bool readonly) { struct inode *inode; struct btrfs_pending_snapshot *pending_snapshot; @@ -508,10 +502,6 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, pending_snapshot->dentry = dentry; pending_snapshot->root = root; pending_snapshot->readonly = readonly; - if (inherit) { - pending_snapshot->inherit = *inherit; - *inherit = NULL; /* take responsibility to free it */ - } trans = btrfs_start_transaction(root->fs_info->extent_root, 5); if (IS_ERR(trans)) { @@ -645,8 +635,7 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child) static noinline int btrfs_mksubvol(struct path *parent, char *name, int namelen, struct btrfs_root *snap_src, - u64 *async_transid, bool readonly, - struct btrfs_qgroup_inherit **inherit) + u64 *async_transid, bool readonly) { struct inode *dir = parent->dentry->d_inode; struct dentry *dentry; @@ -663,26 +652,32 @@ static noinline int btrfs_mksubvol(struct path *parent, if (dentry->d_inode) goto out_dput; - error = btrfs_may_create(dir, dentry); + error = mnt_want_write(parent->mnt); if (error) goto out_dput; + error = btrfs_may_create(dir, dentry); + if (error) + goto out_drop_write; + down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0) goto out_up_read; if (snap_src) { - error = create_snapshot(snap_src, dentry, name, namelen, - async_transid, readonly, inherit); + error = create_snapshot(snap_src, dentry, + name, namelen, async_transid, readonly); } else { error = create_subvol(BTRFS_I(dir)->root, dentry, - name, namelen, async_transid, inherit); + name, namelen, async_transid); } if (!error) fsnotify_mkdir(dir, dentry); out_up_read: up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); +out_drop_write: + mnt_drop_write(parent->mnt); out_dput: dput(dentry); out_unlock: @@ -837,8 +832,7 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) } static int should_defrag_range(struct inode *inode, u64 start, int thresh, - u64 *last_len, u64 *skip, u64 *defrag_end, - 
int compress) + u64 *last_len, u64 *skip, u64 *defrag_end) { struct extent_map *em; int ret = 1; @@ -869,7 +863,7 @@ static int should_defrag_range(struct inode *inode, u64 start, int thresh, * we hit a real extent, if it is big or the next extent is not a * real extent, don't bother defragging it */ - if (!compress && (*last_len == 0 || *last_len >= thresh) && + if ((*last_len == 0 || *last_len >= thresh) && (em->len >= thresh || !next_mergeable)) ret = 0; out: @@ -1053,9 +1047,11 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, u64 newer_than, unsigned long max_to_defrag) { struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_super_block *disk_super; struct file_ra_state *ra = NULL; unsigned long last_index; u64 isize = i_size_read(inode); + u64 features; u64 last_len = 0; u64 skip = 0; u64 defrag_end = 0; @@ -1149,8 +1145,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, extent_thresh, &last_len, &skip, - &defrag_end, range->flags & - BTRFS_DEFRAG_RANGE_COMPRESS)) { + &defrag_end)) { unsigned long next; /* * the should_defrag function tells us how much to skip @@ -1242,8 +1237,11 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, mutex_unlock(&inode->i_mutex); } + disk_super = root->fs_info->super_copy; + features = btrfs_super_incompat_flags(disk_super); if (range->compress_type == BTRFS_COMPRESS_LZO) { - btrfs_set_fs_incompat(root->fs_info, COMPRESS_LZO); + features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; + btrfs_set_super_incompat_flags(disk_super, features); } ret = defrag_count; @@ -1381,39 +1379,41 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, } static noinline int btrfs_ioctl_snap_create_transid(struct file *file, - char *name, unsigned long fd, int subvol, - u64 *transid, bool readonly, - struct btrfs_qgroup_inherit **inherit) + char *name, + unsigned long fd, + int subvol, + u64 *transid, + bool readonly) { + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; struct file *src_file; int namelen; int ret = 0; - ret = mnt_want_write_file(file); - if (ret) - goto out; + if (root->fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; namelen = strlen(name); if (strchr(name, '/')) { ret = -EINVAL; - goto out_drop_write; + goto out; } if (name[0] == '.' && (namelen == 1 || (name[1] == '.' 
&& namelen == 2))) { ret = -EEXIST; - goto out_drop_write; + goto out; } if (subvol) { ret = btrfs_mksubvol(&file->f_path, name, namelen, - NULL, transid, readonly, inherit); + NULL, transid, readonly); } else { struct inode *src_inode; src_file = fget(fd); if (!src_file) { ret = -EINVAL; - goto out_drop_write; + goto out; } src_inode = src_file->f_path.dentry->d_inode; @@ -1422,15 +1422,13 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, "another FS\n"); ret = -EINVAL; fput(src_file); - goto out_drop_write; + goto out; } ret = btrfs_mksubvol(&file->f_path, name, namelen, BTRFS_I(src_inode)->root, - transid, readonly, inherit); + transid, readonly); fput(src_file); } -out_drop_write: - mnt_drop_write_file(file); out: return ret; } @@ -1448,7 +1446,7 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, vol_args->fd, subvol, - NULL, false, NULL); + NULL, false); kfree(vol_args); return ret; @@ -1462,7 +1460,6 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, u64 transid = 0; u64 *ptr = NULL; bool readonly = false; - struct btrfs_qgroup_inherit *inherit = NULL; vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) @@ -1470,8 +1467,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; if (vol_args->flags & - ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY | - BTRFS_SUBVOL_QGROUP_INHERIT)) { + ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY)) { ret = -EOPNOTSUPP; goto out; } @@ -1480,21 +1476,10 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, ptr = &transid; if (vol_args->flags & BTRFS_SUBVOL_RDONLY) readonly = true; - if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) { - if (vol_args->size > PAGE_CACHE_SIZE) { - ret = -EINVAL; - goto out; - } - inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size); - if (IS_ERR(inherit)) { - ret = PTR_ERR(inherit); - goto out; - } - } ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, - vol_args->fd, subvol, ptr, - readonly, &inherit); + vol_args->fd, subvol, + ptr, readonly); if (ret == 0 && ptr && copy_to_user(arg + @@ -1503,7 +1488,6 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, ret = -EFAULT; out: kfree(vol_args); - kfree(inherit); return ret; } @@ -1539,40 +1523,29 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file, u64 flags; int ret = 0; - ret = mnt_want_write_file(file); - if (ret) - goto out; + if (root->fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; - if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { - ret = -EINVAL; - goto out_drop_write; - } + if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) + return -EINVAL; - if (copy_from_user(&flags, arg, sizeof(flags))) { - ret = -EFAULT; - goto out_drop_write; - } + if (copy_from_user(&flags, arg, sizeof(flags))) + return -EFAULT; - if (flags & BTRFS_SUBVOL_CREATE_ASYNC) { - ret = -EINVAL; - goto out_drop_write; - } + if (flags & BTRFS_SUBVOL_CREATE_ASYNC) + return -EINVAL; - if (flags & ~BTRFS_SUBVOL_RDONLY) { - ret = -EOPNOTSUPP; - goto out_drop_write; - } + if (flags & ~BTRFS_SUBVOL_RDONLY) + return -EOPNOTSUPP; - if (!inode_owner_or_capable(inode)) { - ret = -EACCES; - goto out_drop_write; - } + if (!inode_owner_or_capable(inode)) + return -EACCES; down_write(&root->fs_info->subvol_sem); /* nothing to do */ if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root)) - goto out_drop_sem; + goto out; root_flags = 
btrfs_root_flags(&root->root_item); if (flags & BTRFS_SUBVOL_RDONLY) @@ -1595,11 +1568,8 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file, out_reset: if (ret) btrfs_set_root_flags(&root->root_item, root_flags); -out_drop_sem: - up_write(&root->fs_info->subvol_sem); -out_drop_write: - mnt_drop_write_file(file); out: + up_write(&root->fs_info->subvol_sem); return ret; } @@ -2370,6 +2340,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, goto out_drop_write; } + ret = -EXDEV; + if (src_file->f_path.mnt != file->f_path.mnt) + goto out_fput; + src = src_file->f_dentry->d_inode; ret = -EINVAL; @@ -2390,7 +2364,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, goto out_fput; ret = -EXDEV; - if (src->i_sb != inode->i_sb || BTRFS_I(src)->root != root) + if (src->i_sb != inode->i_sb) goto out_fput; ret = -ENOMEM; @@ -2464,13 +2438,14 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, * note the key will change type as we walk through the * tree. */ - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path, + 0, 0); if (ret < 0) goto out; nritems = btrfs_header_nritems(path->nodes[0]); if (path->slots[0] >= nritems) { - ret = btrfs_next_leaf(root, path); + ret = btrfs_next_leaf(BTRFS_I(src)->root, path); if (ret < 0) goto out; if (ret > 0) @@ -2779,6 +2754,8 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) struct btrfs_path *path; struct btrfs_key location; struct btrfs_disk_key disk_key; + struct btrfs_super_block *disk_super; + u64 features; u64 objectid = 0; u64 dir_id; @@ -2829,7 +2806,12 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_free_path(path); - btrfs_set_fs_incompat(root->fs_info, DEFAULT_SUBVOL); + disk_super = root->fs_info->super_copy; + features = btrfs_super_incompat_flags(disk_super); + if (!(features & BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)) { + features |= BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL; + btrfs_set_super_incompat_flags(disk_super, features); + } btrfs_end_transaction(trans, root); return 0; @@ -3086,21 +3068,19 @@ static long btrfs_ioctl_scrub_progress(struct btrfs_root *root, } static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root, - void __user *arg) + void __user *arg, int reset_after_read) { struct btrfs_ioctl_get_dev_stats *sa; int ret; + if (reset_after_read && !capable(CAP_SYS_ADMIN)) + return -EPERM; + sa = memdup_user(arg, sizeof(*sa)); if (IS_ERR(sa)) return PTR_ERR(sa); - if ((sa->flags & BTRFS_DEV_STATS_RESET) && !capable(CAP_SYS_ADMIN)) { - kfree(sa); - return -EPERM; - } - - ret = btrfs_get_dev_stats(root, sa); + ret = btrfs_get_dev_stats(root, sa, reset_after_read); if (copy_to_user(arg, sa, sizeof(*sa))) ret = -EFAULT; @@ -3290,7 +3270,10 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - ret = mnt_want_write_file(file); + if (fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + + ret = mnt_want_write(file->f_path.mnt); if (ret) return ret; @@ -3360,7 +3343,7 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) out: mutex_unlock(&fs_info->balance_mutex); mutex_unlock(&fs_info->volume_mutex); - mnt_drop_write_file(file); + mnt_drop_write(file->f_path.mnt); return ret; } @@ -3412,183 +3395,6 @@ static long btrfs_ioctl_balance_progress(struct btrfs_root *root, return ret; } 
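The quota and qgroup ioctl handlers removed below all share one shape: memdup_user() the argument struct in, run the operation (typically inside a transaction whose btrfs_end_transaction() error is folded in via "if (err && !ret) ret = err;"), copy the struct back out, and kfree(). A condensed sketch of that shape follows; demo_args and do_demo_op are hypothetical stand-ins, not btrfs API.

#include <linux/capability.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/uaccess.h>

struct demo_args {                      /* hypothetical, mirrors the *_args structs below */
        __u64 cmd;
        __u64 status;                   /* out value reported back to user space */
};

static int do_demo_op(struct demo_args *sa)     /* hypothetical operation */
{
        sa->status = sa->cmd;
        return 0;
}

static long demo_ioctl(void __user *arg)
{
        struct demo_args *sa;
        int ret;

        if (!capable(CAP_SYS_ADMIN))    /* privileged, like the handlers below */
                return -EPERM;

        sa = memdup_user(arg, sizeof(*sa));     /* kmalloc + copy_from_user in one call */
        if (IS_ERR(sa))
                return PTR_ERR(sa);

        ret = do_demo_op(sa);

        if (copy_to_user(arg, sa, sizeof(*sa))) /* hand out-parameters back */
                ret = -EFAULT;

        kfree(sa);                      /* memdup_user() memory is plain kmalloc */
        return ret;
}

As in the removed handlers, a copy_to_user() failure clobbers any earlier error with -EFAULT, and the argument struct is freed on every path.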
-static long btrfs_ioctl_quota_ctl(struct btrfs_root *root, void __user *arg) -{ - struct btrfs_ioctl_quota_ctl_args *sa; - struct btrfs_trans_handle *trans = NULL; - int ret; - int err; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (root->fs_info->sb->s_flags & MS_RDONLY) - return -EROFS; - - sa = memdup_user(arg, sizeof(*sa)); - if (IS_ERR(sa)) - return PTR_ERR(sa); - - if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) { - trans = btrfs_start_transaction(root, 2); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto out; - } - } - - switch (sa->cmd) { - case BTRFS_QUOTA_CTL_ENABLE: - ret = btrfs_quota_enable(trans, root->fs_info); - break; - case BTRFS_QUOTA_CTL_DISABLE: - ret = btrfs_quota_disable(trans, root->fs_info); - break; - case BTRFS_QUOTA_CTL_RESCAN: - ret = btrfs_quota_rescan(root->fs_info); - break; - default: - ret = -EINVAL; - break; - } - - if (copy_to_user(arg, sa, sizeof(*sa))) - ret = -EFAULT; - - if (trans) { - err = btrfs_commit_transaction(trans, root); - if (err && !ret) - ret = err; - } - -out: - kfree(sa); - return ret; -} - -static long btrfs_ioctl_qgroup_assign(struct btrfs_root *root, void __user *arg) -{ - struct btrfs_ioctl_qgroup_assign_args *sa; - struct btrfs_trans_handle *trans; - int ret; - int err; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (root->fs_info->sb->s_flags & MS_RDONLY) - return -EROFS; - - sa = memdup_user(arg, sizeof(*sa)); - if (IS_ERR(sa)) - return PTR_ERR(sa); - - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto out; - } - - /* FIXME: check if the IDs really exist */ - if (sa->assign) { - ret = btrfs_add_qgroup_relation(trans, root->fs_info, - sa->src, sa->dst); - } else { - ret = btrfs_del_qgroup_relation(trans, root->fs_info, - sa->src, sa->dst); - } - - err = btrfs_end_transaction(trans, root); - if (err && !ret) - ret = err; - -out: - kfree(sa); - return ret; -} - -static long btrfs_ioctl_qgroup_create(struct btrfs_root *root, void __user *arg) -{ - struct btrfs_ioctl_qgroup_create_args *sa; - struct btrfs_trans_handle *trans; - int ret; - int err; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (root->fs_info->sb->s_flags & MS_RDONLY) - return -EROFS; - - sa = memdup_user(arg, sizeof(*sa)); - if (IS_ERR(sa)) - return PTR_ERR(sa); - - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto out; - } - - /* FIXME: check if the IDs really exist */ - if (sa->create) { - ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid, - NULL); - } else { - ret = btrfs_remove_qgroup(trans, root->fs_info, sa->qgroupid); - } - - err = btrfs_end_transaction(trans, root); - if (err && !ret) - ret = err; - -out: - kfree(sa); - return ret; -} - -static long btrfs_ioctl_qgroup_limit(struct btrfs_root *root, void __user *arg) -{ - struct btrfs_ioctl_qgroup_limit_args *sa; - struct btrfs_trans_handle *trans; - int ret; - int err; - u64 qgroupid; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (root->fs_info->sb->s_flags & MS_RDONLY) - return -EROFS; - - sa = memdup_user(arg, sizeof(*sa)); - if (IS_ERR(sa)) - return PTR_ERR(sa); - - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto out; - } - - qgroupid = sa->qgroupid; - if (!qgroupid) { - /* take the current subvol as qgroup */ - qgroupid = root->root_key.objectid; - } - - /* FIXME: check if the IDs really exist */ - ret = btrfs_limit_qgroup(trans, root->fs_info, qgroupid, &sa->lim); - - err = btrfs_end_transaction(trans, root); - if (err && !ret) - 
ret = err; - -out: - kfree(sa); - return ret; -} - long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -3610,8 +3416,6 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_snap_create_v2(file, argp, 0); case BTRFS_IOC_SUBVOL_CREATE: return btrfs_ioctl_snap_create(file, argp, 1); - case BTRFS_IOC_SUBVOL_CREATE_V2: - return btrfs_ioctl_snap_create_v2(file, argp, 1); case BTRFS_IOC_SNAP_DESTROY: return btrfs_ioctl_snap_destroy(file, argp); case BTRFS_IOC_SUBVOL_GETFLAGS: @@ -3674,15 +3478,9 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_BALANCE_PROGRESS: return btrfs_ioctl_balance_progress(root, argp); case BTRFS_IOC_GET_DEV_STATS: - return btrfs_ioctl_get_dev_stats(root, argp); - case BTRFS_IOC_QUOTA_CTL: - return btrfs_ioctl_quota_ctl(root, argp); - case BTRFS_IOC_QGROUP_ASSIGN: - return btrfs_ioctl_qgroup_assign(root, argp); - case BTRFS_IOC_QGROUP_CREATE: - return btrfs_ioctl_qgroup_create(root, argp); - case BTRFS_IOC_QGROUP_LIMIT: - return btrfs_ioctl_qgroup_limit(root, argp); + return btrfs_ioctl_get_dev_stats(root, argp, 0); + case BTRFS_IOC_GET_AND_RESET_DEV_STATS: + return btrfs_ioctl_get_dev_stats(root, argp, 1); } return -ENOTTY; diff --git a/trunk/fs/btrfs/ioctl.h b/trunk/fs/btrfs/ioctl.h index 3f9701d571ea..e440aa653c30 100644 --- a/trunk/fs/btrfs/ioctl.h +++ b/trunk/fs/btrfs/ioctl.h @@ -32,46 +32,15 @@ struct btrfs_ioctl_vol_args { #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) #define BTRFS_SUBVOL_RDONLY (1ULL << 1) -#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) #define BTRFS_FSID_SIZE 16 #define BTRFS_UUID_SIZE 16 -#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0) - -struct btrfs_qgroup_limit { - __u64 flags; - __u64 max_rfer; - __u64 max_excl; - __u64 rsv_rfer; - __u64 rsv_excl; -}; - -struct btrfs_qgroup_inherit { - __u64 flags; - __u64 num_qgroups; - __u64 num_ref_copies; - __u64 num_excl_copies; - struct btrfs_qgroup_limit lim; - __u64 qgroups[0]; -}; - -struct btrfs_ioctl_qgroup_limit_args { - __u64 qgroupid; - struct btrfs_qgroup_limit lim; -}; - #define BTRFS_SUBVOL_NAME_MAX 4039 struct btrfs_ioctl_vol_args_v2 { __s64 fd; __u64 transid; __u64 flags; - union { - struct { - __u64 size; - struct btrfs_qgroup_inherit __user *qgroup_inherit; - }; - __u64 unused[4]; - }; + __u64 unused[4]; char name[BTRFS_SUBVOL_NAME_MAX + 1]; }; @@ -316,13 +285,9 @@ enum btrfs_dev_stat_values { BTRFS_DEV_STAT_VALUES_MAX }; -/* Reset statistics after reading; needs SYS_ADMIN capability */ -#define BTRFS_DEV_STATS_RESET (1ULL << 0) - struct btrfs_ioctl_get_dev_stats { __u64 devid; /* in */ __u64 nr_items; /* in/out */ - __u64 flags; /* in/out */ /* out values: */ __u64 values[BTRFS_DEV_STAT_VALUES_MAX]; @@ -330,25 +295,6 @@ struct btrfs_ioctl_get_dev_stats { __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */ }; -#define BTRFS_QUOTA_CTL_ENABLE 1 -#define BTRFS_QUOTA_CTL_DISABLE 2 -#define BTRFS_QUOTA_CTL_RESCAN 3 -struct btrfs_ioctl_quota_ctl_args { - __u64 cmd; - __u64 status; -}; - -struct btrfs_ioctl_qgroup_assign_args { - __u64 assign; - __u64 src; - __u64 dst; -}; - -struct btrfs_ioctl_qgroup_create_args { - __u64 create; - __u64 qgroupid; -}; - #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ struct btrfs_ioctl_vol_args) #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ @@ -393,8 +339,6 @@ struct btrfs_ioctl_qgroup_create_args { #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ struct btrfs_ioctl_vol_args_v2) -#define 
BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \ - struct btrfs_ioctl_vol_args_v2) #define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64) #define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) #define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ @@ -415,16 +359,9 @@ struct btrfs_ioctl_qgroup_create_args { struct btrfs_ioctl_ino_path_args) #define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ struct btrfs_ioctl_ino_path_args) -#define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \ - struct btrfs_ioctl_quota_ctl_args) -#define BTRFS_IOC_QGROUP_ASSIGN _IOW(BTRFS_IOCTL_MAGIC, 41, \ - struct btrfs_ioctl_qgroup_assign_args) -#define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \ - struct btrfs_ioctl_qgroup_create_args) -#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \ - struct btrfs_ioctl_qgroup_limit_args) #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ struct btrfs_ioctl_get_dev_stats) +#define BTRFS_IOC_GET_AND_RESET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 53, \ + struct btrfs_ioctl_get_dev_stats) + #endif diff --git a/trunk/fs/btrfs/locking.c b/trunk/fs/btrfs/locking.c index a44eff074805..272f911203ff 100644 --- a/trunk/fs/btrfs/locking.c +++ b/trunk/fs/btrfs/locking.c @@ -78,15 +78,13 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) write_lock(&eb->lock); WARN_ON(atomic_read(&eb->spinning_writers)); atomic_inc(&eb->spinning_writers); - if (atomic_dec_and_test(&eb->blocking_writers) && - waitqueue_active(&eb->write_lock_wq)) + if (atomic_dec_and_test(&eb->blocking_writers)) wake_up(&eb->write_lock_wq); } else if (rw == BTRFS_READ_LOCK_BLOCKING) { BUG_ON(atomic_read(&eb->blocking_readers) == 0); read_lock(&eb->lock); atomic_inc(&eb->spinning_readers); - if (atomic_dec_and_test(&eb->blocking_readers) && - waitqueue_active(&eb->read_lock_wq)) + if (atomic_dec_and_test(&eb->blocking_readers)) wake_up(&eb->read_lock_wq); } return; @@ -201,8 +199,7 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb) } btrfs_assert_tree_read_locked(eb); WARN_ON(atomic_read(&eb->blocking_readers) == 0); - if (atomic_dec_and_test(&eb->blocking_readers) && - waitqueue_active(&eb->read_lock_wq)) + if (atomic_dec_and_test(&eb->blocking_readers)) wake_up(&eb->read_lock_wq); atomic_dec(&eb->read_locks); } @@ -250,9 +247,8 @@ void btrfs_tree_unlock(struct extent_buffer *eb) if (blockers) { WARN_ON(atomic_read(&eb->spinning_writers)); atomic_dec(&eb->blocking_writers); - smp_mb(); - if (waitqueue_active(&eb->write_lock_wq)) - wake_up(&eb->write_lock_wq); + smp_wmb(); + wake_up(&eb->write_lock_wq); } else { WARN_ON(atomic_read(&eb->spinning_writers) != 1); atomic_dec(&eb->spinning_writers); diff --git a/trunk/fs/btrfs/qgroup.c b/trunk/fs/btrfs/qgroup.c deleted file mode 100644 index bc424ae5a81a..000000000000 --- a/trunk/fs/btrfs/qgroup.c +++ /dev/null @@ -1,1571 +0,0 @@ -/* - * Copyright (C) 2011 STRATO. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "ctree.h" -#include "transaction.h" -#include "disk-io.h" -#include "locking.h" -#include "ulist.h" -#include "ioctl.h" -#include "backref.h" - -/* TODO XXX FIXME - * - subvol delete -> delete when ref goes to 0? delete limits also? - * - reorganize keys - * - compressed - * - sync - * - rescan - * - copy also limits on subvol creation - * - limit - * - caches fuer ulists - * - performance benchmarks - * - check all ioctl parameters - */ - -/* - * one struct for each qgroup, organized in fs_info->qgroup_tree. - */ -struct btrfs_qgroup { - u64 qgroupid; - - /* - * state - */ - u64 rfer; /* referenced */ - u64 rfer_cmpr; /* referenced compressed */ - u64 excl; /* exclusive */ - u64 excl_cmpr; /* exclusive compressed */ - - /* - * limits - */ - u64 lim_flags; /* which limits are set */ - u64 max_rfer; - u64 max_excl; - u64 rsv_rfer; - u64 rsv_excl; - - /* - * reservation tracking - */ - u64 reserved; - - /* - * lists - */ - struct list_head groups; /* groups this group is member of */ - struct list_head members; /* groups that are members of this group */ - struct list_head dirty; /* dirty groups */ - struct rb_node node; /* tree of qgroups */ - - /* - * temp variables for accounting operations - */ - u64 tag; - u64 refcnt; -}; - -/* - * glue structure to represent the relations between qgroups. - */ -struct btrfs_qgroup_list { - struct list_head next_group; - struct list_head next_member; - struct btrfs_qgroup *group; - struct btrfs_qgroup *member; -}; - -/* must be called with qgroup_lock held */ -static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, - u64 qgroupid) -{ - struct rb_node *n = fs_info->qgroup_tree.rb_node; - struct btrfs_qgroup *qgroup; - - while (n) { - qgroup = rb_entry(n, struct btrfs_qgroup, node); - if (qgroup->qgroupid < qgroupid) - n = n->rb_left; - else if (qgroup->qgroupid > qgroupid) - n = n->rb_right; - else - return qgroup; - } - return NULL; -} - -/* must be called with qgroup_lock held */ -static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info, - u64 qgroupid) -{ - struct rb_node **p = &fs_info->qgroup_tree.rb_node; - struct rb_node *parent = NULL; - struct btrfs_qgroup *qgroup; - - while (*p) { - parent = *p; - qgroup = rb_entry(parent, struct btrfs_qgroup, node); - - if (qgroup->qgroupid < qgroupid) - p = &(*p)->rb_left; - else if (qgroup->qgroupid > qgroupid) - p = &(*p)->rb_right; - else - return qgroup; - } - - qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC); - if (!qgroup) - return ERR_PTR(-ENOMEM); - - qgroup->qgroupid = qgroupid; - INIT_LIST_HEAD(&qgroup->groups); - INIT_LIST_HEAD(&qgroup->members); - INIT_LIST_HEAD(&qgroup->dirty); - - rb_link_node(&qgroup->node, parent, p); - rb_insert_color(&qgroup->node, &fs_info->qgroup_tree); - - return qgroup; -} - -/* must be called with qgroup_lock held */ -static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) -{ - struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid); - struct btrfs_qgroup_list *list; - - if (!qgroup) - return -ENOENT; - - rb_erase(&qgroup->node, &fs_info->qgroup_tree); - list_del(&qgroup->dirty); - - while (!list_empty(&qgroup->groups)) { - list = list_first_entry(&qgroup->groups, - struct btrfs_qgroup_list, next_group); - 
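- /* a relation is linked from both the member and the parent; unlink both sides before freeing */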
list_del(&list->next_group); - list_del(&list->next_member); - kfree(list); - } - - while (!list_empty(&qgroup->members)) { - list = list_first_entry(&qgroup->members, - struct btrfs_qgroup_list, next_member); - list_del(&list->next_group); - list_del(&list->next_member); - kfree(list); - } - kfree(qgroup); - - return 0; -} - -/* must be called with qgroup_lock held */ -static int add_relation_rb(struct btrfs_fs_info *fs_info, - u64 memberid, u64 parentid) -{ - struct btrfs_qgroup *member; - struct btrfs_qgroup *parent; - struct btrfs_qgroup_list *list; - - member = find_qgroup_rb(fs_info, memberid); - parent = find_qgroup_rb(fs_info, parentid); - if (!member || !parent) - return -ENOENT; - - list = kzalloc(sizeof(*list), GFP_ATOMIC); - if (!list) - return -ENOMEM; - - list->group = parent; - list->member = member; - list_add_tail(&list->next_group, &member->groups); - list_add_tail(&list->next_member, &parent->members); - - return 0; -} - -/* must be called with qgroup_lock held */ -static int del_relation_rb(struct btrfs_fs_info *fs_info, - u64 memberid, u64 parentid) -{ - struct btrfs_qgroup *member; - struct btrfs_qgroup *parent; - struct btrfs_qgroup_list *list; - - member = find_qgroup_rb(fs_info, memberid); - parent = find_qgroup_rb(fs_info, parentid); - if (!member || !parent) - return -ENOENT; - - list_for_each_entry(list, &member->groups, next_group) { - if (list->group == parent) { - list_del(&list->next_group); - list_del(&list->next_member); - kfree(list); - return 0; - } - } - return -ENOENT; -} - -/* - * The full config is read in one go, only called from open_ctree() - * It doesn't use any locking, as at this point we're still single-threaded - */ -int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) -{ - struct btrfs_key key; - struct btrfs_key found_key; - struct btrfs_root *quota_root = fs_info->quota_root; - struct btrfs_path *path = NULL; - struct extent_buffer *l; - int slot; - int ret = 0; - u64 flags = 0; - - if (!fs_info->quota_enabled) - return 0; - - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out; - } - - /* default this to quota off, in case no status key is found */ - fs_info->qgroup_flags = 0; - - /* - * pass 1: read status, all qgroup infos and limits - */ - key.objectid = 0; - key.type = 0; - key.offset = 0; - ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1); - if (ret) - goto out; - - while (1) { - struct btrfs_qgroup *qgroup; - - slot = path->slots[0]; - l = path->nodes[0]; - btrfs_item_key_to_cpu(l, &found_key, slot); - - if (found_key.type == BTRFS_QGROUP_STATUS_KEY) { - struct btrfs_qgroup_status_item *ptr; - - ptr = btrfs_item_ptr(l, slot, - struct btrfs_qgroup_status_item); - - if (btrfs_qgroup_status_version(l, ptr) != - BTRFS_QGROUP_STATUS_VERSION) { - printk(KERN_ERR - "btrfs: old qgroup version, quota disabled\n"); - goto out; - } - if (btrfs_qgroup_status_generation(l, ptr) != - fs_info->generation) { - flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; - printk(KERN_ERR - "btrfs: qgroup generation mismatch, " - "marked as inconsistent\n"); - } - fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, - ptr); - /* FIXME read scan element */ - goto next1; - } - - if (found_key.type != BTRFS_QGROUP_INFO_KEY && - found_key.type != BTRFS_QGROUP_LIMIT_KEY) - goto next1; - - qgroup = find_qgroup_rb(fs_info, found_key.offset); - if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) || - (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) { - printk(KERN_ERR "btrfs: inconsitent qgroup config\n"); - flags |= 
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; - } - if (!qgroup) { - qgroup = add_qgroup_rb(fs_info, found_key.offset); - if (IS_ERR(qgroup)) { - ret = PTR_ERR(qgroup); - goto out; - } - } - switch (found_key.type) { - case BTRFS_QGROUP_INFO_KEY: { - struct btrfs_qgroup_info_item *ptr; - - ptr = btrfs_item_ptr(l, slot, - struct btrfs_qgroup_info_item); - qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr); - qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr); - qgroup->excl = btrfs_qgroup_info_excl(l, ptr); - qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr); - /* generation currently unused */ - break; - } - case BTRFS_QGROUP_LIMIT_KEY: { - struct btrfs_qgroup_limit_item *ptr; - - ptr = btrfs_item_ptr(l, slot, - struct btrfs_qgroup_limit_item); - qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr); - qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr); - qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr); - qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr); - qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr); - break; - } - } -next1: - ret = btrfs_next_item(quota_root, path); - if (ret < 0) - goto out; - if (ret) - break; - } - btrfs_release_path(path); - - /* - * pass 2: read all qgroup relations - */ - key.objectid = 0; - key.type = BTRFS_QGROUP_RELATION_KEY; - key.offset = 0; - ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0); - if (ret) - goto out; - while (1) { - slot = path->slots[0]; - l = path->nodes[0]; - btrfs_item_key_to_cpu(l, &found_key, slot); - - if (found_key.type != BTRFS_QGROUP_RELATION_KEY) - goto next2; - - if (found_key.objectid > found_key.offset) { - /* parent <- member, not needed to build config */ - /* FIXME should we omit the key completely? */ - goto next2; - } - - ret = add_relation_rb(fs_info, found_key.objectid, - found_key.offset); - if (ret) - goto out; -next2: - ret = btrfs_next_item(quota_root, path); - if (ret < 0) - goto out; - if (ret) - break; - } -out: - fs_info->qgroup_flags |= flags; - if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) { - fs_info->quota_enabled = 0; - fs_info->pending_quota_state = 0; - } - btrfs_free_path(path); - - return ret < 0 ? ret : 0; -} - -/* - * This is only called from close_ctree() or open_ctree(), both in single- - * treaded paths. Clean up the in-memory structures. No locking needed. 
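[Annotation: the rb-tree and relation helpers deleted above keep one btrfs_qgroup_list "glue" allocation per member/parent edge, linked onto two lists at once (the member's groups list and the parent's members list), so the many-to-many graph can be walked in either direction without extra lookups. Below is a minimal userspace sketch of that two-list glue node; it uses bare singly-linked pointers instead of the kernel's list_head, and every name in it is illustrative, not kernel API.]

#include <stdio.h>
#include <stdlib.h>

struct relation;

struct qgroup {
        unsigned long long id;
        struct relation *groups;        /* edges where this group is the member */
        struct relation *members;       /* edges where this group is the parent */
};

struct relation {
        struct relation *next_group;    /* chains member->groups */
        struct relation *next_member;   /* chains parent->members */
        struct qgroup *group;           /* parent side of the edge */
        struct qgroup *member;          /* child side of the edge */
};

static int add_relation(struct qgroup *member, struct qgroup *parent)
{
        struct relation *rel = calloc(1, sizeof(*rel));

        if (!rel)
                return -1;
        rel->group = parent;
        rel->member = member;
        rel->next_group = member->groups;       /* one node, two lists */
        member->groups = rel;
        rel->next_member = parent->members;
        parent->members = rel;
        return 0;
}

int main(void)
{
        struct qgroup child = { .id = 257 };
        struct qgroup parent = { .id = 5 };

        if (add_relation(&child, &parent))
                return 1;
        for (struct relation *r = child.groups; r; r = r->next_group)
                printf("qgroup %llu is a member of %llu\n",
                       r->member->id, r->group->id);
        return 0;
}

[del_relation_rb above is the inverse: it finds the glue node on one list and unlinks it from both before freeing it.]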
- */ -void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) -{ - struct rb_node *n; - struct btrfs_qgroup *qgroup; - struct btrfs_qgroup_list *list; - - while ((n = rb_first(&fs_info->qgroup_tree))) { - qgroup = rb_entry(n, struct btrfs_qgroup, node); - rb_erase(n, &fs_info->qgroup_tree); - - WARN_ON(!list_empty(&qgroup->dirty)); - - while (!list_empty(&qgroup->groups)) { - list = list_first_entry(&qgroup->groups, - struct btrfs_qgroup_list, - next_group); - list_del(&list->next_group); - list_del(&list->next_member); - kfree(list); - } - - while (!list_empty(&qgroup->members)) { - list = list_first_entry(&qgroup->members, - struct btrfs_qgroup_list, - next_member); - list_del(&list->next_group); - list_del(&list->next_member); - kfree(list); - } - kfree(qgroup); - } -} - -static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, - struct btrfs_root *quota_root, - u64 src, u64 dst) -{ - int ret; - struct btrfs_path *path; - struct btrfs_key key; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - key.objectid = src; - key.type = BTRFS_QGROUP_RELATION_KEY; - key.offset = dst; - - ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0); - - btrfs_mark_buffer_dirty(path->nodes[0]); - - btrfs_free_path(path); - return ret; -} - -static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, - struct btrfs_root *quota_root, - u64 src, u64 dst) -{ - int ret; - struct btrfs_path *path; - struct btrfs_key key; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - key.objectid = src; - key.type = BTRFS_QGROUP_RELATION_KEY; - key.offset = dst; - - ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); - if (ret < 0) - goto out; - - if (ret > 0) { - ret = -ENOENT; - goto out; - } - - ret = btrfs_del_item(trans, quota_root, path); -out: - btrfs_free_path(path); - return ret; -} - -static int add_qgroup_item(struct btrfs_trans_handle *trans, - struct btrfs_root *quota_root, u64 qgroupid) -{ - int ret; - struct btrfs_path *path; - struct btrfs_qgroup_info_item *qgroup_info; - struct btrfs_qgroup_limit_item *qgroup_limit; - struct extent_buffer *leaf; - struct btrfs_key key; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - key.objectid = 0; - key.type = BTRFS_QGROUP_INFO_KEY; - key.offset = qgroupid; - - ret = btrfs_insert_empty_item(trans, quota_root, path, &key, - sizeof(*qgroup_info)); - if (ret) - goto out; - - leaf = path->nodes[0]; - qgroup_info = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_qgroup_info_item); - btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid); - btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0); - btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0); - btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0); - btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0); - - btrfs_mark_buffer_dirty(leaf); - - btrfs_release_path(path); - - key.type = BTRFS_QGROUP_LIMIT_KEY; - ret = btrfs_insert_empty_item(trans, quota_root, path, &key, - sizeof(*qgroup_limit)); - if (ret) - goto out; - - leaf = path->nodes[0]; - qgroup_limit = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_qgroup_limit_item); - btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0); - btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0); - btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0); - btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0); - btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0); - - btrfs_mark_buffer_dirty(leaf); - - ret = 0; -out: - btrfs_free_path(path); - return ret; -} - -static int 
del_qgroup_item(struct btrfs_trans_handle *trans, - struct btrfs_root *quota_root, u64 qgroupid) -{ - int ret; - struct btrfs_path *path; - struct btrfs_key key; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - key.objectid = 0; - key.type = BTRFS_QGROUP_INFO_KEY; - key.offset = qgroupid; - ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); - if (ret < 0) - goto out; - - if (ret > 0) { - ret = -ENOENT; - goto out; - } - - ret = btrfs_del_item(trans, quota_root, path); - if (ret) - goto out; - - btrfs_release_path(path); - - key.type = BTRFS_QGROUP_LIMIT_KEY; - ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); - if (ret < 0) - goto out; - - if (ret > 0) { - ret = -ENOENT; - goto out; - } - - ret = btrfs_del_item(trans, quota_root, path); - -out: - btrfs_free_path(path); - return ret; -} - -static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 qgroupid, - u64 flags, u64 max_rfer, u64 max_excl, - u64 rsv_rfer, u64 rsv_excl) -{ - struct btrfs_path *path; - struct btrfs_key key; - struct extent_buffer *l; - struct btrfs_qgroup_limit_item *qgroup_limit; - int ret; - int slot; - - key.objectid = 0; - key.type = BTRFS_QGROUP_LIMIT_KEY; - key.offset = qgroupid; - - path = btrfs_alloc_path(); - BUG_ON(!path); - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); - if (ret > 0) - ret = -ENOENT; - - if (ret) - goto out; - - l = path->nodes[0]; - slot = path->slots[0]; - qgroup_limit = btrfs_item_ptr(l, path->slots[0], - struct btrfs_qgroup_limit_item); - btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags); - btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer); - btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl); - btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer); - btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl); - - btrfs_mark_buffer_dirty(l); - -out: - btrfs_free_path(path); - return ret; -} - -static int update_qgroup_info_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_qgroup *qgroup) -{ - struct btrfs_path *path; - struct btrfs_key key; - struct extent_buffer *l; - struct btrfs_qgroup_info_item *qgroup_info; - int ret; - int slot; - - key.objectid = 0; - key.type = BTRFS_QGROUP_INFO_KEY; - key.offset = qgroup->qgroupid; - - path = btrfs_alloc_path(); - BUG_ON(!path); - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); - if (ret > 0) - ret = -ENOENT; - - if (ret) - goto out; - - l = path->nodes[0]; - slot = path->slots[0]; - qgroup_info = btrfs_item_ptr(l, path->slots[0], - struct btrfs_qgroup_info_item); - btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid); - btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer); - btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr); - btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl); - btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr); - - btrfs_mark_buffer_dirty(l); - -out: - btrfs_free_path(path); - return ret; -} - -static int update_qgroup_status_item(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, - struct btrfs_root *root) -{ - struct btrfs_path *path; - struct btrfs_key key; - struct extent_buffer *l; - struct btrfs_qgroup_status_item *ptr; - int ret; - int slot; - - key.objectid = 0; - key.type = BTRFS_QGROUP_STATUS_KEY; - key.offset = 0; - - path = btrfs_alloc_path(); - BUG_ON(!path); - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); - if (ret > 0) - ret = -ENOENT; - - if (ret) - goto out; - - l = 
path->nodes[0]; - slot = path->slots[0]; - ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item); - btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags); - btrfs_set_qgroup_status_generation(l, ptr, trans->transid); - /* XXX scan */ - - btrfs_mark_buffer_dirty(l); - -out: - btrfs_free_path(path); - return ret; -} - -/* - * called with qgroup_lock held - */ -static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - struct btrfs_path *path; - struct btrfs_key key; - int ret; - - if (!root) - return -EINVAL; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - while (1) { - key.objectid = 0; - key.offset = 0; - key.type = 0; - - path->leave_spinning = 1; - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret > 0) { - if (path->slots[0] == 0) - break; - path->slots[0]--; - } else if (ret < 0) { - break; - } - - ret = btrfs_del_item(trans, root, path); - if (ret) - goto out; - btrfs_release_path(path); - } - ret = 0; -out: - root->fs_info->pending_quota_state = 0; - btrfs_free_path(path); - return ret; -} - -int btrfs_quota_enable(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) -{ - struct btrfs_root *quota_root; - struct btrfs_path *path = NULL; - struct btrfs_qgroup_status_item *ptr; - struct extent_buffer *leaf; - struct btrfs_key key; - int ret = 0; - - spin_lock(&fs_info->qgroup_lock); - if (fs_info->quota_root) { - fs_info->pending_quota_state = 1; - spin_unlock(&fs_info->qgroup_lock); - goto out; - } - spin_unlock(&fs_info->qgroup_lock); - - /* - * initially create the quota tree - */ - quota_root = btrfs_create_tree(trans, fs_info, - BTRFS_QUOTA_TREE_OBJECTID); - if (IS_ERR(quota_root)) { - ret = PTR_ERR(quota_root); - goto out; - } - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - key.objectid = 0; - key.type = BTRFS_QGROUP_STATUS_KEY; - key.offset = 0; - - ret = btrfs_insert_empty_item(trans, quota_root, path, &key, - sizeof(*ptr)); - if (ret) - goto out; - - leaf = path->nodes[0]; - ptr = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_qgroup_status_item); - btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid); - btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION); - fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON | - BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; - btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags); - btrfs_set_qgroup_status_scan(leaf, ptr, 0); - - btrfs_mark_buffer_dirty(leaf); - - spin_lock(&fs_info->qgroup_lock); - fs_info->quota_root = quota_root; - fs_info->pending_quota_state = 1; - spin_unlock(&fs_info->qgroup_lock); -out: - btrfs_free_path(path); - return ret; -} - -int btrfs_quota_disable(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) -{ - struct btrfs_root *tree_root = fs_info->tree_root; - struct btrfs_root *quota_root; - int ret = 0; - - spin_lock(&fs_info->qgroup_lock); - fs_info->quota_enabled = 0; - fs_info->pending_quota_state = 0; - quota_root = fs_info->quota_root; - fs_info->quota_root = NULL; - btrfs_free_qgroup_config(fs_info); - spin_unlock(&fs_info->qgroup_lock); - - if (!quota_root) - return -EINVAL; - - ret = btrfs_clean_quota_tree(trans, quota_root); - if (ret) - goto out; - - ret = btrfs_del_root(trans, tree_root, &quota_root->root_key); - if (ret) - goto out; - - list_del(&quota_root->dirty_list); - - btrfs_tree_lock(quota_root->node); - clean_tree_block(trans, tree_root, quota_root->node); - btrfs_tree_unlock(quota_root->node); - btrfs_free_tree_block(trans,
quota_root, quota_root->node, 0, 1); - - free_extent_buffer(quota_root->node); - free_extent_buffer(quota_root->commit_root); - kfree(quota_root); -out: - return ret; -} - -int btrfs_quota_rescan(struct btrfs_fs_info *fs_info) -{ - /* FIXME */ - return 0; -} - -int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 src, u64 dst) -{ - struct btrfs_root *quota_root; - int ret = 0; - - quota_root = fs_info->quota_root; - if (!quota_root) - return -EINVAL; - - ret = add_qgroup_relation_item(trans, quota_root, src, dst); - if (ret) - return ret; - - ret = add_qgroup_relation_item(trans, quota_root, dst, src); - if (ret) { - del_qgroup_relation_item(trans, quota_root, src, dst); - return ret; - } - - spin_lock(&fs_info->qgroup_lock); - ret = add_relation_rb(quota_root->fs_info, src, dst); - spin_unlock(&fs_info->qgroup_lock); - - return ret; -} - -int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 src, u64 dst) -{ - struct btrfs_root *quota_root; - int ret = 0; - int err; - - quota_root = fs_info->quota_root; - if (!quota_root) - return -EINVAL; - - ret = del_qgroup_relation_item(trans, quota_root, src, dst); - err = del_qgroup_relation_item(trans, quota_root, dst, src); - if (err && !ret) - ret = err; - - spin_lock(&fs_info->qgroup_lock); - del_relation_rb(fs_info, src, dst); - - spin_unlock(&fs_info->qgroup_lock); - - return ret; -} - -int btrfs_create_qgroup(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 qgroupid, char *name) -{ - struct btrfs_root *quota_root; - struct btrfs_qgroup *qgroup; - int ret = 0; - - quota_root = fs_info->quota_root; - if (!quota_root) - return -EINVAL; - - ret = add_qgroup_item(trans, quota_root, qgroupid); - - spin_lock(&fs_info->qgroup_lock); - qgroup = add_qgroup_rb(fs_info, qgroupid); - spin_unlock(&fs_info->qgroup_lock); - - if (IS_ERR(qgroup)) - ret = PTR_ERR(qgroup); - - return ret; -} - -int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 qgroupid) -{ - struct btrfs_root *quota_root; - int ret = 0; - - quota_root = fs_info->quota_root; - if (!quota_root) - return -EINVAL; - - ret = del_qgroup_item(trans, quota_root, qgroupid); - - spin_lock(&fs_info->qgroup_lock); - del_qgroup_rb(quota_root->fs_info, qgroupid); - - spin_unlock(&fs_info->qgroup_lock); - - return ret; -} - -int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 qgroupid, - struct btrfs_qgroup_limit *limit) -{ - struct btrfs_root *quota_root = fs_info->quota_root; - struct btrfs_qgroup *qgroup; - int ret = 0; - - if (!quota_root) - return -EINVAL; - - ret = update_qgroup_limit_item(trans, quota_root, qgroupid, - limit->flags, limit->max_rfer, - limit->max_excl, limit->rsv_rfer, - limit->rsv_excl); - if (ret) { - fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; - printk(KERN_INFO "unable to update quota limit for %llu\n", - (unsigned long long)qgroupid); - } - - spin_lock(&fs_info->qgroup_lock); - - qgroup = find_qgroup_rb(fs_info, qgroupid); - if (!qgroup) { - ret = -ENOENT; - goto unlock; - } - qgroup->lim_flags = limit->flags; - qgroup->max_rfer = limit->max_rfer; - qgroup->max_excl = limit->max_excl; - qgroup->rsv_rfer = limit->rsv_rfer; - qgroup->rsv_excl = limit->rsv_excl; - -unlock: - spin_unlock(&fs_info->qgroup_lock); - - return ret; -} - -static void qgroup_dirty(struct btrfs_fs_info *fs_info, - struct btrfs_qgroup *qgroup) -{ - if (list_empty(&qgroup->dirty)) - 
list_add(&qgroup->dirty, &fs_info->dirty_qgroups); -} - -/* - * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts - * the modification into a list that's later used by btrfs_end_transaction to - * pass the recorded modifications on to btrfs_qgroup_account_ref. - */ -int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, - struct btrfs_delayed_ref_node *node, - struct btrfs_delayed_extent_op *extent_op) -{ - struct qgroup_update *u; - - BUG_ON(!trans->delayed_ref_elem.seq); - u = kmalloc(sizeof(*u), GFP_NOFS); - if (!u) - return -ENOMEM; - - u->node = node; - u->extent_op = extent_op; - list_add_tail(&u->list, &trans->qgroup_ref_list); - - return 0; -} - -/* - * btrfs_qgroup_account_ref is called for every ref that is added to or deleted - * from the fs. First, all roots referencing the extent are searched, and - * then the space is accounted accordingly to the different roots. The - * accounting algorithm works in 3 steps documented inline. - */ -int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_node *node, - struct btrfs_delayed_extent_op *extent_op) -{ - struct btrfs_key ins; - struct btrfs_root *quota_root; - u64 ref_root; - struct btrfs_qgroup *qgroup; - struct ulist_node *unode; - struct ulist *roots = NULL; - struct ulist *tmp = NULL; - struct ulist_iterator uiter; - u64 seq; - int ret = 0; - int sgn; - - if (!fs_info->quota_enabled) - return 0; - - BUG_ON(!fs_info->quota_root); - - ins.objectid = node->bytenr; - ins.offset = node->num_bytes; - ins.type = BTRFS_EXTENT_ITEM_KEY; - - if (node->type == BTRFS_TREE_BLOCK_REF_KEY || - node->type == BTRFS_SHARED_BLOCK_REF_KEY) { - struct btrfs_delayed_tree_ref *ref; - ref = btrfs_delayed_node_to_tree_ref(node); - ref_root = ref->root; - } else if (node->type == BTRFS_EXTENT_DATA_REF_KEY || - node->type == BTRFS_SHARED_DATA_REF_KEY) { - struct btrfs_delayed_data_ref *ref; - ref = btrfs_delayed_node_to_data_ref(node); - ref_root = ref->root; - } else { - BUG(); - } - - if (!is_fstree(ref_root)) { - /* - * non-fs-trees are not being accounted - */ - return 0; - } - - switch (node->action) { - case BTRFS_ADD_DELAYED_REF: - case BTRFS_ADD_DELAYED_EXTENT: - sgn = 1; - break; - case BTRFS_DROP_DELAYED_REF: - sgn = -1; - break; - case BTRFS_UPDATE_DELAYED_HEAD: - return 0; - default: - BUG(); - } - - /* - * the delayed ref sequence number we pass depends on the direction of - * the operation. for add operations, we pass (node->seq - 1) to skip - * the delayed ref's current sequence number, because we need the state - * of the tree before the add operation. for delete operations, we pass - * (node->seq) to include the delayed ref's current sequence number, - * because we need the state of the tree after the delete operation. - */ - ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, - sgn > 0 ? 
node->seq - 1 : node->seq, &roots); - if (ret < 0) - goto out; - - spin_lock(&fs_info->qgroup_lock); - quota_root = fs_info->quota_root; - if (!quota_root) - goto unlock; - - qgroup = find_qgroup_rb(fs_info, ref_root); - if (!qgroup) - goto unlock; - - /* - * step 1: for each old ref, visit all nodes once and inc refcnt - */ - tmp = ulist_alloc(GFP_ATOMIC); - if (!tmp) { - ret = -ENOMEM; - goto unlock; - } - seq = fs_info->qgroup_seq; - fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ - - ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(roots, &uiter))) { - struct ulist_node *tmp_unode; - struct ulist_iterator tmp_uiter; - struct btrfs_qgroup *qg; - - qg = find_qgroup_rb(fs_info, unode->val); - if (!qg) - continue; - - ulist_reinit(tmp); - /* XXX id not needed */ - ulist_add(tmp, qg->qgroupid, (unsigned long)qg, GFP_ATOMIC); - ULIST_ITER_INIT(&tmp_uiter); - while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { - struct btrfs_qgroup_list *glist; - - qg = (struct btrfs_qgroup *)tmp_unode->aux; - if (qg->refcnt < seq) - qg->refcnt = seq + 1; - else - ++qg->refcnt; - - list_for_each_entry(glist, &qg->groups, next_group) { - ulist_add(tmp, glist->group->qgroupid, - (unsigned long)glist->group, - GFP_ATOMIC); - } - } - } - - /* - * step 2: walk from the new root - */ - ulist_reinit(tmp); - ulist_add(tmp, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC); - ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(tmp, &uiter))) { - struct btrfs_qgroup *qg; - struct btrfs_qgroup_list *glist; - - qg = (struct btrfs_qgroup *)unode->aux; - if (qg->refcnt < seq) { - /* not visited by step 1 */ - qg->rfer += sgn * node->num_bytes; - qg->rfer_cmpr += sgn * node->num_bytes; - if (roots->nnodes == 0) { - qg->excl += sgn * node->num_bytes; - qg->excl_cmpr += sgn * node->num_bytes; - } - qgroup_dirty(fs_info, qg); - } - WARN_ON(qg->tag >= seq); - qg->tag = seq; - - list_for_each_entry(glist, &qg->groups, next_group) { - ulist_add(tmp, glist->group->qgroupid, - (unsigned long)glist->group, GFP_ATOMIC); - } - } - - /* - * step 3: walk again from old refs - */ - ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(roots, &uiter))) { - struct btrfs_qgroup *qg; - struct ulist_node *tmp_unode; - struct ulist_iterator tmp_uiter; - - qg = find_qgroup_rb(fs_info, unode->val); - if (!qg) - continue; - - ulist_reinit(tmp); - ulist_add(tmp, qg->qgroupid, (unsigned long)qg, GFP_ATOMIC); - ULIST_ITER_INIT(&tmp_uiter); - while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { - struct btrfs_qgroup_list *glist; - - qg = (struct btrfs_qgroup *)tmp_unode->aux; - if (qg->tag == seq) - continue; - - if (qg->refcnt - seq == roots->nnodes) { - qg->excl -= sgn * node->num_bytes; - qg->excl_cmpr -= sgn * node->num_bytes; - qgroup_dirty(fs_info, qg); - } - - list_for_each_entry(glist, &qg->groups, next_group) { - ulist_add(tmp, glist->group->qgroupid, - (unsigned long)glist->group, - GFP_ATOMIC); - } - } - } - ret = 0; -unlock: - spin_unlock(&fs_info->qgroup_lock); -out: - ulist_free(roots); - ulist_free(tmp); - - return ret; -} - -/* - * called from commit_transaction. Writes all changed qgroups to disk. 
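[Annotation: the three accounting steps above rely on a lazy-reset trick: refcnt and tag are never cleared between operations. Instead, fs_info->qgroup_seq advances by roots->nnodes + 1 per operation, so any refcnt stored by an earlier operation is below the current seq and reads as zero. A self-contained sketch of that sequence-number reset idiom follows; the fixed +10 step is an arbitrary stand-in for nnodes + 1.]

#include <stdio.h>

struct node {
        unsigned long long refcnt;      /* only meaningful relative to seq */
};

static unsigned long long seq;          /* advanced once per operation */

static void visit(struct node *n)
{
        if (n->refcnt < seq)            /* stale value from an older op */
                n->refcnt = seq + 1;    /* acts like "set count to 1" */
        else
                n->refcnt++;
}

static unsigned long long count(const struct node *n)
{
        return n->refcnt < seq ? 0 : n->refcnt - seq;
}

int main(void)
{
        struct node a = { 0 };

        seq += 10;              /* reserve room for up to 10 increments */
        visit(&a);
        visit(&a);
        printf("after op 1: %llu\n", count(&a));        /* prints 2 */
        seq += 10;              /* next op: a implicitly reads as 0 again */
        printf("after reseq: %llu\n", count(&a));       /* prints 0 */
        return 0;
}

[Step 3's check qg->refcnt - seq == roots->nnodes then asks whether the group was reached from every old root during step 1.]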
- */ -int btrfs_run_qgroups(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) -{ - struct btrfs_root *quota_root = fs_info->quota_root; - int ret = 0; - - if (!quota_root) - goto out; - - fs_info->quota_enabled = fs_info->pending_quota_state; - - spin_lock(&fs_info->qgroup_lock); - while (!list_empty(&fs_info->dirty_qgroups)) { - struct btrfs_qgroup *qgroup; - qgroup = list_first_entry(&fs_info->dirty_qgroups, - struct btrfs_qgroup, dirty); - list_del_init(&qgroup->dirty); - spin_unlock(&fs_info->qgroup_lock); - ret = update_qgroup_info_item(trans, quota_root, qgroup); - if (ret) - fs_info->qgroup_flags |= - BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; - spin_lock(&fs_info->qgroup_lock); - } - if (fs_info->quota_enabled) - fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON; - else - fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; - spin_unlock(&fs_info->qgroup_lock); - - ret = update_qgroup_status_item(trans, fs_info, quota_root); - if (ret) - fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; - -out: - - return ret; -} - -/* - * copy the acounting information between qgroups. This is necessary when a - * snapshot or a subvolume is created - */ -int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, - struct btrfs_qgroup_inherit *inherit) -{ - int ret = 0; - int i; - u64 *i_qgroups; - struct btrfs_root *quota_root = fs_info->quota_root; - struct btrfs_qgroup *srcgroup; - struct btrfs_qgroup *dstgroup; - u32 level_size = 0; - - if (!fs_info->quota_enabled) - return 0; - - if (!quota_root) - return -EINVAL; - - /* - * create a tracking group for the subvol itself - */ - ret = add_qgroup_item(trans, quota_root, objectid); - if (ret) - goto out; - - if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) { - ret = update_qgroup_limit_item(trans, quota_root, objectid, - inherit->lim.flags, - inherit->lim.max_rfer, - inherit->lim.max_excl, - inherit->lim.rsv_rfer, - inherit->lim.rsv_excl); - if (ret) - goto out; - } - - if (srcid) { - struct btrfs_root *srcroot; - struct btrfs_key srckey; - int srcroot_level; - - srckey.objectid = srcid; - srckey.type = BTRFS_ROOT_ITEM_KEY; - srckey.offset = (u64)-1; - srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey); - if (IS_ERR(srcroot)) { - ret = PTR_ERR(srcroot); - goto out; - } - - rcu_read_lock(); - srcroot_level = btrfs_header_level(srcroot->node); - level_size = btrfs_level_size(srcroot, srcroot_level); - rcu_read_unlock(); - } - - /* - * add qgroup to all inherited groups - */ - if (inherit) { - i_qgroups = (u64 *)(inherit + 1); - for (i = 0; i < inherit->num_qgroups; ++i) { - ret = add_qgroup_relation_item(trans, quota_root, - objectid, *i_qgroups); - if (ret) - goto out; - ret = add_qgroup_relation_item(trans, quota_root, - *i_qgroups, objectid); - if (ret) - goto out; - ++i_qgroups; - } - } - - - spin_lock(&fs_info->qgroup_lock); - - dstgroup = add_qgroup_rb(fs_info, objectid); - if (!dstgroup) - goto unlock; - - if (srcid) { - srcgroup = find_qgroup_rb(fs_info, srcid); - if (!srcgroup) - goto unlock; - dstgroup->rfer = srcgroup->rfer - level_size; - dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size; - srcgroup->excl = level_size; - srcgroup->excl_cmpr = level_size; - qgroup_dirty(fs_info, dstgroup); - qgroup_dirty(fs_info, srcgroup); - } - - if (!inherit) - goto unlock; - - i_qgroups = (u64 *)(inherit + 1); - for (i = 0; i < inherit->num_qgroups; ++i) { - ret = add_relation_rb(quota_root->fs_info, objectid, - *i_qgroups); - if (ret) - 
goto unlock; - ++i_qgroups; - } - - for (i = 0; i < inherit->num_ref_copies; ++i) { - struct btrfs_qgroup *src; - struct btrfs_qgroup *dst; - - src = find_qgroup_rb(fs_info, i_qgroups[0]); - dst = find_qgroup_rb(fs_info, i_qgroups[1]); - - if (!src || !dst) { - ret = -EINVAL; - goto unlock; - } - - dst->rfer = src->rfer - level_size; - dst->rfer_cmpr = src->rfer_cmpr - level_size; - i_qgroups += 2; - } - for (i = 0; i < inherit->num_excl_copies; ++i) { - struct btrfs_qgroup *src; - struct btrfs_qgroup *dst; - - src = find_qgroup_rb(fs_info, i_qgroups[0]); - dst = find_qgroup_rb(fs_info, i_qgroups[1]); - - if (!src || !dst) { - ret = -EINVAL; - goto unlock; - } - - dst->excl = src->excl + level_size; - dst->excl_cmpr = src->excl_cmpr + level_size; - i_qgroups += 2; - } - -unlock: - spin_unlock(&fs_info->qgroup_lock); -out: - return ret; -} - -/* - * reserve some space for a qgroup and all its parents. The reservation takes - * place with start_transaction or dealloc_reserve, similar to ENOSPC - * accounting. If not enough space is available, EDQUOT is returned. - * We assume that the requested space is new for all qgroups. - */ -int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) -{ - struct btrfs_root *quota_root; - struct btrfs_qgroup *qgroup; - struct btrfs_fs_info *fs_info = root->fs_info; - u64 ref_root = root->root_key.objectid; - int ret = 0; - struct ulist *ulist = NULL; - struct ulist_node *unode; - struct ulist_iterator uiter; - - if (!is_fstree(ref_root)) - return 0; - - if (num_bytes == 0) - return 0; - - spin_lock(&fs_info->qgroup_lock); - quota_root = fs_info->quota_root; - if (!quota_root) - goto out; - - qgroup = find_qgroup_rb(fs_info, ref_root); - if (!qgroup) - goto out; - - /* - * in a first step, we check all affected qgroups if any limits would - * be exceeded - */ - ulist = ulist_alloc(GFP_ATOMIC); - ulist_add(ulist, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC); - ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(ulist, &uiter))) { - struct btrfs_qgroup *qg; - struct btrfs_qgroup_list *glist; - - qg = (struct btrfs_qgroup *)unode->aux; - - if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && - qg->reserved + qg->rfer + num_bytes > - qg->max_rfer) - ret = -EDQUOT; - - if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && - qg->reserved + qg->excl + num_bytes > - qg->max_excl) - ret = -EDQUOT; - - list_for_each_entry(glist, &qg->groups, next_group) { - ulist_add(ulist, glist->group->qgroupid, - (unsigned long)glist->group, GFP_ATOMIC); - } - } - if (ret) - goto out; - - /* - * no limits exceeded, now record the reservation into all qgroups - */ - ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(ulist, &uiter))) { - struct btrfs_qgroup *qg; - - qg = (struct btrfs_qgroup *)unode->aux; - - qg->reserved += num_bytes; - } - -out: - spin_unlock(&fs_info->qgroup_lock); - ulist_free(ulist); - - return ret; -} - -void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) -{ - struct btrfs_root *quota_root; - struct btrfs_qgroup *qgroup; - struct btrfs_fs_info *fs_info = root->fs_info; - struct ulist *ulist = NULL; - struct ulist_node *unode; - struct ulist_iterator uiter; - u64 ref_root = root->root_key.objectid; - - if (!is_fstree(ref_root)) - return; - - if (num_bytes == 0) - return; - - spin_lock(&fs_info->qgroup_lock); - - quota_root = fs_info->quota_root; - if (!quota_root) - goto out; - - qgroup = find_qgroup_rb(fs_info, ref_root); - if (!qgroup) - goto out; - - ulist = ulist_alloc(GFP_ATOMIC); - ulist_add(ulist, qgroup->qgroupid, (unsigned 
long)qgroup, GFP_ATOMIC); - ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(ulist, &uiter))) { - struct btrfs_qgroup *qg; - struct btrfs_qgroup_list *glist; - - qg = (struct btrfs_qgroup *)unode->aux; - - qg->reserved -= num_bytes; - - list_for_each_entry(glist, &qg->groups, next_group) { - ulist_add(ulist, glist->group->qgroupid, - (unsigned long)glist->group, GFP_ATOMIC); - } - } - -out: - spin_unlock(&fs_info->qgroup_lock); - ulist_free(ulist); -} - -void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) -{ - if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq) - return; - printk(KERN_ERR "btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %llu\n", - trans, list_empty(&trans->qgroup_ref_list) ? "" : " not", - trans->delayed_ref_elem.seq); - BUG(); -} diff --git a/trunk/fs/btrfs/relocation.c b/trunk/fs/btrfs/relocation.c index c5dbd9149679..646ee21bb035 100644 --- a/trunk/fs/btrfs/relocation.c +++ b/trunk/fs/btrfs/relocation.c @@ -1239,11 +1239,10 @@ static int __must_check __add_reloc_root(struct btrfs_root *root) node->bytenr, &node->rb_node); spin_unlock(&rc->reloc_root_tree.lock); if (rb_node) { + kfree(node); btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found " "for start=%llu while inserting into relocation " "tree\n"); - kfree(node); - return -EEXIST; } list_add_tail(&root->root_list, &rc->reloc_roots); diff --git a/trunk/fs/btrfs/struct-funcs.c b/trunk/fs/btrfs/struct-funcs.c index b976597b0721..c6ffa5812419 100644 --- a/trunk/fs/btrfs/struct-funcs.c +++ b/trunk/fs/btrfs/struct-funcs.c @@ -17,27 +17,15 @@ */ #include -#include -#include "ctree.h" - -static inline u8 get_unaligned_le8(const void *p) -{ - return *(u8 *)p; -} - -static inline void put_unaligned_le8(u8 val, void *p) -{ - *(u8 *)p = val; -} - -/* - * this is some deeply nasty code. +/* this is some deeply nasty code. ctree.h has a different + * definition for this BTRFS_SETGET_FUNCS macro, behind a #ifndef * * The end result is that anyone who #includes ctree.h gets a - * declaration for the btrfs_set_foo functions and btrfs_foo functions, - * which are wappers of btrfs_set_token_#bits functions and - * btrfs_get_token_#bits functions, which are defined in this file. + * declaration for the btrfs_set_foo functions and btrfs_foo functions + * + * This file declares the macros and then #includes ctree.h, which results + * in cpp creating the function here based on the template below. * * These setget functions do all the extent_buffer related mapping * required to efficiently read and write specific fields in the extent @@ -45,93 +33,103 @@ static inline void put_unaligned_le8(u8 val, void *p) * an unsigned long offset into the extent buffer which has been * cast to a specific type. This gives us all the gcc type checking. * - * The extent buffer api is used to do the page spanning work required to - * have a metadata blocksize different from the page size. + * The extent buffer api is used to do all the kmapping and page + * spanning work required to get extent buffers in highmem and have + * a metadata blocksize different from the page size. + * + * The macro starts with a simple function prototype declaration so that + * sparse won't complain about it being static. 
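[Annotation: the replacement struct-funcs.c above leans on the preprocessor pattern its comment describes: ctree.h provides a default BTRFS_SETGET_FUNCS that only declares accessors, while struct-funcs.c defines the full-body version first and then includes ctree.h, so the one list of accessors is expanded into real functions exactly once. The same idea in a single compilable file, phrased as an X-macro so the list and both expansions sit together; all names here are invented for the illustration.]

#include <stdio.h>

struct item { int size; int flags; };

/* The shared accessor list; in btrfs the equivalent lives in ctree.h. */
#define ACCESSOR_LIST(X)                \
        X(item_size, struct item, size) \
        X(item_flags, struct item, flags)

/* Default expansion: prototypes only, what a normal includer sees. */
#define DECLARE_ACCESSOR(name, type, member)    \
        int get_##name(const type *s);
ACCESSOR_LIST(DECLARE_ACCESSOR)

/* The struct-funcs.c role: expand the same list into real bodies once. */
#define DEFINE_ACCESSOR(name, type, member)     \
        int get_##name(const type *s) { return s->member; }
ACCESSOR_LIST(DEFINE_ACCESSOR)

int main(void)
{
        struct item it = { .size = 42, .flags = 3 };

        printf("size=%d flags=%d\n",
               get_item_size(&it), get_item_flags(&it));
        return 0;
}

[Most translation units pay only for prototypes; the heavy bodies, in btrfs all the extent-buffer mapping work, are emitted in a single object file.]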
*/ -#define DEFINE_BTRFS_SETGET_BITS(bits) \ -u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \ - unsigned long off, \ - struct btrfs_map_token *token) \ +#define BTRFS_SETGET_FUNCS(name, type, member, bits) \ +u##bits btrfs_##name(struct extent_buffer *eb, type *s); \ +void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); \ +void btrfs_set_token_##name(struct extent_buffer *eb, type *s, u##bits val, struct btrfs_map_token *token); \ +u##bits btrfs_token_##name(struct extent_buffer *eb, \ + type *s, struct btrfs_map_token *token) \ { \ - unsigned long part_offset = (unsigned long)ptr; \ - unsigned long offset = part_offset + off; \ - void *p; \ - int err; \ - char *kaddr; \ - unsigned long map_start; \ - unsigned long map_len; \ - int size = sizeof(u##bits); \ - u##bits res; \ - \ - if (token && token->kaddr && token->offset <= offset && \ - token->eb == eb && \ - (token->offset + PAGE_CACHE_SIZE >= offset + size)) { \ - kaddr = token->kaddr; \ - p = kaddr + part_offset - token->offset; \ - res = get_unaligned_le##bits(p + off); \ - return res; \ - } \ - err = map_private_extent_buffer(eb, offset, size, \ - &kaddr, &map_start, &map_len); \ - if (err) { \ - __le##bits leres; \ - \ - read_extent_buffer(eb, &leres, offset, size); \ - return le##bits##_to_cpu(leres); \ - } \ - p = kaddr + part_offset - map_start; \ - res = get_unaligned_le##bits(p + off); \ - if (token) { \ - token->kaddr = kaddr; \ - token->offset = map_start; \ - token->eb = eb; \ - } \ - return res; \ + unsigned long part_offset = (unsigned long)s; \ + unsigned long offset = part_offset + offsetof(type, member); \ + type *p; \ + int err; \ + char *kaddr; \ + unsigned long map_start; \ + unsigned long map_len; \ + unsigned long mem_len = sizeof(((type *)0)->member); \ + u##bits res; \ + if (token && token->kaddr && token->offset <= offset && \ + token->eb == eb && \ + (token->offset + PAGE_CACHE_SIZE >= offset + mem_len)) { \ + kaddr = token->kaddr; \ + p = (type *)(kaddr + part_offset - token->offset); \ + res = le##bits##_to_cpu(p->member); \ + return res; \ + } \ + err = map_private_extent_buffer(eb, offset, \ + mem_len, \ + &kaddr, &map_start, &map_len); \ + if (err) { \ + __le##bits leres; \ + read_eb_member(eb, s, type, member, &leres); \ + return le##bits##_to_cpu(leres); \ + } \ + p = (type *)(kaddr + part_offset - map_start); \ + res = le##bits##_to_cpu(p->member); \ + if (token) { \ + token->kaddr = kaddr; \ + token->offset = map_start; \ + token->eb = eb; \ + } \ + return res; \ } \ -void btrfs_set_token_##bits(struct extent_buffer *eb, \ - void *ptr, unsigned long off, u##bits val, \ - struct btrfs_map_token *token) \ +void btrfs_set_token_##name(struct extent_buffer *eb, \ + type *s, u##bits val, struct btrfs_map_token *token) \ { \ - unsigned long part_offset = (unsigned long)ptr; \ - unsigned long offset = part_offset + off; \ - void *p; \ - int err; \ - char *kaddr; \ - unsigned long map_start; \ - unsigned long map_len; \ - int size = sizeof(u##bits); \ - \ - if (token && token->kaddr && token->offset <= offset && \ - token->eb == eb && \ - (token->offset + PAGE_CACHE_SIZE >= offset + size)) { \ - kaddr = token->kaddr; \ - p = kaddr + part_offset - token->offset; \ - put_unaligned_le##bits(val, p + off); \ - return; \ - } \ - err = map_private_extent_buffer(eb, offset, size, \ - &kaddr, &map_start, &map_len); \ - if (err) { \ - __le##bits val2; \ - \ - val2 = cpu_to_le##bits(val); \ - write_extent_buffer(eb, &val2, offset, size); \ - return; \ - } \ - p = kaddr + 
part_offset - map_start; \ - put_unaligned_le##bits(val, p + off); \ - if (token) { \ - token->kaddr = kaddr; \ - token->offset = map_start; \ - token->eb = eb; \ - } \ -} + unsigned long part_offset = (unsigned long)s; \ + unsigned long offset = part_offset + offsetof(type, member); \ + type *p; \ + int err; \ + char *kaddr; \ + unsigned long map_start; \ + unsigned long map_len; \ + unsigned long mem_len = sizeof(((type *)0)->member); \ + if (token && token->kaddr && token->offset <= offset && \ + token->eb == eb && \ + (token->offset + PAGE_CACHE_SIZE >= offset + mem_len)) { \ + kaddr = token->kaddr; \ + p = (type *)(kaddr + part_offset - token->offset); \ + p->member = cpu_to_le##bits(val); \ + return; \ + } \ + err = map_private_extent_buffer(eb, offset, \ + mem_len, \ + &kaddr, &map_start, &map_len); \ + if (err) { \ + __le##bits val2; \ + val2 = cpu_to_le##bits(val); \ + write_eb_member(eb, s, type, member, &val2); \ + return; \ + } \ + p = (type *)(kaddr + part_offset - map_start); \ + p->member = cpu_to_le##bits(val); \ + if (token) { \ + token->kaddr = kaddr; \ + token->offset = map_start; \ + token->eb = eb; \ + } \ +} \ +void btrfs_set_##name(struct extent_buffer *eb, \ + type *s, u##bits val) \ +{ \ + btrfs_set_token_##name(eb, s, val, NULL); \ +} \ +u##bits btrfs_##name(struct extent_buffer *eb, \ + type *s) \ +{ \ + return btrfs_token_##name(eb, s, NULL); \ +} \ -DEFINE_BTRFS_SETGET_BITS(8) -DEFINE_BTRFS_SETGET_BITS(16) -DEFINE_BTRFS_SETGET_BITS(32) -DEFINE_BTRFS_SETGET_BITS(64) +#include "ctree.h" void btrfs_node_key(struct extent_buffer *eb, struct btrfs_disk_key *disk_key, int nr) diff --git a/trunk/fs/btrfs/super.c b/trunk/fs/btrfs/super.c index 75ee2c7791f0..e23991574fdf 100644 --- a/trunk/fs/btrfs/super.c +++ b/trunk/fs/btrfs/super.c @@ -396,23 +396,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) strcmp(args[0].from, "zlib") == 0) { compress_type = "zlib"; info->compress_type = BTRFS_COMPRESS_ZLIB; - btrfs_set_opt(info->mount_opt, COMPRESS); } else if (strcmp(args[0].from, "lzo") == 0) { compress_type = "lzo"; info->compress_type = BTRFS_COMPRESS_LZO; - btrfs_set_opt(info->mount_opt, COMPRESS); - btrfs_set_fs_incompat(info, COMPRESS_LZO); - } else if (strncmp(args[0].from, "no", 2) == 0) { - compress_type = "no"; - info->compress_type = BTRFS_COMPRESS_NONE; - btrfs_clear_opt(info->mount_opt, COMPRESS); - btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); - compress_force = false; } else { ret = -EINVAL; goto out; } + btrfs_set_opt(info->mount_opt, COMPRESS); if (compress_force) { btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); pr_info("btrfs: force %s compression\n", @@ -1463,13 +1455,6 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, ret = btrfs_scan_one_device(vol->name, FMODE_READ, &btrfs_fs_type, &fs_devices); break; - case BTRFS_IOC_DEVICES_READY: - ret = btrfs_scan_one_device(vol->name, FMODE_READ, - &btrfs_fs_type, &fs_devices); - if (ret) - break; - ret = !(fs_devices->num_devices == fs_devices->total_devices); - break; } kfree(vol); @@ -1492,6 +1477,16 @@ static int btrfs_unfreeze(struct super_block *sb) return 0; } +static void btrfs_fs_dirty_inode(struct inode *inode, int flags) +{ + int ret; + + ret = btrfs_dirty_inode(inode); + if (ret) + printk_ratelimited(KERN_ERR "btrfs: fail to dirty inode %Lu " + "error %d\n", btrfs_ino(inode), ret); +} + static int btrfs_show_devname(struct seq_file *m, struct dentry *root) { struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); @@ -1531,6 +1526,7 @@ static const struct 
super_operations btrfs_super_ops = { .show_options = btrfs_show_options, .show_devname = btrfs_show_devname, .write_inode = btrfs_write_inode, + .dirty_inode = btrfs_fs_dirty_inode, .alloc_inode = btrfs_alloc_inode, .destroy_inode = btrfs_destroy_inode, .statfs = btrfs_statfs, diff --git a/trunk/fs/btrfs/transaction.c b/trunk/fs/btrfs/transaction.c index cc20e95ea289..b72b068183ec 100644 --- a/trunk/fs/btrfs/transaction.c +++ b/trunk/fs/btrfs/transaction.c @@ -38,6 +38,7 @@ void put_transaction(struct btrfs_transaction *transaction) if (atomic_dec_and_test(&transaction->use_count)) { BUG_ON(!list_empty(&transaction->list)); WARN_ON(transaction->delayed_refs.root.rb_node); + WARN_ON(!list_empty(&transaction->delayed_refs.seq_head)); memset(transaction, 0, sizeof(*transaction)); kmem_cache_free(btrfs_transaction_cachep, transaction); } @@ -99,8 +100,8 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail) kmem_cache_free(btrfs_transaction_cachep, cur_trans); cur_trans = fs_info->running_transaction; goto loop; - } else if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { - spin_unlock(&fs_info->trans_lock); + } else if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { + spin_unlock(&root->fs_info->trans_lock); kmem_cache_free(btrfs_transaction_cachep, cur_trans); return -EROFS; } @@ -125,6 +126,7 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail) cur_trans->delayed_refs.num_heads = 0; cur_trans->delayed_refs.flushing = 0; cur_trans->delayed_refs.run_delayed_start = 0; + cur_trans->delayed_refs.seq = 1; /* * although the tree mod log is per file system and not per transaction, @@ -143,8 +145,10 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail) } atomic_set(&fs_info->tree_mod_seq, 0); + init_waitqueue_head(&cur_trans->delayed_refs.seq_wait); spin_lock_init(&cur_trans->commit_lock); spin_lock_init(&cur_trans->delayed_refs.lock); + INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head); INIT_LIST_HEAD(&cur_trans->pending_snapshots); list_add_tail(&cur_trans->list, &fs_info->trans_list); @@ -295,7 +299,6 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, struct btrfs_transaction *cur_trans; u64 num_bytes = 0; int ret; - u64 qgroup_reserved = 0; if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) return ERR_PTR(-EROFS); @@ -314,14 +317,6 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, * the appropriate flushing if need be. 
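[Annotation: the .dirty_inode hook added above returns void, so btrfs_fs_dirty_inode can only surface a btrfs_dirty_inode() failure by logging, and it uses printk_ratelimited() so repeated failures cannot flood the log. A userspace sketch of that kind of rate limiter; the real kernel helper additionally tracks how many messages it suppressed.]

#include <stdio.h>
#include <time.h>

/* Print at most one message per `interval` seconds; silently drop
 * the rest, the way printk_ratelimited() throttles above. */
static void log_ratelimited(const char *msg)
{
        static time_t last;
        const time_t interval = 5;
        time_t now = time(NULL);

        if (last && now - last < interval)
                return;                 /* inside the quiet window */
        last = now;
        fprintf(stderr, "%s\n", msg);
}

int main(void)
{
        for (int i = 0; i < 3; i++)
                log_ratelimited("fail to dirty inode"); /* prints once */
        return 0;
}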
*/ if (num_items > 0 && root != root->fs_info->chunk_root) { - if (root->fs_info->quota_enabled && - is_fstree(root->root_key.objectid)) { - qgroup_reserved = num_items * root->leafsize; - ret = btrfs_qgroup_reserve(root, qgroup_reserved); - if (ret) - return ERR_PTR(ret); - } - num_bytes = btrfs_calc_trans_metadata_size(root, num_items); ret = btrfs_block_rsv_add(root, &root->fs_info->trans_block_rsv, @@ -354,16 +349,11 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, h->transaction = cur_trans; h->blocks_used = 0; h->bytes_reserved = 0; - h->root = root; h->delayed_ref_updates = 0; h->use_count = 1; - h->adding_csums = 0; h->block_rsv = NULL; h->orig_rsv = NULL; h->aborted = 0; - h->qgroup_reserved = qgroup_reserved; - h->delayed_ref_elem.seq = 0; - INIT_LIST_HEAD(&h->qgroup_ref_list); smp_mb(); if (cur_trans->blocked && may_wait_transaction(root, type)) { @@ -483,6 +473,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_transaction *cur_trans = trans->transaction; + struct btrfs_block_rsv *rsv = trans->block_rsv; int updates; int err; @@ -490,6 +481,12 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, if (cur_trans->blocked || cur_trans->delayed_refs.flushing) return 1; + /* + * We need to do this in case we're deleting csums so the global block + * rsv get's used instead of the csum block rsv. + */ + trans->block_rsv = NULL; + updates = trans->delayed_ref_updates; trans->delayed_ref_updates = 0; if (updates) { @@ -498,6 +495,8 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, return err; } + trans->block_rsv = rsv; + return should_end_transaction(trans, root); } @@ -514,24 +513,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, return 0; } - /* - * do the qgroup accounting as early as possible - */ - err = btrfs_delayed_refs_qgroup_accounting(trans, info); - btrfs_trans_release_metadata(trans, root); trans->block_rsv = NULL; - /* - * the same root has to be passed to start_transaction and - * end_transaction. Subvolume quota depends on this. 
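[Annotation: the new block_rsv handling in btrfs_should_end_transaction above is a save/override/restore idiom: the handle's reservation is stashed and cleared so that csum deletions inside btrfs_run_delayed_refs fall back to the global block reservation, then restored before returning. Reduced to a sketch with invented types and names.]

struct trans {
        void *block_rsv;        /* per-handle reservation, may be NULL */
};

/* stand-in: callees pick a global default reservation when NULL */
static void run_delayed_refs(struct trans *t)
{
        (void)t;
}

static void flush_with_global_rsv(struct trans *t)
{
        void *saved = t->block_rsv;     /* remember the caller's rsv */

        t->block_rsv = NULL;            /* force the global fallback */
        run_delayed_refs(t);
        t->block_rsv = saved;           /* restore on the way out */
}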
- */ - WARN_ON(trans->root != root); - - if (trans->qgroup_reserved) { - btrfs_qgroup_free(root, trans->qgroup_reserved); - trans->qgroup_reserved = 0; - } - while (count < 2) { unsigned long cur = trans->delayed_ref_updates; trans->delayed_ref_updates = 0; @@ -544,8 +527,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, } count++; } - btrfs_trans_release_metadata(trans, root); - trans->block_rsv = NULL; if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && should_end_transaction(trans, root)) { @@ -586,7 +567,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { err = -EIO; } - assert_qgroups_uptodate(trans); memset(trans, 0, sizeof(*trans)); kmem_cache_free(btrfs_trans_handle_cachep, trans); @@ -805,13 +785,6 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, ret = btrfs_run_dev_stats(trans, root->fs_info); BUG_ON(ret); - ret = btrfs_run_qgroups(trans, root->fs_info); - BUG_ON(ret); - - /* run_qgroups might have added some more refs */ - ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); - BUG_ON(ret); - while (!list_empty(&fs_info->dirty_cowonly_roots)) { next = fs_info->dirty_cowonly_roots.next; list_del_init(next); @@ -984,14 +957,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, } } - ret = btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid, - objectid, pending->inherit); - kfree(pending->inherit); - if (ret) { - pending->error = ret; - goto fail; - } - key.objectid = objectid; key.offset = (u64)-1; key.type = BTRFS_ROOT_ITEM_KEY; @@ -1304,6 +1269,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_run_ordered_operations(root, 0); + btrfs_trans_release_metadata(trans, root); + trans->block_rsv = NULL; + if (cur_trans->aborted) goto cleanup_transaction; @@ -1314,9 +1282,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, if (ret) goto cleanup_transaction; - btrfs_trans_release_metadata(trans, root); - trans->block_rsv = NULL; - cur_trans = trans->transaction; /* @@ -1365,8 +1330,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, spin_unlock(&root->fs_info->trans_lock); } - if (!btrfs_test_opt(root, SSD) && - (now < cur_trans->start_time || now - cur_trans->start_time < 1)) + if (now < cur_trans->start_time || now - cur_trans->start_time < 1) should_grow = 1; do { @@ -1387,13 +1351,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, if (ret) goto cleanup_transaction; - /* - * running the delayed items may have added new refs. account - * them now so that they hinder processing of more delayed refs - * as little as possible. 
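[Annotation: __btrfs_end_transaction above drains delayed ref updates in rounds, because running delayed refs can queue more of them, but it caps the work at two rounds (while (count < 2)) so ending a transaction stays bounded; anything left over is picked up by a later caller or the commit path. A toy model of that bounded-drain idiom, with made-up numbers.]

#include <stdio.h>

static unsigned long pending = 5;       /* toy backlog of updates */

static unsigned long run_some(unsigned long n)
{
        unsigned long done = n < pending ? n : pending;

        pending -= done;
        pending += done / 2;    /* processing work can queue more work */
        return done;
}

int main(void)
{
        for (int round = 0; round < 2 && pending; round++) {
                unsigned long ran = run_some(pending * 2);

                printf("round %d: ran %lu, %lu still pending\n",
                       round, ran, pending);
        }
        return 0;
}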
- */ - btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); - /* * rename don't use btrfs_join_transaction, so, once we * set the transaction to blocked above, we aren't going @@ -1506,7 +1463,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, root->fs_info->chunk_root->node); switch_commit_root(root->fs_info->chunk_root); - assert_qgroups_uptodate(trans); update_super_roots(root); if (!root->fs_info->log_root_recovering) { @@ -1576,8 +1532,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, return ret; cleanup_transaction: - btrfs_trans_release_metadata(trans, root); - trans->block_rsv = NULL; btrfs_printk(root->fs_info, "Skipping commit of aborted transaction.\n"); // WARN_ON(1); if (current->journal_info == trans) diff --git a/trunk/fs/btrfs/transaction.h b/trunk/fs/btrfs/transaction.h index e8b8416c688b..fe27379e368b 100644 --- a/trunk/fs/btrfs/transaction.h +++ b/trunk/fs/btrfs/transaction.h @@ -20,7 +20,6 @@ #define __BTRFS_TRANSACTION__ #include "btrfs_inode.h" #include "delayed-ref.h" -#include "ctree.h" struct btrfs_transaction { u64 transid; @@ -50,7 +49,6 @@ struct btrfs_transaction { struct btrfs_trans_handle { u64 transid; u64 bytes_reserved; - u64 qgroup_reserved; unsigned long use_count; unsigned long blocks_reserved; unsigned long blocks_used; @@ -59,22 +57,12 @@ struct btrfs_trans_handle { struct btrfs_block_rsv *block_rsv; struct btrfs_block_rsv *orig_rsv; int aborted; - int adding_csums; - /* - * this root is only needed to validate that the root passed to - * start_transaction is the same as the one passed to end_transaction. - * Subvolume quota depends on this - */ - struct btrfs_root *root; - struct seq_list delayed_ref_elem; - struct list_head qgroup_ref_list; }; struct btrfs_pending_snapshot { struct dentry *dentry; struct btrfs_root *root; struct btrfs_root *snap; - struct btrfs_qgroup_inherit *inherit; /* block reservation for the operation */ struct btrfs_block_rsv block_rsv; /* extra metadata reseration for relocation */ diff --git a/trunk/fs/btrfs/tree-log.c b/trunk/fs/btrfs/tree-log.c index c86670f4f285..8abeae4224f9 100644 --- a/trunk/fs/btrfs/tree-log.c +++ b/trunk/fs/btrfs/tree-log.c @@ -637,7 +637,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, } inode_set_bytes(inode, saved_nbytes); - ret = btrfs_update_inode(trans, root, inode); + btrfs_update_inode(trans, root, inode); out: if (inode) iput(inode); @@ -1133,7 +1133,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, btrfs_release_path(path); if (ret == 0) { btrfs_inc_nlink(inode); - ret = btrfs_update_inode(trans, root, inode); + btrfs_update_inode(trans, root, inode); } else if (ret == -EEXIST) { ret = 0; } else { diff --git a/trunk/fs/btrfs/volumes.c b/trunk/fs/btrfs/volumes.c index b8708f994e67..ecaad40e7ef4 100644 --- a/trunk/fs/btrfs/volumes.c +++ b/trunk/fs/btrfs/volumes.c @@ -429,7 +429,6 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) mutex_init(&fs_devices->device_list_mutex); fs_devices->latest_devid = orig->latest_devid; fs_devices->latest_trans = orig->latest_trans; - fs_devices->total_devices = orig->total_devices; memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid)); /* We have held the volume lock, it is safe to get the devices. 
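[Annotation: the total_devices bookkeeping being stripped out of volumes.c here existed to back the BTRFS_IOC_DEVICES_READY check deleted from super.c earlier in this patch: compare the number of devices scanned so far against the count recorded in the superblock. As a sketch, with invented types.]

struct fs_devices {
        unsigned long long num_devices;         /* devices scanned so far */
        unsigned long long total_devices;       /* count from the superblock */
};

static int devices_ready(const struct fs_devices *fd)
{
        /* mirrors the removed ioctl: returns 0 when all devices showed up */
        return !(fd->num_devices == fd->total_devices);
}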
*/ @@ -740,7 +739,6 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, int ret; u64 devid; u64 transid; - u64 total_devices; flags |= FMODE_EXCL; bdev = blkdev_get_by_path(path, flags, holder); @@ -762,7 +760,6 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, disk_super = (struct btrfs_super_block *)bh->b_data; devid = btrfs_stack_device_id(&disk_super->dev_item); transid = btrfs_super_generation(disk_super); - total_devices = btrfs_super_num_devices(disk_super); if (disk_super->label[0]) printk(KERN_INFO "device label %s ", disk_super->label); else @@ -770,8 +767,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, printk(KERN_CONT "devid %llu transid %llu %s\n", (unsigned long long)devid, (unsigned long long)transid, path); ret = device_list_add(path, disk_super, devid, fs_devices_ret); - if (!ret && fs_devices_ret) - (*fs_devices_ret)->total_devices = total_devices; + brelse(bh); error_close: mutex_unlock(&uuid_mutex); @@ -1437,7 +1433,6 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) list_del_rcu(&device->dev_list); device->fs_devices->num_devices--; - device->fs_devices->total_devices--; if (device->missing) root->fs_info->fs_devices->missing_devices--; @@ -1555,7 +1550,6 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) fs_devices->seeding = 0; fs_devices->num_devices = 0; fs_devices->open_devices = 0; - fs_devices->total_devices = 0; fs_devices->seed = seed_devices; generate_random_uuid(fs_devices->fsid); @@ -1755,7 +1749,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) root->fs_info->fs_devices->num_devices++; root->fs_info->fs_devices->open_devices++; root->fs_info->fs_devices->rw_devices++; - root->fs_info->fs_devices->total_devices++; if (device->can_discard) root->fs_info->fs_devices->num_can_discard++; root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; @@ -4743,6 +4736,9 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info) key.offset = device->devid; ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); if (ret) { + printk_in_rcu(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n", + rcu_str_deref(device->name), + (unsigned long long)device->devid); __btrfs_reset_dev_stats(device); device->dev_stats_valid = 1; btrfs_release_path(path); @@ -4884,14 +4880,6 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev) static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) { - int i; - - for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) - if (btrfs_dev_stat_read(dev, i) != 0) - break; - if (i == BTRFS_DEV_STAT_VALUES_MAX) - return; /* all values == 0, suppress message */ - printk_in_rcu(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", rcu_str_deref(dev->name), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), @@ -4902,7 +4890,8 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) } int btrfs_get_dev_stats(struct btrfs_root *root, - struct btrfs_ioctl_get_dev_stats *stats) + struct btrfs_ioctl_get_dev_stats *stats, + int reset_after_read) { struct btrfs_device *dev; struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; @@ -4920,7 +4909,7 @@ int btrfs_get_dev_stats(struct btrfs_root *root, printk(KERN_WARNING "btrfs: get dev_stats failed, not yet valid\n"); return -ENODEV; - } else if (stats->flags & BTRFS_DEV_STATS_RESET) { + } else if (reset_after_read) { for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; 
i++) { if (stats->nr_items > i) stats->values[i] = diff --git a/trunk/fs/btrfs/volumes.h b/trunk/fs/btrfs/volumes.h index 5479325987b3..95f6637614db 100644 --- a/trunk/fs/btrfs/volumes.h +++ b/trunk/fs/btrfs/volumes.h @@ -126,7 +126,6 @@ struct btrfs_fs_devices { u64 missing_devices; u64 total_rw_bytes; u64 num_can_discard; - u64 total_devices; struct block_device *latest_bdev; /* all of the devices in the FS, protected by a mutex @@ -294,7 +293,8 @@ struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root, void btrfs_dev_stat_print_on_error(struct btrfs_device *device); void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); int btrfs_get_dev_stats(struct btrfs_root *root, - struct btrfs_ioctl_get_dev_stats *stats); + struct btrfs_ioctl_get_dev_stats *stats, + int reset_after_read); int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); diff --git a/trunk/fs/inode.c b/trunk/fs/inode.c index 033529ecd242..c99163b1b310 100644 --- a/trunk/fs/inode.c +++ b/trunk/fs/inode.c @@ -1551,8 +1551,6 @@ void touch_atime(struct path *path) * Btrfs), but since we touch atime while walking down the path we * really don't care if we failed to update the atime of the file, * so just ignore the return value. - * We may also fail on filesystems that have the ability to make parts - * of the fs read only, e.g. subvolumes in Btrfs. */ update_time(inode, &now, S_ATIME); mnt_drop_write(mnt);
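[Annotation: the btrfs_get_dev_stats() change near the end of this patch replaces the flags-based reset with an explicit reset_after_read argument driven by the new BTRFS_IOC_GET_AND_RESET_DEV_STATS ioctl; in the kernel the reset path reads each counter atomically so increments racing with the ioctl are not lost. A userspace sketch of read versus read-and-clear; the array size and all names are invented.]

#include <stdatomic.h>
#include <stdio.h>

#define NR_STATS 5
static atomic_long stats[NR_STATS];     /* per-device error counters */

static void get_dev_stats(long *out, int reset_after_read)
{
        for (int i = 0; i < NR_STATS; i++)
                out[i] = reset_after_read
                        ? atomic_exchange(&stats[i], 0) /* read and clear */
                        : atomic_load(&stats[i]);       /* read only */
}

int main(void)
{
        long snap[NR_STATS];

        atomic_fetch_add(&stats[0], 3);         /* simulate write errors */
        get_dev_stats(snap, 1);
        printf("wr errs %ld (now cleared)\n", snap[0]);
        return 0;
}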