diff --git a/[refs] b/[refs]
index 733809e033d0..ce2e8353f308 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 9613bebb223dea3179c265dc31e1bb41ae39f321
+refs/heads/master: f52a759327b8b30fbf66da2d75961d2dbc7c6cfa
diff --git a/trunk/Documentation/dontdiff b/trunk/Documentation/dontdiff
index 0c083c5c2faa..b4a898f43c37 100644
--- a/trunk/Documentation/dontdiff
+++ b/trunk/Documentation/dontdiff
@@ -158,7 +158,6 @@ logo_*.c
 logo_*_clut224.c
 logo_*_mono.c
 lxdialog
-mach
 mach-types
 mach-types.h
 machtypes.h
diff --git a/trunk/arch/ia64/include/asm/cmpxchg.h b/trunk/arch/ia64/include/asm/cmpxchg.h
index 4c96187e2049..507c66c77600 100644
--- a/trunk/arch/ia64/include/asm/cmpxchg.h
+++ b/trunk/arch/ia64/include/asm/cmpxchg.h
@@ -1 +1 @@
-#include
+/* Future home of xchg() and cmpxchg() */
diff --git a/trunk/arch/s390/include/asm/cpu_mf.h b/trunk/arch/s390/include/asm/cpu_mf.h
index a3afecdae145..e49db5d5d06f 100644
--- a/trunk/arch/s390/include/asm/cpu_mf.h
+++ b/trunk/arch/s390/include/asm/cpu_mf.h
@@ -12,8 +12,6 @@
 #ifndef _ASM_S390_CPU_MF_H
 #define _ASM_S390_CPU_MF_H
 
-#include
-
 #define CPU_MF_INT_SF_IAE	(1 << 31)	/* invalid entry address */
 #define CPU_MF_INT_SF_ISE	(1 << 30)	/* incorrect SDBT entry */
 #define CPU_MF_INT_SF_PRA	(1 << 29)	/* program request alert */
diff --git a/trunk/arch/s390/include/asm/mmu.h b/trunk/arch/s390/include/asm/mmu.h
index 6340178748bf..1c7d6ce328bf 100644
--- a/trunk/arch/s390/include/asm/mmu.h
+++ b/trunk/arch/s390/include/asm/mmu.h
@@ -1,8 +1,6 @@
 #ifndef __MMU_H
 #define __MMU_H
 
-#include
-
 typedef struct {
 	atomic_t attach_count;
 	unsigned int flush_mm;
diff --git a/trunk/arch/s390/kernel/lgr.c b/trunk/arch/s390/kernel/lgr.c
index 87f080b17af1..ac39e7a731fc 100644
--- a/trunk/arch/s390/kernel/lgr.c
+++ b/trunk/arch/s390/kernel/lgr.c
@@ -8,7 +8,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
diff --git a/trunk/arch/s390/kernel/perf_cpum_cf.c b/trunk/arch/s390/kernel/perf_cpum_cf.c
index 46405086479c..8481ecf2ad71 100644
--- a/trunk/arch/s390/kernel/perf_cpum_cf.c
+++ b/trunk/arch/s390/kernel/perf_cpum_cf.c
@@ -18,7 +18,7 @@
 #include
 #include
 #include
-#include
+#include
 #include
 #include
 
diff --git a/trunk/arch/s390/kernel/perf_event.c b/trunk/arch/s390/kernel/perf_event.c
index f58f37f66824..609f985198cf 100644
--- a/trunk/arch/s390/kernel/perf_event.c
+++ b/trunk/arch/s390/kernel/perf_event.c
@@ -15,6 +15,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/trunk/arch/s390/kernel/setup.c b/trunk/arch/s390/kernel/setup.c
index 06264ae8ccd9..1581ea2e027a 100644
--- a/trunk/arch/s390/kernel/setup.c
+++ b/trunk/arch/s390/kernel/setup.c
@@ -50,7 +50,6 @@
 #include
 #include
 
-#include
 #include
 #include
 #include
diff --git a/trunk/arch/s390/kernel/smp.c b/trunk/arch/s390/kernel/smp.c
index 1f77227669e8..a8bf9994b086 100644
--- a/trunk/arch/s390/kernel/smp.c
+++ b/trunk/arch/s390/kernel/smp.c
@@ -32,8 +32,6 @@
 #include
 #include
 #include
-#include
-#include
 #include
 #include
 #include
diff --git a/trunk/arch/sparc/include/asm/ptrace.h b/trunk/arch/sparc/include/asm/ptrace.h
index 9835adc163fd..ef8c7c068f53 100644
--- a/trunk/arch/sparc/include/asm/ptrace.h
+++ b/trunk/arch/sparc/include/asm/ptrace.h
@@ -165,7 +165,6 @@ struct sparc_stackf {
 #ifdef __KERNEL__
 
 #include
-#include
 
 static inline int pt_regs_trap_type(struct pt_regs *regs)
 {
diff --git a/trunk/arch/sparc/kernel/jump_label.c b/trunk/arch/sparc/kernel/jump_label.c
index 48565c11e82a..971fd435a281 100644
--- a/trunk/arch/sparc/kernel/jump_label.c
+++ b/trunk/arch/sparc/kernel/jump_label.c
@@ -6,8 +6,6 @@
 #include
 #include
 
-#include
-
 #ifdef HAVE_JUMP_LABEL
 
 void arch_jump_label_transform(struct jump_entry *entry,
diff --git a/trunk/arch/sparc/kernel/kgdb_64.c b/trunk/arch/sparc/kernel/kgdb_64.c
index c8759550799f..768290a6c028 100644
--- a/trunk/arch/sparc/kernel/kgdb_64.c
+++ b/trunk/arch/sparc/kernel/kgdb_64.c
@@ -7,7 +7,6 @@
 #include
 #include
 
-#include
 #include
 #include
 #include
diff --git a/trunk/fs/btrfs/async-thread.c b/trunk/fs/btrfs/async-thread.c
index 42704149b723..0cc20b35c1c4 100644
--- a/trunk/fs/btrfs/async-thread.c
+++ b/trunk/fs/btrfs/async-thread.c
@@ -171,11 +171,11 @@ static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
 	spin_unlock_irqrestore(&workers->lock, flags);
 }
 
-static noinline void run_ordered_completions(struct btrfs_workers *workers,
+static noinline int run_ordered_completions(struct btrfs_workers *workers,
 					    struct btrfs_work *work)
 {
 	if (!workers->ordered)
-		return;
+		return 0;
 
 	set_bit(WORK_DONE_BIT, &work->flags);
@@ -213,6 +213,7 @@ static noinline void run_ordered_completions(struct btrfs_workers *workers,
 	}
 
 	spin_unlock(&workers->order_lock);
+	return 0;
 }
 
 static void put_worker(struct btrfs_worker_thread *worker)
@@ -398,7 +399,7 @@ static int worker_loop(void *arg)
 /*
  * this will wait for all the worker threads to shutdown
  */
-void btrfs_stop_workers(struct btrfs_workers *workers)
+int btrfs_stop_workers(struct btrfs_workers *workers)
 {
 	struct list_head *cur;
 	struct btrfs_worker_thread *worker;
@@ -426,6 +427,7 @@ void btrfs_stop_workers(struct btrfs_workers *workers)
 		put_worker(worker);
 	}
 	spin_unlock_irq(&workers->lock);
+	return 0;
 }
 
 /*
@@ -613,14 +615,14 @@ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
  * it was taken from. It is intended for use with long running work functions
  * that make some progress and want to give the cpu up for others.
  */
-void btrfs_requeue_work(struct btrfs_work *work)
+int btrfs_requeue_work(struct btrfs_work *work)
 {
 	struct btrfs_worker_thread *worker = work->worker;
 	unsigned long flags;
 	int wake = 0;
 
 	if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
-		return;
+		goto out;
 
 	spin_lock_irqsave(&worker->lock, flags);
 	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
@@ -647,6 +649,9 @@ void btrfs_requeue_work(struct btrfs_work *work)
 	if (wake)
 		wake_up_process(worker->task);
 	spin_unlock_irqrestore(&worker->lock, flags);
+out:
+
+	return 0;
 }
 
 void btrfs_set_work_high_prio(struct btrfs_work *work)
diff --git a/trunk/fs/btrfs/async-thread.h b/trunk/fs/btrfs/async-thread.h
index 063698b90ce2..f34cc31fa3c9 100644
--- a/trunk/fs/btrfs/async-thread.h
+++ b/trunk/fs/btrfs/async-thread.h
@@ -111,9 +111,9 @@ struct btrfs_workers {
 void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
 int btrfs_start_workers(struct btrfs_workers *workers);
-void btrfs_stop_workers(struct btrfs_workers *workers);
+int btrfs_stop_workers(struct btrfs_workers *workers);
 void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
 			struct btrfs_workers *async_starter);
-void btrfs_requeue_work(struct btrfs_work *work);
+int btrfs_requeue_work(struct btrfs_work *work);
 void btrfs_set_work_high_prio(struct btrfs_work *work);
 #endif
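The async-thread hunks above show the mechanical pattern that runs through most of this revert: functions that had been narrowed to void (because they cannot fail) get their historical int return type back, reporting a constant 0 that callers may then check. A minimal sketch of the two shapes, with invented names (not btrfs code):

#include <stdio.h>

/* Hypothetical illustration of the revert pattern; names are invented. */

/* Style this revert removes: the function cannot fail, so it returns void. */
static void stop_workers_void(void) { /* ... tear down threads ... */ }

/* Style this revert restores: an int status that is always 0 here,
 * which callers then check (often just BUG_ON(ret)). */
static int stop_workers_int(void) { /* ... tear down threads ... */ return 0; }

int main(void)
{
	stop_workers_void();
	if (stop_workers_int() != 0)
		fprintf(stderr, "cannot happen: status is always 0\n");
	return 0;
}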
diff --git a/trunk/fs/btrfs/backref.c b/trunk/fs/btrfs/backref.c
index f4e90748940a..0436c12da8c2 100644
--- a/trunk/fs/btrfs/backref.c
+++ b/trunk/fs/btrfs/backref.c
@@ -116,7 +116,6 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
  * to a logical address
  */
 static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
-					int search_commit_root,
 					struct __prelim_ref *ref,
 					struct ulist *parents)
 {
@@ -132,7 +131,6 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
-	path->search_commit_root = !!search_commit_root;
 
 	root_key.objectid = ref->root_id;
 	root_key.type = BTRFS_ROOT_ITEM_KEY;
@@ -190,7 +188,6 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
  * resolve all indirect backrefs from the list
  */
 static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
-				   int search_commit_root,
 				   struct list_head *head)
 {
 	int err;
@@ -215,8 +212,7 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 			continue;
 		if (ref->count == 0)
 			continue;
-		err = __resolve_indirect_ref(fs_info, search_commit_root,
-					     ref, parents);
+		err = __resolve_indirect_ref(fs_info, ref, parents);
 		if (err) {
 			if (ret == 0)
 				ret = err;
@@ -590,7 +586,6 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	struct btrfs_delayed_ref_head *head;
 	int info_level = 0;
 	int ret;
-	int search_commit_root = (trans == BTRFS_BACKREF_SEARCH_COMMIT_ROOT);
 	struct list_head prefs_delayed;
 	struct list_head prefs;
 	struct __prelim_ref *ref;
@@ -605,7 +600,6 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
-	path->search_commit_root = !!search_commit_root;
 
 	/*
 	 * grab both a lock on the path and a lock on the delayed ref head.
@@ -620,39 +614,35 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 		goto out;
 	BUG_ON(ret == 0);
 
-	if (trans != BTRFS_BACKREF_SEARCH_COMMIT_ROOT) {
-		/*
-		 * look if there are updates for this ref queued and lock the
-		 * head
-		 */
-		delayed_refs = &trans->transaction->delayed_refs;
-		spin_lock(&delayed_refs->lock);
-		head = btrfs_find_delayed_ref_head(trans, bytenr);
-		if (head) {
-			if (!mutex_trylock(&head->mutex)) {
-				atomic_inc(&head->node.refs);
-				spin_unlock(&delayed_refs->lock);
-
-				btrfs_release_path(path);
-
-				/*
-				 * Mutex was contended, block until it's
-				 * released and try again
-				 */
-				mutex_lock(&head->mutex);
-				mutex_unlock(&head->mutex);
-				btrfs_put_delayed_ref(&head->node);
-				goto again;
-			}
-			ret = __add_delayed_refs(head, seq, &info_key,
-						 &prefs_delayed);
-			if (ret) {
-				spin_unlock(&delayed_refs->lock);
-				goto out;
-			}
+	/*
+	 * look if there are updates for this ref queued and lock the head
+	 */
+	delayed_refs = &trans->transaction->delayed_refs;
+	spin_lock(&delayed_refs->lock);
+	head = btrfs_find_delayed_ref_head(trans, bytenr);
+	if (head) {
+		if (!mutex_trylock(&head->mutex)) {
+			atomic_inc(&head->node.refs);
+			spin_unlock(&delayed_refs->lock);
+
+			btrfs_release_path(path);
+
+			/*
+			 * Mutex was contended, block until it's
+			 * released and try again
+			 */
+			mutex_lock(&head->mutex);
+			mutex_unlock(&head->mutex);
+			btrfs_put_delayed_ref(&head->node);
+			goto again;
+		}
+		ret = __add_delayed_refs(head, seq, &info_key, &prefs_delayed);
+		if (ret) {
+			spin_unlock(&delayed_refs->lock);
+			goto out;
 		}
-		spin_unlock(&delayed_refs->lock);
 	}
+	spin_unlock(&delayed_refs->lock);
 
 	if (path->slots[0]) {
 		struct extent_buffer *leaf;
@@ -689,7 +679,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	if (ret)
 		goto out;
 
-	ret = __resolve_indirect_refs(fs_info, search_commit_root, &prefs);
+	ret = __resolve_indirect_refs(fs_info, &prefs);
 	if (ret)
 		goto out;
 
@@ -1084,7 +1074,8 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
 	return 0;
 }
 
-static int iterate_leaf_refs(struct btrfs_fs_info *fs_info, u64 logical,
+static int iterate_leaf_refs(struct btrfs_fs_info *fs_info,
+				struct btrfs_path *path, u64 logical,
 				u64 orig_extent_item_objectid,
 				u64 extent_item_pos, u64 root,
 				iterate_extent_inodes_t *iterate, void *ctx)
@@ -1152,38 +1143,35 @@ static int iterate_leaf_refs(struct btrfs_fs_info *fs_info, u64 logical,
  * calls iterate() for every inode that references the extent identified by
  * the given parameters.
  * when the iterator function returns a non-zero value, iteration stops.
+ * path is guaranteed to be in released state when iterate() is called.
  */
 int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
+				struct btrfs_path *path,
 				u64 extent_item_objectid, u64 extent_item_pos,
-				int search_commit_root,
 				iterate_extent_inodes_t *iterate, void *ctx)
 {
 	int ret;
 	struct list_head data_refs = LIST_HEAD_INIT(data_refs);
 	struct list_head shared_refs = LIST_HEAD_INIT(shared_refs);
 	struct btrfs_trans_handle *trans;
-	struct ulist *refs = NULL;
-	struct ulist *roots = NULL;
+	struct ulist *refs;
+	struct ulist *roots;
 	struct ulist_node *ref_node = NULL;
 	struct ulist_node *root_node = NULL;
 	struct seq_list seq_elem;
-	struct btrfs_delayed_ref_root *delayed_refs = NULL;
+	struct btrfs_delayed_ref_root *delayed_refs;
+
+	trans = btrfs_join_transaction(fs_info->extent_root);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
 
 	pr_debug("resolving all inodes for extent %llu\n", extent_item_objectid);
 
-	if (search_commit_root) {
-		trans = BTRFS_BACKREF_SEARCH_COMMIT_ROOT;
-	} else {
-		trans = btrfs_join_transaction(fs_info->extent_root);
-		if (IS_ERR(trans))
-			return PTR_ERR(trans);
-
-		delayed_refs = &trans->transaction->delayed_refs;
-		spin_lock(&delayed_refs->lock);
-		btrfs_get_delayed_seq(delayed_refs, &seq_elem);
-		spin_unlock(&delayed_refs->lock);
-	}
+	delayed_refs = &trans->transaction->delayed_refs;
+	spin_lock(&delayed_refs->lock);
+	btrfs_get_delayed_seq(delayed_refs, &seq_elem);
+	spin_unlock(&delayed_refs->lock);
 
 	ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
 				   extent_item_pos, seq_elem.seq,
@@ -1200,7 +1188,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 	while (!ret && (root_node = ulist_next(roots, root_node))) {
 		pr_debug("root %llu references leaf %llu\n", root_node->val,
 				ref_node->val);
-		ret = iterate_leaf_refs(fs_info, ref_node->val,
+		ret = iterate_leaf_refs(fs_info, path, ref_node->val,
 					extent_item_objectid,
 					extent_item_pos, root_node->val,
 					iterate, ctx);
@@ -1210,11 +1198,8 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 	ulist_free(refs);
 	ulist_free(roots);
 out:
-	if (!search_commit_root) {
-		btrfs_put_delayed_seq(delayed_refs, &seq_elem);
-		btrfs_end_transaction(trans, fs_info->extent_root);
-	}
-
+	btrfs_put_delayed_seq(delayed_refs, &seq_elem);
+	btrfs_end_transaction(trans, fs_info->extent_root);
 	return ret;
 }
 
@@ -1225,7 +1210,6 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
 	int ret;
 	u64 extent_item_pos;
 	struct btrfs_key found_key;
-	int search_commit_root = path->search_commit_root;
 
 	ret = extent_from_logical(fs_info, logical, path,
 					&found_key);
@@ -1236,9 +1220,8 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
 		return ret;
 
 	extent_item_pos = logical - found_key.objectid;
-	ret = iterate_extent_inodes(fs_info, found_key.objectid,
-					extent_item_pos, search_commit_root,
-					iterate, ctx);
+	ret = iterate_extent_inodes(fs_info, path, found_key.objectid,
+					extent_item_pos, iterate, ctx);
 
 	return ret;
 }
 
@@ -1359,6 +1342,12 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath)
 				inode_to_path, ipath);
 }
 
+/*
+ * allocates space to return multiple file system paths for an inode.
+ * total_bytes to allocate are passed, note that space usable for actual path
+ * information will be total_bytes - sizeof(struct inode_fs_paths).
+ * the returned pointer must be freed with free_ipath() in the end.
+ */
 struct btrfs_data_container *init_data_container(u32 total_bytes)
 {
 	struct btrfs_data_container *data;
@@ -1414,6 +1403,5 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
 
 void free_ipath(struct inode_fs_paths *ipath)
 {
-	kfree(ipath->fspath);
 	kfree(ipath);
 }
diff --git a/trunk/fs/btrfs/backref.h b/trunk/fs/btrfs/backref.h
index 57ea2e959e4d..d00dfa9ca934 100644
--- a/trunk/fs/btrfs/backref.h
+++ b/trunk/fs/btrfs/backref.h
@@ -22,8 +22,6 @@
 #include "ioctl.h"
 #include "ulist.h"
 
-#define BTRFS_BACKREF_SEARCH_COMMIT_ROOT ((struct btrfs_trans_handle *)0)
-
 struct inode_fs_paths {
 	struct btrfs_path *btrfs_path;
 	struct btrfs_root *fs_root;
@@ -46,8 +44,9 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
 				u64 *out_root, u8 *out_level);
 
 int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
+				struct btrfs_path *path,
 				u64 extent_item_objectid,
-				u64 extent_offset, int search_commit_root,
+				u64 extent_offset,
 				iterate_extent_inodes_t *iterate, void *ctx);
 
 int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
diff --git a/trunk/fs/btrfs/compression.c b/trunk/fs/btrfs/compression.c
index d286b40a5671..b805afb37fa8 100644
--- a/trunk/fs/btrfs/compression.c
+++ b/trunk/fs/btrfs/compression.c
@@ -226,8 +226,8 @@ static void end_compressed_bio_read(struct bio *bio, int err)
  * Clear the writeback bits on all of the file
  * pages for a compressed write
  */
-static noinline void end_compressed_writeback(struct inode *inode, u64 start,
-					      unsigned long ram_size)
+static noinline int end_compressed_writeback(struct inode *inode, u64 start,
+					     unsigned long ram_size)
 {
 	unsigned long index = start >> PAGE_CACHE_SHIFT;
 	unsigned long end_index = (start + ram_size - 1) >> PAGE_CACHE_SHIFT;
@@ -253,6 +253,7 @@ static noinline void end_compressed_writeback(struct inode *inode, u64 start,
 		index += ret;
 	}
 	/* the inode may be gone now */
+	return 0;
 }
 
 /*
@@ -391,16 +392,16 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 			 */
 			atomic_inc(&cb->pending_bios);
 			ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
-			BUG_ON(ret); /* -ENOMEM */
+			BUG_ON(ret);
 
 			if (!skip_sum) {
 				ret = btrfs_csum_one_bio(root, inode, bio,
 							 start, 1);
-				BUG_ON(ret); /* -ENOMEM */
+				BUG_ON(ret);
 			}
 
 			ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
-			BUG_ON(ret); /* -ENOMEM */
+			BUG_ON(ret);
 
 			bio_put(bio);
 
@@ -420,15 +421,15 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	bio_get(bio);
 
 	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
-	BUG_ON(ret); /* -ENOMEM */
+	BUG_ON(ret);
 
 	if (!skip_sum) {
 		ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
-		BUG_ON(ret); /* -ENOMEM */
+		BUG_ON(ret);
 	}
 
 	ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
-	BUG_ON(ret); /* -ENOMEM */
+	BUG_ON(ret);
 
 	bio_put(bio);
 	return 0;
@@ -496,7 +497,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 		 * sure they map to this compressed extent on disk.
 		 */
 		set_page_extent_mapped(page);
-		lock_extent(tree, last_offset, end);
+		lock_extent(tree, last_offset, end, GFP_NOFS);
 		read_lock(&em_tree->lock);
 		em = lookup_extent_mapping(em_tree, last_offset,
 					   PAGE_CACHE_SIZE);
@@ -506,7 +507,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 		    (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
 		    (em->block_start >> 9) != cb->orig_bio->bi_sector) {
 			free_extent_map(em);
-			unlock_extent(tree, last_offset, end);
+			unlock_extent(tree, last_offset, end, GFP_NOFS);
 			unlock_page(page);
 			page_cache_release(page);
 			break;
@@ -534,7 +535,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 			nr_pages++;
 			page_cache_release(page);
 		} else {
-			unlock_extent(tree, last_offset, end);
+			unlock_extent(tree, last_offset, end, GFP_NOFS);
 			unlock_page(page);
 			page_cache_release(page);
 			break;
@@ -661,7 +662,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 			bio_get(comp_bio);
 
 			ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
-			BUG_ON(ret); /* -ENOMEM */
+			BUG_ON(ret);
 
 			/*
 			 * inc the count before we submit the bio so
@@ -674,14 +675,14 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 			if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
 				ret = btrfs_lookup_bio_sums(root, inode,
 							comp_bio, sums);
-				BUG_ON(ret); /* -ENOMEM */
+				BUG_ON(ret);
 			}
 			sums += (comp_bio->bi_size + root->sectorsize - 1) /
 				root->sectorsize;
 
 			ret = btrfs_map_bio(root, READ, comp_bio,
 					    mirror_num, 0);
-			BUG_ON(ret); /* -ENOMEM */
+			BUG_ON(ret);
 
 			bio_put(comp_bio);
 
@@ -697,15 +698,15 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	bio_get(comp_bio);
 
 	ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
-	BUG_ON(ret); /* -ENOMEM */
+	BUG_ON(ret);
 
 	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
 		ret = btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
-		BUG_ON(ret); /* -ENOMEM */
+		BUG_ON(ret);
 	}
 
 	ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
-	BUG_ON(ret); /* -ENOMEM */
+	BUG_ON(ret);
 
 	bio_put(comp_bio);
 	return 0;
@@ -733,7 +734,7 @@ struct btrfs_compress_op *btrfs_compress_op[] = {
 	&btrfs_lzo_compress,
 };
 
-void __init btrfs_init_compress(void)
+int __init btrfs_init_compress(void)
 {
 	int i;
 
@@ -743,6 +744,7 @@ void __init btrfs_init_compress(void)
 		atomic_set(&comp_alloc_workspace[i], 0);
 		init_waitqueue_head(&comp_workspace_wait[i]);
 	}
+	return 0;
 }
 
 /*
diff --git a/trunk/fs/btrfs/compression.h b/trunk/fs/btrfs/compression.h
index 9afb0a62ae82..a12059f4f0fd 100644
--- a/trunk/fs/btrfs/compression.h
+++ b/trunk/fs/btrfs/compression.h
@@ -19,7 +19,7 @@
 #ifndef __BTRFS_COMPRESSION_
 #define __BTRFS_COMPRESSION_
 
-void btrfs_init_compress(void);
+int btrfs_init_compress(void);
 void btrfs_exit_compress(void);
 
 int btrfs_compress_pages(int type, struct address_space *mapping,
diff --git a/trunk/fs/btrfs/ctree.c b/trunk/fs/btrfs/ctree.c
index e801f226d7e0..0639a555e16e 100644
--- a/trunk/fs/btrfs/ctree.c
+++ b/trunk/fs/btrfs/ctree.c
@@ -36,7 +36,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root,
 			      struct extent_buffer *dst_buf,
 			      struct extent_buffer *src_buf);
-static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		   struct btrfs_path *path, int level, int slot);
 
 struct btrfs_path *btrfs_alloc_path(void)
@@ -156,23 +156,10 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
 {
 	struct extent_buffer *eb;
 
-	while (1) {
-		rcu_read_lock();
-		eb = rcu_dereference(root->node);
-
-		/*
-		 * RCU really hurts here, we could free up the root node because
-		 * it was cow'ed but we may not get the new root node yet so do
-		 * the inc_not_zero dance and if it doesn't work then
-		 * synchronize_rcu and try again.
-		 */
-		if (atomic_inc_not_zero(&eb->refs)) {
-			rcu_read_unlock();
-			break;
-		}
-		rcu_read_unlock();
-		synchronize_rcu();
-	}
+	rcu_read_lock();
+	eb = rcu_dereference(root->node);
+	extent_buffer_get(eb);
+	rcu_read_unlock();
 	return eb;
 }
 
@@ -344,13 +331,8 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
 	if (btrfs_block_can_be_shared(root, buf)) {
 		ret = btrfs_lookup_extent_info(trans, root, buf->start,
 					       buf->len, &refs, &flags);
-		if (ret)
-			return ret;
-		if (refs == 0) {
-			ret = -EROFS;
-			btrfs_std_error(root->fs_info, ret);
-			return ret;
-		}
+		BUG_ON(ret);
+		BUG_ON(refs == 0);
 	} else {
 		refs = 1;
 		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
@@ -369,14 +351,14 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
 		     root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
 		    !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
 			ret = btrfs_inc_ref(trans, root, buf, 1, 1);
-			BUG_ON(ret); /* -ENOMEM */
+			BUG_ON(ret);
 
 			if (root->root_key.objectid ==
 			    BTRFS_TREE_RELOC_OBJECTID) {
 				ret = btrfs_dec_ref(trans, root, buf, 0, 1);
-				BUG_ON(ret); /* -ENOMEM */
+				BUG_ON(ret);
 				ret = btrfs_inc_ref(trans, root, cow, 1, 1);
-				BUG_ON(ret); /* -ENOMEM */
+				BUG_ON(ret);
 			}
 			new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
 		} else {
@@ -386,15 +368,14 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
 				ret = btrfs_inc_ref(trans, root, cow, 1, 1);
 			else
 				ret = btrfs_inc_ref(trans, root, cow, 0, 1);
-			BUG_ON(ret); /* -ENOMEM */
+			BUG_ON(ret);
 		}
 		if (new_flags != 0) {
 			ret = btrfs_set_disk_extent_flags(trans, root,
 							  buf->start,
 							  buf->len,
 							  new_flags, 0);
-			if (ret)
-				return ret;
+			BUG_ON(ret);
 		}
 	} else {
 		if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
@@ -403,9 +384,9 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
 				ret = btrfs_inc_ref(trans, root, cow, 1, 1);
 			else
 				ret = btrfs_inc_ref(trans, root, cow, 0, 1);
-			BUG_ON(ret); /* -ENOMEM */
+			BUG_ON(ret);
 			ret = btrfs_dec_ref(trans, root, buf, 1, 1);
-			BUG_ON(ret); /* -ENOMEM */
+			BUG_ON(ret);
 		}
 		clean_tree_block(trans, root, buf);
 		*last_ref = 1;
@@ -434,7 +415,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 {
 	struct btrfs_disk_key disk_key;
 	struct extent_buffer *cow;
-	int level, ret;
+	int level;
 	int last_ref = 0;
 	int unlock_orig = 0;
 	u64 parent_start;
@@ -486,11 +467,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 			    (unsigned long)btrfs_header_fsid(cow),
 			    BTRFS_FSID_SIZE);
 
-	ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
-	if (ret) {
-		btrfs_abort_transaction(trans, root, ret);
-		return ret;
-	}
+	update_ref_for_cow(trans, root, buf, cow, &last_ref);
 
 	if (root->ref_cows)
 		btrfs_reloc_cow_block(trans, root, buf, cow);
@@ -527,7 +504,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 	}
 	if (unlock_orig)
 		btrfs_tree_unlock(buf);
-	free_extent_buffer_stale(buf);
+	free_extent_buffer(buf);
 	btrfs_mark_buffer_dirty(cow);
 	*cow_ret = cow;
 	return 0;
@@ -957,12 +934,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 
 		/* promote the child to a root */
 		child = read_node_slot(root, mid, 0);
-		if (!child) {
-			ret = -EROFS;
-			btrfs_std_error(root->fs_info, ret);
-			goto enospc;
-		}
-
+		BUG_ON(!child);
 		btrfs_tree_lock(child);
 		btrfs_set_lock_blocking(child);
 		ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
@@ -987,7 +959,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		root_sub_used(root, mid->len);
 		btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
 		/* once for the root ptr */
-		free_extent_buffer_stale(mid);
+		free_extent_buffer(mid);
 		return 0;
 	}
 	if (btrfs_header_nritems(mid) >
@@ -1038,10 +1010,13 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		if (btrfs_header_nritems(right) == 0) {
 			clean_tree_block(trans, root, right);
 			btrfs_tree_unlock(right);
-			del_ptr(trans, root, path, level + 1, pslot + 1);
+			wret = del_ptr(trans, root, path, level + 1, pslot +
+				       1);
+			if (wret)
+				ret = wret;
 			root_sub_used(root, right->len);
 			btrfs_free_tree_block(trans, root, right, 0, 1, 0);
-			free_extent_buffer_stale(right);
+			free_extent_buffer(right);
 			right = NULL;
 		} else {
 			struct btrfs_disk_key right_key;
@@ -1060,11 +1035,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		 * otherwise we would have pulled some pointers from the
 		 * right
 		 */
-		if (!left) {
-			ret = -EROFS;
-			btrfs_std_error(root->fs_info, ret);
-			goto enospc;
-		}
+		BUG_ON(!left);
 		wret = balance_node_right(trans, root, mid, left);
 		if (wret < 0) {
 			ret = wret;
@@ -1080,10 +1051,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 	if (btrfs_header_nritems(mid) == 0) {
 		clean_tree_block(trans, root, mid);
 		btrfs_tree_unlock(mid);
-		del_ptr(trans, root, path, level + 1, pslot);
+		wret = del_ptr(trans, root, path, level + 1, pslot);
+		if (wret)
+			ret = wret;
 		root_sub_used(root, mid->len);
 		btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
-		free_extent_buffer_stale(mid);
+		free_extent_buffer(mid);
 		mid = NULL;
 	} else {
 		/* update the parent key to reflect our changes */
@@ -1409,8 +1382,7 @@ static noinline int reada_for_balance(struct btrfs_root *root,
  * if lowest_unlock is 1, level 0 won't be unlocked
  */
 static noinline void unlock_up(struct btrfs_path *path, int level,
-			       int lowest_unlock, int min_write_lock_level,
-			       int *write_lock_level)
+			       int lowest_unlock)
 {
 	int i;
 	int skip_level = level;
@@ -1442,11 +1414,6 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
 		if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
 			btrfs_tree_unlock_rw(t, path->locks[i]);
 			path->locks[i] = 0;
-			if (write_lock_level &&
-			    i > min_write_lock_level &&
-			    i <= *write_lock_level) {
-				*write_lock_level = i - 1;
-			}
 		}
 	}
 }
@@ -1670,7 +1637,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 	/* everything at write_lock_level or lower must be write locked */
 	int write_lock_level = 0;
 	u8 lowest_level = 0;
-	int min_write_lock_level;
 
 	lowest_level = p->lowest_level;
 	WARN_ON(lowest_level && ins_len > 0);
@@ -1698,8 +1664,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 	if (cow && (p->keep_locks || p->lowest_level))
 		write_lock_level = BTRFS_MAX_LEVEL;
 
-	min_write_lock_level = write_lock_level;
-
 again:
 	/*
 	 * we try very hard to do read locks on the root
@@ -1831,8 +1795,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 				goto again;
 			}
 
-			unlock_up(p, level, lowest_unlock,
-				  min_write_lock_level, &write_lock_level);
+			unlock_up(p, level, lowest_unlock);
 
 			if (level == lowest_level) {
 				if (dec)
@@ -1894,8 +1857,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 				}
 			}
 			if (!p->search_for_split)
-				unlock_up(p, level, lowest_unlock,
-					  min_write_lock_level, &write_lock_level);
+				unlock_up(p, level, lowest_unlock);
 			goto done;
 		}
 	}
@@ -1919,12 +1881,15 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 * fixing up pointers when a given leaf/node is not in slot 0 of the
 * higher levels
 *
+ * If this fails to write a tree block, it returns -1, but continues
+ * fixing up the blocks in ram so the tree is consistent.
 */
-static void fixup_low_keys(struct btrfs_trans_handle *trans,
-			   struct btrfs_root *root, struct btrfs_path *path,
-			   struct btrfs_disk_key *key, int level)
+static int fixup_low_keys(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root, struct btrfs_path *path,
+			  struct btrfs_disk_key *key, int level)
 {
 	int i;
+	int ret = 0;
 	struct extent_buffer *t;
 
 	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
@@ -1937,6 +1902,7 @@ static void fixup_low_keys(struct btrfs_trans_handle *trans,
 		if (tslot != 0)
 			break;
 	}
+	return ret;
 }
 
 /*
@@ -1945,9 +1911,9 @@ static void fixup_low_keys(struct btrfs_trans_handle *trans,
 * This function isn't completely safe. It's the caller's responsibility
 * that the new key won't break the order
 */
-void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
-			     struct btrfs_root *root, struct btrfs_path *path,
-			     struct btrfs_key *new_key)
+int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root, struct btrfs_path *path,
+			    struct btrfs_key *new_key)
 {
 	struct btrfs_disk_key disk_key;
 	struct extent_buffer *eb;
@@ -1957,11 +1923,13 @@ void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
 	slot = path->slots[0];
 	if (slot > 0) {
 		btrfs_item_key(eb, &disk_key, slot - 1);
-		BUG_ON(comp_keys(&disk_key, new_key) >= 0);
+		if (comp_keys(&disk_key, new_key) >= 0)
+			return -1;
 	}
 	if (slot < btrfs_header_nritems(eb) - 1) {
 		btrfs_item_key(eb, &disk_key, slot + 1);
-		BUG_ON(comp_keys(&disk_key, new_key) <= 0);
+		if (comp_keys(&disk_key, new_key) <= 0)
+			return -1;
 	}
 
 	btrfs_cpu_key_to_disk(&disk_key, new_key);
@@ -1969,6 +1937,7 @@ void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(eb);
 	if (slot == 0)
 		fixup_low_keys(trans, root, path, &disk_key, 1);
+	return 0;
 }
 
 /*
@@ -2171,11 +2140,12 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 *
 * slot and level indicate where you want the key to go, and
 * blocknr is the block the key points to.
+ *
+ * returns zero on success and < 0 on any error
 */
-static void insert_ptr(struct btrfs_trans_handle *trans,
-		       struct btrfs_root *root, struct btrfs_path *path,
-		       struct btrfs_disk_key *key, u64 bytenr,
-		       int slot, int level)
+static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root, struct btrfs_path *path, struct btrfs_disk_key
+		      *key, u64 bytenr, int slot, int level)
 {
 	struct extent_buffer *lower;
 	int nritems;
@@ -2185,7 +2155,8 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
 	lower = path->nodes[level];
 	nritems = btrfs_header_nritems(lower);
 	BUG_ON(slot > nritems);
-	BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root));
+	if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
+		BUG();
 	if (slot != nritems) {
 		memmove_extent_buffer(lower,
 			      btrfs_node_key_ptr_offset(slot + 1),
@@ -2198,6 +2169,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
 	btrfs_set_node_ptr_generation(lower, slot, trans->transid);
 	btrfs_set_header_nritems(lower, nritems + 1);
 	btrfs_mark_buffer_dirty(lower);
+	return 0;
 }
 
 /*
@@ -2218,6 +2190,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 	struct btrfs_disk_key disk_key;
 	int mid;
 	int ret;
+	int wret;
 	u32 c_nritems;
 
 	c = path->nodes[level];
@@ -2274,8 +2247,11 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(c);
 	btrfs_mark_buffer_dirty(split);
 
-	insert_ptr(trans, root, path, &disk_key, split->start,
-		   path->slots[level + 1] + 1, level + 1);
+	wret = insert_ptr(trans, root, path, &disk_key, split->start,
+			  path->slots[level + 1] + 1,
+			  level + 1);
+	if (wret)
+		ret = wret;
 
 	if (path->slots[level] >= mid) {
 		path->slots[level] -= mid;
@@ -2344,7 +2320,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 {
 	struct extent_buffer *left = path->nodes[0];
 	struct extent_buffer *upper = path->nodes[1];
-	struct btrfs_map_token token;
 	struct btrfs_disk_key disk_key;
 	int slot;
 	u32 i;
@@ -2356,8 +2331,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 	u32 data_end;
 	u32 this_item_size;
 
-	btrfs_init_map_token(&token);
-
 	if (empty)
 		nr = 0;
 	else
@@ -2435,8 +2408,8 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 	push_space = BTRFS_LEAF_DATA_SIZE(root);
 	for (i = 0; i < right_nritems; i++) {
 		item = btrfs_item_nr(right, i);
-		push_space -= btrfs_token_item_size(right, item, &token);
-		btrfs_set_token_item_offset(right, item, push_space, &token);
+		push_space -= btrfs_item_size(right, item);
+		btrfs_set_item_offset(right, item, push_space);
 	}
 
 	left_nritems -= push_items;
@@ -2564,11 +2537,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 	u32 old_left_nritems;
 	u32 nr;
 	int ret = 0;
+	int wret;
 	u32 this_item_size;
 	u32 old_left_item_size;
-	struct btrfs_map_token token;
-
-	btrfs_init_map_token(&token);
 
 	if (empty)
 		nr = min(right_nritems, max_slot);
@@ -2629,10 +2600,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 
 		item = btrfs_item_nr(left, i);
 
-		ioff = btrfs_token_item_offset(left, item, &token);
-		btrfs_set_token_item_offset(left, item,
-		      ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size),
-		      &token);
+		ioff = btrfs_item_offset(left, item);
+		btrfs_set_item_offset(left, item,
+		      ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
 	}
 	btrfs_set_header_nritems(left, old_left_nritems + push_items);
 
@@ -2662,9 +2632,8 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 
 	for (i = 0; i < right_nritems; i++) {
 		item = btrfs_item_nr(right, i);
-		push_space = push_space - btrfs_token_item_size(right,
-								item, &token);
-		btrfs_set_token_item_offset(right, item, push_space, &token);
+		push_space = push_space - btrfs_item_size(right, item);
+		btrfs_set_item_offset(right, item, push_space);
 	}
 
 	btrfs_mark_buffer_dirty(left);
@@ -2674,7 +2643,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 		clean_tree_block(trans, root, right);
 
 	btrfs_item_key(right, &disk_key, 0);
-	fixup_low_keys(trans, root, path, &disk_key, 1);
+	wret = fixup_low_keys(trans, root, path, &disk_key, 1);
+	if (wret)
+		ret = wret;
 
 	/* then fixup the leaf pointer in the path */
 	if (path->slots[0] < push_items) {
@@ -2745,8 +2716,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
 			      path->nodes[1], slot - 1, &left);
 	if (ret) {
 		/* we hit -ENOSPC, but it isn't fatal here */
-		if (ret == -ENOSPC)
-			ret = 1;
+		ret = 1;
 		goto out;
 	}
 
@@ -2768,21 +2738,22 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
 /*
 * split the path's leaf in two, making sure there is at least data_size
 * available for the resulting leaf level of the path.
+ *
+ * returns 0 if all went well and < 0 on failure.
 */
-static noinline void copy_for_split(struct btrfs_trans_handle *trans,
-				    struct btrfs_root *root,
-				    struct btrfs_path *path,
-				    struct extent_buffer *l,
-				    struct extent_buffer *right,
-				    int slot, int mid, int nritems)
+static noinline int copy_for_split(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   struct btrfs_path *path,
+				   struct extent_buffer *l,
+				   struct extent_buffer *right,
+				   int slot, int mid, int nritems)
 {
 	int data_copy_size;
 	int rt_data_off;
 	int i;
+	int ret = 0;
+	int wret;
 	struct btrfs_disk_key disk_key;
-	struct btrfs_map_token token;
-
-	btrfs_init_map_token(&token);
 
 	nritems = nritems - mid;
 	btrfs_set_header_nritems(right, nritems);
@@ -2804,15 +2775,17 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
 		struct btrfs_item *item = btrfs_item_nr(right, i);
 		u32 ioff;
 
-		ioff = btrfs_token_item_offset(right, item, &token);
-		btrfs_set_token_item_offset(right, item,
-					    ioff + rt_data_off, &token);
+		ioff = btrfs_item_offset(right, item);
+		btrfs_set_item_offset(right, item, ioff + rt_data_off);
 	}
 
 	btrfs_set_header_nritems(l, mid);
+	ret = 0;
 	btrfs_item_key(right, &disk_key, 0);
-	insert_ptr(trans, root, path, &disk_key, right->start,
-		   path->slots[1] + 1, 1);
+	wret = insert_ptr(trans, root, path, &disk_key, right->start,
+			  path->slots[1] + 1, 1);
+	if (wret)
+		ret = wret;
 
 	btrfs_mark_buffer_dirty(right);
 	btrfs_mark_buffer_dirty(l);
@@ -2830,6 +2803,8 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
 	}
 
 	BUG_ON(path->slots[0] < 0);
+
+	return ret;
 }
 
 /*
@@ -3018,8 +2993,12 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
 	if (split == 0) {
 		if (mid <= slot) {
 			btrfs_set_header_nritems(right, 0);
-			insert_ptr(trans, root, path, &disk_key, right->start,
-				   path->slots[1] + 1, 1);
+			wret = insert_ptr(trans, root, path,
+					  &disk_key, right->start,
+					  path->slots[1] + 1, 1);
+			if (wret)
+				ret = wret;
+
 			btrfs_tree_unlock(path->nodes[0]);
 			free_extent_buffer(path->nodes[0]);
 			path->nodes[0] = right;
@@ -3027,21 +3006,29 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
 			path->slots[1] += 1;
 		} else {
 			btrfs_set_header_nritems(right, 0);
-			insert_ptr(trans, root, path, &disk_key, right->start,
+			wret = insert_ptr(trans, root, path,
+					  &disk_key,
+					  right->start,
 					  path->slots[1], 1);
+			if (wret)
+				ret = wret;
 			btrfs_tree_unlock(path->nodes[0]);
 			free_extent_buffer(path->nodes[0]);
 			path->nodes[0] = right;
 			path->slots[0] = 0;
-			if (path->slots[1] == 0)
-				fixup_low_keys(trans, root, path,
-					       &disk_key, 1);
+			if (path->slots[1] == 0) {
+				wret = fixup_low_keys(trans, root,
						path, &disk_key, 1);
+				if (wret)
+					ret = wret;
+			}
 		}
 		btrfs_mark_buffer_dirty(right);
 		return ret;
 	}
 
-	copy_for_split(trans, root, path, l, right, slot, mid, nritems);
+	ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
+	BUG_ON(ret);
 
 	if (split == 2) {
 		BUG_ON(num_doubles != 0);
@@ -3049,7 +3036,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
 		goto again;
 	}
 
-	return 0;
+	return ret;
 
 push_for_double:
 	push_for_double_split(trans, root, path, data_size);
@@ -3251,9 +3238,11 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
 		return ret;
 
 	path->slots[0]++;
-	setup_items_for_insert(trans, root, path, new_key, &item_size,
-			       item_size, item_size +
-			       sizeof(struct btrfs_item), 1);
+	ret = setup_items_for_insert(trans, root, path, new_key, &item_size,
+				     item_size, item_size +
				     sizeof(struct btrfs_item), 1);
+	BUG_ON(ret);
+
 	leaf = path->nodes[0];
 	memcpy_extent_buffer(leaf,
 			     btrfs_item_ptr_offset(leaf, path->slots[0]),
@@ -3268,10 +3257,10 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
 * off the end of the item or if we shift the item to chop bytes off
 * the front.
 */
-void btrfs_truncate_item(struct btrfs_trans_handle *trans,
-			 struct btrfs_root *root,
-			 struct btrfs_path *path,
-			 u32 new_size, int from_end)
+int btrfs_truncate_item(struct btrfs_trans_handle *trans,
+			struct btrfs_root *root,
+			struct btrfs_path *path,
+			u32 new_size, int from_end)
 {
 	int slot;
 	struct extent_buffer *leaf;
@@ -3282,16 +3271,13 @@ void btrfs_truncate_item(struct btrfs_trans_handle *trans,
 	unsigned int old_size;
 	unsigned int size_diff;
 	int i;
-	struct btrfs_map_token token;
-
-	btrfs_init_map_token(&token);
 
 	leaf = path->nodes[0];
 	slot = path->slots[0];
 
 	old_size = btrfs_item_size_nr(leaf, slot);
 	if (old_size == new_size)
-		return;
+		return 0;
 
 	nritems = btrfs_header_nritems(leaf);
 	data_end = leaf_data_end(root, leaf);
@@ -3311,9 +3297,8 @@ void btrfs_truncate_item(struct btrfs_trans_handle *trans,
 		u32 ioff;
 		item = btrfs_item_nr(leaf, i);
 
-		ioff = btrfs_token_item_offset(leaf, item, &token);
-		btrfs_set_token_item_offset(leaf, item,
-					    ioff + size_diff, &token);
+		ioff = btrfs_item_offset(leaf, item);
+		btrfs_set_item_offset(leaf, item, ioff + size_diff);
 	}
 
 	/* shift the data */
@@ -3365,14 +3350,15 @@ void btrfs_truncate_item(struct btrfs_trans_handle *trans,
 		btrfs_print_leaf(root, leaf);
 		BUG();
 	}
+	return 0;
 }
 
 /*
 * make the item pointed to by the path bigger, data_size is the new size.
 */
-void btrfs_extend_item(struct btrfs_trans_handle *trans,
-		       struct btrfs_root *root, struct btrfs_path *path,
-		       u32 data_size)
+int btrfs_extend_item(struct btrfs_trans_handle *trans,
+		      struct btrfs_root *root, struct btrfs_path *path,
+		      u32 data_size)
 {
 	int slot;
 	struct extent_buffer *leaf;
@@ -3382,9 +3368,6 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans,
 	unsigned int old_data;
 	unsigned int old_size;
 	int i;
-	struct btrfs_map_token token;
-
-	btrfs_init_map_token(&token);
 
 	leaf = path->nodes[0];
 
@@ -3414,9 +3397,8 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans,
 		u32 ioff;
 		item = btrfs_item_nr(leaf, i);
 
-		ioff = btrfs_token_item_offset(leaf, item, &token);
-		btrfs_set_token_item_offset(leaf, item,
-					    ioff - data_size, &token);
+		ioff = btrfs_item_offset(leaf, item);
+		btrfs_set_item_offset(leaf, item, ioff - data_size);
 	}
 
 	/* shift the data */
@@ -3434,6 +3416,7 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans,
 		btrfs_print_leaf(root, leaf);
 		BUG();
 	}
+	return 0;
 }
 
 /*
@@ -3458,9 +3441,6 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
 	unsigned int data_end;
 	struct btrfs_disk_key disk_key;
 	struct btrfs_key found_key;
-	struct btrfs_map_token token;
-
-	btrfs_init_map_token(&token);
 
 	for (i = 0; i < nr; i++) {
 		if (total_size + data_size[i] + sizeof(struct btrfs_item) >
@@ -3526,9 +3506,8 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
 			u32 ioff;
 			item = btrfs_item_nr(leaf, i);
 
-			ioff = btrfs_token_item_offset(leaf, item, &token);
-			btrfs_set_token_item_offset(leaf, item,
-						    ioff - total_data, &token);
+			ioff = btrfs_item_offset(leaf, item);
+			btrfs_set_item_offset(leaf, item, ioff - total_data);
 		}
 		/* shift the items */
 		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
@@ -3555,10 +3534,9 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
 		btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
 		btrfs_set_item_key(leaf, &disk_key, slot + i);
 		item = btrfs_item_nr(leaf, slot + i);
-		btrfs_set_token_item_offset(leaf, item,
-					    data_end - data_size[i], &token);
+		btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
 		data_end -= data_size[i];
-		btrfs_set_token_item_size(leaf, item, data_size[i], &token);
+		btrfs_set_item_size(leaf, item, data_size[i]);
 	}
 	btrfs_set_header_nritems(leaf, nritems + nr);
 	btrfs_mark_buffer_dirty(leaf);
@@ -3566,7 +3544,7 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
 	ret = 0;
 	if (slot == 0) {
 		btrfs_cpu_key_to_disk(&disk_key, cpu_key);
-		fixup_low_keys(trans, root, path, &disk_key, 1);
+		ret = fixup_low_keys(trans, root, path, &disk_key, 1);
 	}
 
 	if (btrfs_leaf_free_space(root, leaf) < 0) {
@@ -3584,21 +3562,19 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
 * to save stack depth by doing the bulk of the work in a function
 * that doesn't call btrfs_search_slot
 */
-void setup_items_for_insert(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root, struct btrfs_path *path,
-			    struct btrfs_key *cpu_key, u32 *data_size,
-			    u32 total_data, u32 total_size, int nr)
+int setup_items_for_insert(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root, struct btrfs_path *path,
+			   struct btrfs_key *cpu_key, u32 *data_size,
+			   u32 total_data, u32 total_size, int nr)
 {
 	struct btrfs_item *item;
 	int i;
 	u32 nritems;
 	unsigned int data_end;
 	struct btrfs_disk_key disk_key;
+	int ret;
 	struct extent_buffer *leaf;
 	int slot;
-	struct btrfs_map_token token;
-
-	btrfs_init_map_token(&token);
 
 	leaf = path->nodes[0];
 	slot = path->slots[0];
@@ -3630,9 +3606,8 @@ void setup_items_for_insert(struct btrfs_trans_handle *trans,
 			u32 ioff;
 			item = btrfs_item_nr(leaf, i);
 
-			ioff = btrfs_token_item_offset(leaf, item, &token);
-			btrfs_set_token_item_offset(leaf, item,
-						    ioff - total_data, &token);
+			ioff = btrfs_item_offset(leaf, item);
+			btrfs_set_item_offset(leaf, item, ioff - total_data);
 		}
 		/* shift the items */
 		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
@@ -3651,17 +3626,17 @@ void setup_items_for_insert(struct btrfs_trans_handle *trans,
 		btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
 		btrfs_set_item_key(leaf, &disk_key, slot + i);
 		item = btrfs_item_nr(leaf, slot + i);
-		btrfs_set_token_item_offset(leaf, item,
-					    data_end - data_size[i], &token);
+		btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
 		data_end -= data_size[i];
-		btrfs_set_token_item_size(leaf, item, data_size[i], &token);
+		btrfs_set_item_size(leaf, item, data_size[i]);
 	}
 
 	btrfs_set_header_nritems(leaf, nritems + nr);
+	ret = 0;
 
 	if (slot == 0) {
 		btrfs_cpu_key_to_disk(&disk_key, cpu_key);
-		fixup_low_keys(trans, root, path, &disk_key, 1);
+		ret = fixup_low_keys(trans, root, path, &disk_key, 1);
 	}
 	btrfs_unlock_up_safe(path, 1);
 	btrfs_mark_buffer_dirty(leaf);
@@ -3670,6 +3645,7 @@ void setup_items_for_insert(struct btrfs_trans_handle *trans,
 		btrfs_print_leaf(root, leaf);
 		BUG();
 	}
+	return ret;
 }
 
 /*
@@ -3696,14 +3672,16 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
 	if (ret == 0)
 		return -EEXIST;
 	if (ret < 0)
-		return ret;
+		goto out;
 
 	slot = path->slots[0];
 	BUG_ON(slot < 0);
 
-	setup_items_for_insert(trans, root, path, cpu_key, data_size,
+	ret = setup_items_for_insert(trans, root, path, cpu_key, data_size,
 			       total_data, total_size, nr);
-	return 0;
+
+out:
+	return ret;
 }
 
 /*
@@ -3739,11 +3717,13 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
 * the tree should have been previously balanced so the deletion does not
 * empty a node.
 */
-static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		    struct btrfs_path *path, int level, int slot)
+static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		   struct btrfs_path *path, int level, int slot)
 {
 	struct extent_buffer *parent = path->nodes[level];
 	u32 nritems;
+	int ret = 0;
+	int wret;
 
 	nritems = btrfs_header_nritems(parent);
 	if (slot != nritems - 1) {
@@ -3763,9 +3743,12 @@ static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		struct btrfs_disk_key disk_key;
 
 		btrfs_node_key(parent, &disk_key, 0);
-		fixup_low_keys(trans, root, path, &disk_key, level + 1);
+		wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
+		if (wret)
+			ret = wret;
 	}
 	btrfs_mark_buffer_dirty(parent);
+	return ret;
 }
 
 /*
@@ -3778,13 +3761,17 @@ static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 * The path must have already been setup for deleting the leaf, including
 * all the proper balancing. path->nodes[1] must be locked.
 */
-static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
-				    struct btrfs_root *root,
-				    struct btrfs_path *path,
-				    struct extent_buffer *leaf)
+static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   struct btrfs_path *path,
+				   struct extent_buffer *leaf)
 {
+	int ret;
+
 	WARN_ON(btrfs_header_generation(leaf) != trans->transid);
-	del_ptr(trans, root, path, 1, path->slots[1]);
+	ret = del_ptr(trans, root, path, 1, path->slots[1]);
+	if (ret)
+		return ret;
 
 	/*
 	 * btrfs_free_extent is expensive, we want to make sure we
@@ -3794,9 +3781,8 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
 
 	root_sub_used(root, leaf->len);
 
-	extent_buffer_get(leaf);
 	btrfs_free_tree_block(trans, root, leaf, 0, 1, 0);
-	free_extent_buffer_stale(leaf);
+	return 0;
 }
 /*
 * delete the item at the leaf level in path. If that empties
@@ -3813,9 +3799,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 	int wret;
 	int i;
 	u32 nritems;
-	struct btrfs_map_token token;
-
-	btrfs_init_map_token(&token);
 
 	leaf = path->nodes[0];
 	last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
@@ -3837,9 +3820,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 			u32 ioff;
 			item = btrfs_item_nr(leaf, i);
 
-			ioff = btrfs_token_item_offset(leaf, item, &token);
-			btrfs_set_token_item_offset(leaf, item,
-						    ioff + dsize, &token);
+			ioff = btrfs_item_offset(leaf, item);
+			btrfs_set_item_offset(leaf, item, ioff + dsize);
 		}
 
 		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
@@ -3857,7 +3839,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		} else {
 			btrfs_set_path_blocking(path);
 			clean_tree_block(trans, root, leaf);
-			btrfs_del_leaf(trans, root, path, leaf);
+			ret = btrfs_del_leaf(trans, root, path, leaf);
+			BUG_ON(ret);
 		}
 	} else {
 		int used = leaf_space_used(leaf, 0, nritems);
@@ -3865,7 +3848,10 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 			struct btrfs_disk_key disk_key;
 
 			btrfs_item_key(leaf, &disk_key, 0);
-			fixup_low_keys(trans, root, path, &disk_key, 1);
+			wret = fixup_low_keys(trans, root, path,
+					      &disk_key, 1);
+			if (wret)
+				ret = wret;
 		}
 
 		/* delete the leaf if it is mostly empty */
@@ -3893,9 +3879,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 			if (btrfs_header_nritems(leaf) == 0) {
 				path->slots[1] = slot;
-				btrfs_del_leaf(trans, root, path, leaf);
+				ret = btrfs_del_leaf(trans, root, path, leaf);
+				BUG_ON(ret);
 				free_extent_buffer(leaf);
-				ret = 0;
 			} else {
 				/* if we're still in the path, make sure
 				 * we're dirty. Otherwise, one of the
@@ -4073,18 +4059,18 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
 		path->slots[level] = slot;
 		if (level == path->lowest_level) {
 			ret = 0;
-			unlock_up(path, level, 1, 0, NULL);
+			unlock_up(path, level, 1);
 			goto out;
 		}
 		btrfs_set_path_blocking(path);
 		cur = read_node_slot(root, cur, slot);
-		BUG_ON(!cur); /* -ENOMEM */
+		BUG_ON(!cur);
 
 		btrfs_tree_read_lock(cur);
 
 		path->locks[level - 1] = BTRFS_READ_LOCK;
 		path->nodes[level - 1] = cur;
-		unlock_up(path, level, 1, 0, NULL);
+		unlock_up(path, level, 1);
 		btrfs_clear_path_blocking(path, NULL, 0);
 	}
 out:
@@ -4320,7 +4306,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
 	}
 	ret = 0;
 done:
-	unlock_up(path, 0, 1, 0, NULL);
+	unlock_up(path, 0, 1);
 	path->leave_spinning = old_spinning;
 	if (!old_spinning)
 		btrfs_set_path_blocking(path);
diff --git a/trunk/fs/btrfs/ctree.h b/trunk/fs/btrfs/ctree.h
index 5b8ef8eb3521..80b6486fd5e6 100644
--- a/trunk/fs/btrfs/ctree.h
+++ b/trunk/fs/btrfs/ctree.h
@@ -48,8 +48,6 @@ struct btrfs_ordered_sum;
 
 #define BTRFS_MAGIC "_BHRfS_M"
 
-#define BTRFS_MAX_MIRRORS 2
-
 #define BTRFS_MAX_LEVEL 8
 
 #define BTRFS_COMPAT_EXTENT_TREE_V0
@@ -139,12 +137,6 @@ struct btrfs_ordered_sum;
 
 #define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
 
-/*
- * the max metadata block size. This limit is somewhat artificial,
- * but the memmove costs go through the roof for larger blocks.
- */
-#define BTRFS_MAX_METADATA_BLOCKSIZE 65536
-
 /*
 * we can actually store much bigger names, but lets not confuse the rest
 * of linux
@@ -469,19 +461,6 @@ struct btrfs_super_block {
 #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL	(1ULL << 1)
 #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS	(1ULL << 2)
 #define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO	(1ULL << 3)
-/*
- * some patches floated around with a second compression method
- * lets save that incompat here for when they do get in
- * Note we don't actually support it, we're just reserving the
- * number
- */
-#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZOv2	(1ULL << 4)
-
-/*
- * older kernels tried to do bigger metadata blocks, but the
- * code was pretty buggy. Lets not let them try anymore.
- */
-#define BTRFS_FEATURE_INCOMPAT_BIG_METADATA	(1ULL << 5)
 
 #define BTRFS_FEATURE_COMPAT_SUPP		0ULL
 #define BTRFS_FEATURE_COMPAT_RO_SUPP		0ULL
@@ -489,7 +468,6 @@ struct btrfs_super_block {
 	(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |		\
 	 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |	\
 	 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS |		\
-	 BTRFS_FEATURE_INCOMPAT_BIG_METADATA |		\
 	 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO)
 
 /*
@@ -851,21 +829,6 @@ struct btrfs_csum_item {
 */
 #define BTRFS_AVAIL_ALLOC_BIT_SINGLE	(1ULL << 48)
 
-#define BTRFS_EXTENDED_PROFILE_MASK	(BTRFS_BLOCK_GROUP_PROFILE_MASK | \
-					 BTRFS_AVAIL_ALLOC_BIT_SINGLE)
-
-static inline u64 chunk_to_extended(u64 flags)
-{
-	if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0)
-		flags |= BTRFS_AVAIL_ALLOC_BIT_SINGLE;
-
-	return flags;
-}
-static inline u64 extended_to_chunk(u64 flags)
-{
-	return flags & ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
-}
-
 struct btrfs_block_group_item {
 	__le64 used;
 	__le64 chunk_objectid;
@@ -1540,7 +1503,6 @@ struct btrfs_ioctl_defrag_range_args {
 #define BTRFS_MOUNT_SKIP_BALANCE	(1 << 19)
 #define BTRFS_MOUNT_CHECK_INTEGRITY	(1 << 20)
 #define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
-#define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR	(1 << 22)
 
 #define btrfs_clear_opt(o, opt)		((o) &= ~BTRFS_MOUNT_##opt)
 #define btrfs_set_opt(o, opt)		((o) |= BTRFS_MOUNT_##opt)
@@ -1564,17 +1526,6 @@ struct btrfs_ioctl_defrag_range_args {
 
 #define BTRFS_INODE_ROOT_ITEM_INIT	(1 << 31)
 
-struct btrfs_map_token {
-	struct extent_buffer *eb;
-	char *kaddr;
-	unsigned long offset;
-};
-
-static inline void btrfs_init_map_token (struct btrfs_map_token *token)
-{
-	memset(token, 0, sizeof(*token));
-}
-
 /* some macros to generate set/get funcs for the struct fields. This
 * assumes there is a lefoo_to_cpu for every type, so lets make a simple
 * one for u8:
@@ -1598,22 +1549,20 @@ static inline void btrfs_init_map_token (struct btrfs_map_token *token)
 #ifndef BTRFS_SETGET_FUNCS
 #define BTRFS_SETGET_FUNCS(name, type, member, bits)			\
 u##bits btrfs_##name(struct extent_buffer *eb, type *s);		\
-u##bits btrfs_token_##name(struct extent_buffer *eb, type *s, struct btrfs_map_token *token); \
-void btrfs_set_token_##name(struct extent_buffer *eb, type *s, u##bits val, struct btrfs_map_token *token);\
 void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val);
 #endif
 
 #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits)		\
 static inline u##bits btrfs_##name(struct extent_buffer *eb)		\
 {									\
-	type *p = page_address(eb->pages[0]);				\
+	type *p = page_address(eb->first_page);				\
 	u##bits res = le##bits##_to_cpu(p->member);			\
 	return res;							\
 }									\
 static inline void btrfs_set_##name(struct extent_buffer *eb,		\
 				    u##bits val)			\
 {									\
-	type *p = page_address(eb->pages[0]);				\
+	type *p = page_address(eb->first_page);				\
 	p->member = cpu_to_le##bits(val);				\
 }
 
@@ -2517,7 +2466,8 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  u64 num_bytes, u64 min_alloc_size,
				  u64 empty_size, u64 hint_byte,
-				  struct btrfs_key *ins, u64 data);
+				  u64 search_end, struct btrfs_key *ins,
+				  u64 data);
 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int full_backref, int for_cow);
 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
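The btrfs_map_token machinery removed in the ctree.h hunk above (struct btrfs_map_token, btrfs_init_map_token() and the btrfs_token_* accessor variants used throughout ctree.c) existed to cache one mapped page address across a loop of item accesses instead of re-mapping the extent-buffer page on every get/set. A rough sketch of that caching idea, with invented names and a plain array standing in for an extent buffer (not btrfs code):

#include <stdio.h>

/* Invented names; real btrfs caches the kmap'd page address in the token. */
struct map_token {
	const unsigned char *cached;	/* cached "mapping" of the buffer */
};

static const unsigned char *map_buffer(const unsigned char *buf,
				       struct map_token *token)
{
	if (token && token->cached)
		return token->cached;	/* reuse the earlier mapping */
	/* pretend this is the expensive kmap()-style step */
	if (token)
		token->cached = buf;
	return buf;
}

int main(void)
{
	unsigned char items[4] = { 10, 20, 30, 40 };
	struct map_token token = { 0 };
	unsigned sum = 0;

	/* with the token, each per-item "map" is a cheap cache hit */
	for (int i = 0; i < 4; i++)
		sum += map_buffer(items, &token)[i];
	printf("%u\n", sum);	/* 100 */
	return 0;
}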
@@ -2534,8 +2484,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
 int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
				       u64 start, u64 len);
-void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
-				 struct btrfs_root *root);
+int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root);
 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root);
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
@@ -2598,8 +2548,8 @@ void btrfs_block_rsv_release(struct btrfs_root *root,
			     u64 num_bytes);
 int btrfs_set_block_group_ro(struct btrfs_root *root,
			     struct btrfs_block_group_cache *cache);
-void btrfs_set_block_group_rw(struct btrfs_root *root,
-			      struct btrfs_block_group_cache *cache);
+int btrfs_set_block_group_rw(struct btrfs_root *root,
+			     struct btrfs_block_group_cache *cache);
 void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
 u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
 int btrfs_error_unpin_extent_range(struct btrfs_root *root,
@@ -2618,9 +2568,9 @@ int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2);
 int btrfs_previous_item(struct btrfs_root *root,
			struct btrfs_path *path, u64 min_objectid,
			int type);
-void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
-			     struct btrfs_root *root, struct btrfs_path *path,
-			     struct btrfs_key *new_key);
+int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root, struct btrfs_path *path,
+			    struct btrfs_key *new_key);
 struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
 struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
 int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
@@ -2640,13 +2590,12 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
		    struct extent_buffer **cow_ret, u64 new_root_objectid);
 int btrfs_block_can_be_shared(struct btrfs_root *root,
			      struct extent_buffer *buf);
-void btrfs_extend_item(struct btrfs_trans_handle *trans,
-		       struct btrfs_root *root, struct btrfs_path *path,
-		       u32 data_size);
-void btrfs_truncate_item(struct btrfs_trans_handle *trans,
-			 struct btrfs_root *root,
-			 struct btrfs_path *path,
-			 u32 new_size, int from_end);
+int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root, struct btrfs_path *path, u32 data_size);
+int btrfs_truncate_item(struct btrfs_trans_handle *trans,
+			struct btrfs_root *root,
+			struct btrfs_path *path,
+			u32 new_size, int from_end);
 int btrfs_split_item(struct btrfs_trans_handle *trans,
		     struct btrfs_root *root,
		     struct btrfs_path *path,
@@ -2680,10 +2629,10 @@ static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
	return btrfs_del_items(trans, root, path, path->slots[0], 1);
 }
 
-void setup_items_for_insert(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root, struct btrfs_path *path,
-			    struct btrfs_key *cpu_key, u32 *data_size,
-			    u32 total_data, u32 total_size, int nr);
+int setup_items_for_insert(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root, struct btrfs_path *path,
+			   struct btrfs_key *cpu_key, u32 *data_size,
+			   u32 total_data, u32 total_size, int nr);
 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_key *key, void *data, u32 data_size);
 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
@@ -2710,9 +2659,9 @@ static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
 }
 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
 int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
-int __must_check btrfs_drop_snapshot(struct btrfs_root *root,
-				     struct btrfs_block_rsv *block_rsv,
-				     int update_ref, int for_reloc);
+void btrfs_drop_snapshot(struct btrfs_root *root,
+			 struct btrfs_block_rsv *block_rsv, int update_ref,
+			 int for_reloc);
 int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
			struct btrfs_root *root,
			struct extent_buffer *node,
@@ -2738,6 +2687,24 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
	kfree(fs_info->super_for_commit);
	kfree(fs_info);
 }
+/**
+ * profile_is_valid - tests whether a given profile is valid and reduced
+ * @flags: profile to validate
+ * @extended: if true @flags is treated as an extended profile
+ */
+static inline int profile_is_valid(u64 flags, int extended)
+{
+	u64 mask = ~BTRFS_BLOCK_GROUP_PROFILE_MASK;
+
+	flags &= ~BTRFS_BLOCK_GROUP_TYPE_MASK;
+	if (extended)
+		mask &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
+
+	if (flags & mask)
+		return 0;
+	/* true if zero or exactly one bit set */
+	return (flags & (~flags + 1)) == flags;
+}
 
 /* root-item.c */
 int btrfs_find_root_ref(struct btrfs_root *tree_root,
@@ -2756,10 +2723,9 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_key *key, struct btrfs_root_item
		      *item);
-int __must_check btrfs_update_root(struct btrfs_trans_handle *trans,
-				   struct btrfs_root *root,
-				   struct btrfs_key *key,
-				   struct btrfs_root_item *item);
+int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root, struct btrfs_key *key, struct btrfs_root_item
+		      *item);
 int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
			 btrfs_root_item *item, struct btrfs_key *key);
 int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
@@ -2943,7 +2909,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root);
 void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root);
 int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
-void btrfs_invalidate_inodes(struct btrfs_root *root);
+int btrfs_invalidate_inodes(struct btrfs_root *root);
 void btrfs_add_delayed_iput(struct inode *inode);
 void btrfs_run_delayed_iputs(struct btrfs_root *root);
 int btrfs_prealloc_file_range(struct inode *inode, int mode,
@@ -2995,41 +2961,13 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
 /* super.c */
 int btrfs_parse_options(struct btrfs_root *root, char *options);
 int btrfs_sync_fs(struct super_block *sb, int wait);
-void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...);
 void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
-		     unsigned int line, int errno, const char *fmt, ...);
-
-void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
-			       struct btrfs_root *root, const char *function,
-			       unsigned int line, int errno);
-
-#define btrfs_abort_transaction(trans, root, errno)		\
-do {								\
-	__btrfs_abort_transaction(trans, root, __func__,	\
-				  __LINE__, errno);		\
-} while (0)
+		     unsigned int line, int errno);
 
 #define btrfs_std_error(fs_info, errno)				\
 do {								\
	if ((errno))						\
-		__btrfs_std_error((fs_info), __func__,		\
-				  __LINE__, (errno), NULL);	\
-} while (0)
-
-#define btrfs_error(fs_info, errno, fmt, args...)		\
-do {								\
-	__btrfs_std_error((fs_info), __func__, __LINE__,	\
-			  (errno), fmt, ##args);		\
-} while (0)
-
-void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
-		   unsigned int line, int errno, const char *fmt, ...);
-
-#define btrfs_panic(fs_info, errno, fmt, args...)		\
-do {								\
-	struct btrfs_fs_info *_i = (fs_info);			\
-	__btrfs_panic(_i, __func__, __LINE__, errno, fmt, ##args); \
-	BUG_ON(!(_i->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR)); \
+		__btrfs_std_error((fs_info), __func__, __LINE__, (errno));\
 } while (0)
 
 /* acl.c */
@@ -3065,17 +3003,16 @@ void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
 void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
			      struct btrfs_pending_snapshot *pending,
			      u64 *bytes_to_reserve);
-int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
+void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
			       struct btrfs_pending_snapshot *pending);
 
 /* scrub.c */
 int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
		    struct btrfs_scrub_progress *progress, int readonly);
-void btrfs_scrub_pause(struct btrfs_root *root);
-void btrfs_scrub_pause_super(struct btrfs_root *root);
-void btrfs_scrub_continue(struct btrfs_root *root);
-void btrfs_scrub_continue_super(struct btrfs_root *root);
-int __btrfs_scrub_cancel(struct btrfs_fs_info *info);
+int btrfs_scrub_pause(struct btrfs_root *root);
+int btrfs_scrub_pause_super(struct btrfs_root *root);
+int btrfs_scrub_continue(struct btrfs_root *root);
+int btrfs_scrub_continue_super(struct btrfs_root *root);
 int btrfs_scrub_cancel(struct btrfs_root *root);
 int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev);
 int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid);
diff --git a/trunk/fs/btrfs/delayed-inode.c b/trunk/fs/btrfs/delayed-inode.c
index 03e3748d84d0..fe4cd0f1cef1 100644
--- a/trunk/fs/btrfs/delayed-inode.c
+++ b/trunk/fs/btrfs/delayed-inode.c
@@ -115,7 +115,6 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
	return NULL;
 }
 
-/* Will return either the node or PTR_ERR(-ENOMEM) */
 static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
							struct inode *inode)
 {
@@ -837,8 +836,10 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
	btrfs_clear_path_blocking(path, NULL, 0);
 
	/* insert the keys of the items */
-	setup_items_for_insert(trans, root, path, keys, data_size,
-			       total_data_size, total_size, nitems);
+	ret = setup_items_for_insert(trans, root, path, keys, data_size,
+				     total_data_size, total_size, nitems);
+	if (ret)
+		goto error;
 
	/* insert the dir index items */
	slot = path->slots[0];
@@ -1107,25 +1108,16 @@ static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
	return 0;
 }
 
-/*
- * Called when committing the transaction.
- * Returns 0 on success.
- * Returns < 0 on error and returns with an aborted transaction with any
- * outstanding delayed items cleaned up.
- */
+/* Called when committing the transaction.
*/ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - struct btrfs_root *curr_root = root; struct btrfs_delayed_root *delayed_root; struct btrfs_delayed_node *curr_node, *prev_node; struct btrfs_path *path; struct btrfs_block_rsv *block_rsv; int ret = 0; - if (trans->aborted) - return -EIO; - path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -1138,18 +1130,17 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, curr_node = btrfs_first_delayed_node(delayed_root); while (curr_node) { - curr_root = curr_node->root; - ret = btrfs_insert_delayed_items(trans, path, curr_root, + root = curr_node->root; + ret = btrfs_insert_delayed_items(trans, path, root, curr_node); if (!ret) - ret = btrfs_delete_delayed_items(trans, path, - curr_root, curr_node); + ret = btrfs_delete_delayed_items(trans, path, root, + curr_node); if (!ret) - ret = btrfs_update_delayed_inode(trans, curr_root, - path, curr_node); + ret = btrfs_update_delayed_inode(trans, root, path, + curr_node); if (ret) { btrfs_release_delayed_node(curr_node); - btrfs_abort_transaction(trans, root, ret); break; } @@ -1160,7 +1151,6 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, btrfs_free_path(path); trans->block_rsv = block_rsv; - return ret; } @@ -1381,7 +1371,6 @@ void btrfs_balance_delayed_items(struct btrfs_root *root) btrfs_wq_run_delayed_node(delayed_root, root, 0); } -/* Will return 0 or -ENOMEM */ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, struct inode *dir, diff --git a/trunk/fs/btrfs/delayed-ref.c b/trunk/fs/btrfs/delayed-ref.c index 69f22e3ab3bc..66e4f29505a3 100644 --- a/trunk/fs/btrfs/delayed-ref.c +++ b/trunk/fs/btrfs/delayed-ref.c @@ -420,7 +420,7 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing, * this does all the dirty work in terms of maintaining the correct * overall modification count. */ -static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info, +static noinline int add_delayed_ref_head(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_node *ref, u64 bytenr, u64 num_bytes, @@ -487,19 +487,20 @@ static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info, * we've updated the existing ref, free the newly * allocated ref */ - kfree(head_ref); + kfree(ref); } else { delayed_refs->num_heads++; delayed_refs->num_heads_ready++; delayed_refs->num_entries++; trans->delayed_ref_updates++; } + return 0; } /* * helper to insert a delayed tree ref into the rbtree. */ -static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info, +static noinline int add_delayed_tree_ref(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_node *ref, u64 bytenr, u64 num_bytes, u64 parent, @@ -548,17 +549,18 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info, * we've updated the existing ref, free the newly * allocated ref */ - kfree(full_ref); + kfree(ref); } else { delayed_refs->num_entries++; trans->delayed_ref_updates++; } + return 0; } /* * helper to insert a delayed data ref into the rbtree. 
*/ -static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info, +static noinline int add_delayed_data_ref(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_node *ref, u64 bytenr, u64 num_bytes, u64 parent, @@ -609,11 +611,12 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info, * we've updated the existing ref, free the newly * allocated ref */ - kfree(full_ref); + kfree(ref); } else { delayed_refs->num_entries++; trans->delayed_ref_updates++; } + return 0; } /* @@ -631,6 +634,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, struct btrfs_delayed_tree_ref *ref; struct btrfs_delayed_ref_head *head_ref; struct btrfs_delayed_ref_root *delayed_refs; + int ret; BUG_ON(extent_op && extent_op->is_data); ref = kmalloc(sizeof(*ref), GFP_NOFS); @@ -652,12 +656,14 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, * insert both the head node and the new ref without dropping * the spin lock */ - add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, + ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, num_bytes, action, 0); + BUG_ON(ret); - add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, + ret = add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, num_bytes, parent, ref_root, level, action, for_cow); + BUG_ON(ret); if (!need_ref_seq(for_cow, ref_root) && waitqueue_active(&delayed_refs->seq_wait)) wake_up(&delayed_refs->seq_wait); @@ -679,6 +685,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, struct btrfs_delayed_data_ref *ref; struct btrfs_delayed_ref_head *head_ref; struct btrfs_delayed_ref_root *delayed_refs; + int ret; BUG_ON(extent_op && !extent_op->is_data); ref = kmalloc(sizeof(*ref), GFP_NOFS); @@ -700,12 +707,14 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, * insert both the head node and the new ref without dropping * the spin lock */ - add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, + ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, num_bytes, action, 1); + BUG_ON(ret); - add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, + ret = add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, num_bytes, parent, ref_root, owner, offset, action, for_cow); + BUG_ON(ret); if (!need_ref_seq(for_cow, ref_root) && waitqueue_active(&delayed_refs->seq_wait)) wake_up(&delayed_refs->seq_wait); @@ -720,6 +729,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, { struct btrfs_delayed_ref_head *head_ref; struct btrfs_delayed_ref_root *delayed_refs; + int ret; head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); if (!head_ref) @@ -730,9 +740,10 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, delayed_refs = &trans->transaction->delayed_refs; spin_lock(&delayed_refs->lock); - add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, + ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, num_bytes, BTRFS_UPDATE_DELAYED_HEAD, extent_op->is_data); + BUG_ON(ret); if (waitqueue_active(&delayed_refs->seq_wait)) wake_up(&delayed_refs->seq_wait); diff --git a/trunk/fs/btrfs/dir-item.c b/trunk/fs/btrfs/dir-item.c index c1a074d0696f..31d84e78129b 100644 --- a/trunk/fs/btrfs/dir-item.c +++ b/trunk/fs/btrfs/dir-item.c @@ -49,8 +49,9 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle di = btrfs_match_dir_item_name(root, path, name, name_len); if (di) return ERR_PTR(-EEXIST); - btrfs_extend_item(trans, root, path, data_size); - } else 
if (ret < 0) + ret = btrfs_extend_item(trans, root, path, data_size); + } + if (ret < 0) return ERR_PTR(ret); WARN_ON(ret > 0); leaf = path->nodes[0]; @@ -115,7 +116,6 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, * 'location' is the key to stuff into the directory item, 'type' is the * type of the inode we're pointing to, and 'index' is the sequence number * to use for the second index (if one is created). - * Will return 0 or -ENOMEM */ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, @@ -383,8 +383,8 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, start = btrfs_item_ptr_offset(leaf, path->slots[0]); memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, item_len - (ptr + sub_item_len - start)); - btrfs_truncate_item(trans, root, path, - item_len - sub_item_len, 1); + ret = btrfs_truncate_item(trans, root, path, + item_len - sub_item_len, 1); } return ret; } diff --git a/trunk/fs/btrfs/disk-io.c b/trunk/fs/btrfs/disk-io.c index 20196f411206..534266fe505f 100644 --- a/trunk/fs/btrfs/disk-io.c +++ b/trunk/fs/btrfs/disk-io.c @@ -48,19 +48,20 @@ static struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); static void free_fs_root(struct btrfs_root *root); -static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, +static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, int read_only); -static void btrfs_destroy_ordered_operations(struct btrfs_root *root); -static void btrfs_destroy_ordered_extents(struct btrfs_root *root); +static int btrfs_destroy_ordered_operations(struct btrfs_root *root); +static int btrfs_destroy_ordered_extents(struct btrfs_root *root); static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, struct btrfs_root *root); -static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t); -static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root); +static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t); +static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root); static int btrfs_destroy_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages, int mark); static int btrfs_destroy_pinned_extent(struct btrfs_root *root, struct extent_io_tree *pinned_extents); +static int btrfs_cleanup_transaction(struct btrfs_root *root); /* * end_io_wq structs are used to do processing in task context when an IO is @@ -98,7 +99,6 @@ struct async_submit_bio { */ u64 bio_offset; struct btrfs_work work; - int error; }; /* @@ -332,8 +332,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, return 0; lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1, - 0, &cached_state); - if (extent_buffer_uptodate(eb) && + 0, &cached_state, GFP_NOFS); + if (extent_buffer_uptodate(io_tree, eb, cached_state) && btrfs_header_generation(eb) == parent_transid) { ret = 0; goto out; @@ -344,7 +344,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, (unsigned long long)parent_transid, (unsigned long long)btrfs_header_generation(eb)); ret = 1; - clear_extent_buffer_uptodate(eb); + clear_extent_buffer_uptodate(io_tree, eb, &cached_state); out: unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1, &cached_state, GFP_NOFS); @@ -360,11 +360,9 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, u64 start, u64 parent_transid) { struct extent_io_tree *io_tree; - int failed = 0; int ret; int num_copies = 0; int 
mirror_num = 0; - int failed_mirror = 0; clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; @@ -372,8 +370,9 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, ret = read_extent_buffer_pages(io_tree, eb, start, WAIT_COMPLETE, btree_get_extent, mirror_num); - if (!ret && !verify_parent_transid(io_tree, eb, parent_transid)) - break; + if (!ret && + !verify_parent_transid(io_tree, eb, parent_transid)) + return ret; /* * This buffer's crc is fine, but its contents are corrupted, so @@ -381,31 +380,18 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, * any less wrong. */ if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) - break; - - if (!failed_mirror) { - failed = 1; - printk(KERN_ERR "failed mirror was %d\n", eb->failed_mirror); - failed_mirror = eb->failed_mirror; - } + return ret; num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, eb->start, eb->len); if (num_copies == 1) - break; + return ret; mirror_num++; - if (mirror_num == failed_mirror) - mirror_num++; - if (mirror_num > num_copies) - break; + return ret; } - - if (failed && !ret) - repair_eb_io_failure(root, eb, failed_mirror); - - return ret; + return -EIO; } /* @@ -418,27 +404,50 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) struct extent_io_tree *tree; u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 found_start; + unsigned long len; struct extent_buffer *eb; + int ret; tree = &BTRFS_I(page->mapping->host)->io_tree; - eb = (struct extent_buffer *)page->private; - if (page != eb->pages[0]) - return 0; + if (page->private == EXTENT_PAGE_PRIVATE) { + WARN_ON(1); + goto out; + } + if (!page->private) { + WARN_ON(1); + goto out; + } + len = page->private >> 2; + WARN_ON(len == 0); + + eb = alloc_extent_buffer(tree, start, len, page); + if (eb == NULL) { + WARN_ON(1); + goto out; + } + ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, + btrfs_header_generation(eb)); + BUG_ON(ret); + WARN_ON(!btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN)); + found_start = btrfs_header_bytenr(eb); if (found_start != start) { WARN_ON(1); - return 0; + goto err; } - if (eb->pages[0] != page) { + if (eb->first_page != page) { WARN_ON(1); - return 0; + goto err; } if (!PageUptodate(page)) { WARN_ON(1); - return 0; + goto err; } csum_tree_block(root, eb, 0); +err: + free_extent_buffer(eb); +out: return 0; } @@ -528,74 +537,34 @@ static noinline int check_leaf(struct btrfs_root *root, return 0; } -struct extent_buffer *find_eb_for_page(struct extent_io_tree *tree, - struct page *page, int max_walk) -{ - struct extent_buffer *eb; - u64 start = page_offset(page); - u64 target = start; - u64 min_start; - - if (start < max_walk) - min_start = 0; - else - min_start = start - max_walk; - - while (start >= min_start) { - eb = find_extent_buffer(tree, start, 0); - if (eb) { - /* - * we found an extent buffer and it contains our page - * horray! 
- */ - if (eb->start <= target && - eb->start + eb->len > target) - return eb; - - /* we found an extent buffer that wasn't for us */ - free_extent_buffer(eb); - return NULL; - } - if (start == 0) - break; - start -= PAGE_CACHE_SIZE; - } - return NULL; -} - static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state) { struct extent_io_tree *tree; u64 found_start; int found_level; + unsigned long len; struct extent_buffer *eb; struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; int ret = 0; - int reads_done; + tree = &BTRFS_I(page->mapping->host)->io_tree; + if (page->private == EXTENT_PAGE_PRIVATE) + goto out; if (!page->private) goto out; - tree = &BTRFS_I(page->mapping->host)->io_tree; - eb = (struct extent_buffer *)page->private; + len = page->private >> 2; + WARN_ON(len == 0); - /* the pending IO might have been the only thing that kept this buffer - * in memory. Make sure we have a ref for all this other checks - */ - extent_buffer_get(eb); - - reads_done = atomic_dec_and_test(&eb->io_pages); - if (!reads_done) - goto err; - - if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) { + eb = alloc_extent_buffer(tree, start, len, page); + if (eb == NULL) { ret = -EIO; - goto err; + goto out; } found_start = btrfs_header_bytenr(eb); - if (found_start != eb->start) { + if (found_start != start) { printk_ratelimited(KERN_INFO "btrfs bad tree block start " "%llu %llu\n", (unsigned long long)found_start, @@ -603,6 +572,13 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, ret = -EIO; goto err; } + if (eb->first_page != page) { + printk(KERN_INFO "btrfs bad first page %lu %lu\n", + eb->first_page->index, page->index); + WARN_ON(1); + ret = -EIO; + goto err; + } if (check_tree_block_fsid(root, eb)) { printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n", (unsigned long long)eb->start); @@ -630,31 +606,48 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, ret = -EIO; } - if (!ret) - set_extent_buffer_uptodate(eb); + end = min_t(u64, eb->len, PAGE_CACHE_SIZE); + end = eb->start + end - 1; err: if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) { clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags); btree_readahead_hook(root, eb, eb->start, ret); } - if (ret) - clear_extent_buffer_uptodate(eb); free_extent_buffer(eb); out: return ret; } -static int btree_io_failed_hook(struct page *page, int failed_mirror) +static int btree_io_failed_hook(struct bio *failed_bio, + struct page *page, u64 start, u64 end, + int mirror_num, struct extent_state *state) { + struct extent_io_tree *tree; + unsigned long len; struct extent_buffer *eb; struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; - eb = (struct extent_buffer *)page->private; - set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); - eb->failed_mirror = failed_mirror; - if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) + tree = &BTRFS_I(page->mapping->host)->io_tree; + if (page->private == EXTENT_PAGE_PRIVATE) + goto out; + if (!page->private) + goto out; + + len = page->private >> 2; + WARN_ON(len == 0); + + eb = alloc_extent_buffer(tree, start, len, page); + if (eb == NULL) + goto out; + + if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) { + clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags); btree_readahead_hook(root, eb, eb->start, -EIO); + } + free_extent_buffer(eb); + +out: return -EIO; /* we fixed nothing */ } @@ -726,14 +719,11 @@ unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info) static void 
run_one_async_start(struct btrfs_work *work) { struct async_submit_bio *async; - int ret; async = container_of(work, struct async_submit_bio, work); - ret = async->submit_bio_start(async->inode, async->rw, async->bio, - async->mirror_num, async->bio_flags, - async->bio_offset); - if (ret) - async->error = ret; + async->submit_bio_start(async->inode, async->rw, async->bio, + async->mirror_num, async->bio_flags, + async->bio_offset); } static void run_one_async_done(struct btrfs_work *work) @@ -754,12 +744,6 @@ static void run_one_async_done(struct btrfs_work *work) waitqueue_active(&fs_info->async_submit_wait)) wake_up(&fs_info->async_submit_wait); - /* If an error occured we just want to clean up the bio and move on */ - if (async->error) { - bio_endio(async->bio, async->error); - return; - } - async->submit_bio_done(async->inode, async->rw, async->bio, async->mirror_num, async->bio_flags, async->bio_offset); @@ -801,8 +785,6 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, async->bio_flags = bio_flags; async->bio_offset = bio_offset; - async->error = 0; - atomic_inc(&fs_info->nr_async_submits); if (rw & REQ_SYNC) @@ -824,18 +806,15 @@ static int btree_csum_one_bio(struct bio *bio) struct bio_vec *bvec = bio->bi_io_vec; int bio_index = 0; struct btrfs_root *root; - int ret = 0; WARN_ON(bio->bi_vcnt <= 0); while (bio_index < bio->bi_vcnt) { root = BTRFS_I(bvec->bv_page->mapping->host)->root; - ret = csum_dirty_buffer(root, bvec->bv_page); - if (ret) - break; + csum_dirty_buffer(root, bvec->bv_page); bio_index++; bvec++; } - return ret; + return 0; } static int __btree_submit_bio_start(struct inode *inode, int rw, @@ -847,7 +826,8 @@ static int __btree_submit_bio_start(struct inode *inode, int rw, * when we're called for a write, we're already in the async * submission context. 
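
/*
 * The run_one_async_start()/run_one_async_done() pair above splits one bio
 * submission into two phases: checksumming runs first in a worker thread
 * ("start") and the device submission follows ("done"); this hunk also drops
 * the async->error field that let a failed start phase short-circuit the
 * done phase. A minimal standalone sketch of that two-phase pattern -- the
 * struct and both phase functions below are hypothetical stand-ins, not the
 * kernel's btrfs_work machinery:
 */
#include <stdio.h>

struct async_submit {
	int  (*submit_start)(struct async_submit *w); /* checksum phase */
	void (*submit_done)(struct async_submit *w);  /* map-and-submit phase */
	int  error;   /* start-phase result, consulted before "done" runs */
	int  payload; /* stands in for the bio */
};

static int csum_phase(struct async_submit *w)
{
	printf("checksumming payload %d\n", w->payload);
	return 0; /* negative-errno style on failure */
}

static void submit_phase(struct async_submit *w)
{
	printf("submitting payload %d\n", w->payload);
}

/* Worker-side driver: record the start-phase result and run the done phase
 * only on success, as run_one_async_done() did before this hunk. */
static void run_async(struct async_submit *w)
{
	w->error = w->submit_start(w);
	if (w->error)
		return; /* the kernel would end the bio with w->error here */
	w->submit_done(w);
}

int main(void)
{
	struct async_submit w = { csum_phase, submit_phase, 0, 42 };
	run_async(&w);
	return 0;
}
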
Just jump into btrfs_map_bio */ - return btree_csum_one_bio(bio); + btree_csum_one_bio(bio); + return 0; } static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, @@ -867,16 +847,15 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, { int ret; - if (!(rw & REQ_WRITE)) { + ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info, + bio, 1); + BUG_ON(ret); + if (!(rw & REQ_WRITE)) { /* * called for a read, do the setup so that checksum validation * can happen in the async kernel threads */ - ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info, - bio, 1); - if (ret) - return ret; return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 0); } @@ -914,6 +893,34 @@ static int btree_migratepage(struct address_space *mapping, } #endif +static int btree_writepage(struct page *page, struct writeback_control *wbc) +{ + struct extent_io_tree *tree; + struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; + struct extent_buffer *eb; + int was_dirty; + + tree = &BTRFS_I(page->mapping->host)->io_tree; + if (!(current->flags & PF_MEMALLOC)) { + return extent_write_full_page(tree, page, + btree_get_extent, wbc); + } + + redirty_page_for_writepage(wbc, page); + eb = btrfs_find_tree_block(root, page_offset(page), PAGE_CACHE_SIZE); + WARN_ON(!eb); + + was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); + if (!was_dirty) { + spin_lock(&root->fs_info->delalloc_lock); + root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE; + spin_unlock(&root->fs_info->delalloc_lock); + } + free_extent_buffer(eb); + + unlock_page(page); + return 0; +} static int btree_writepages(struct address_space *mapping, struct writeback_control *wbc) @@ -933,7 +940,7 @@ static int btree_writepages(struct address_space *mapping, if (num_dirty < thresh) return 0; } - return btree_write_cache_pages(mapping, wbc); + return extent_writepages(tree, mapping, btree_get_extent, wbc); } static int btree_readpage(struct file *file, struct page *page) @@ -945,8 +952,16 @@ static int btree_readpage(struct file *file, struct page *page) static int btree_releasepage(struct page *page, gfp_t gfp_flags) { + struct extent_io_tree *tree; + struct extent_map_tree *map; + int ret; + if (PageWriteback(page) || PageDirty(page)) return 0; + + tree = &BTRFS_I(page->mapping->host)->io_tree; + map = &BTRFS_I(page->mapping->host)->extent_tree; + /* * We need to mask out eg. 
__GFP_HIGHMEM and __GFP_DMA32 as we're doing * slab allocation from alloc_extent_state down the callchain where @@ -954,7 +969,18 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags) */ gfp_flags &= ~GFP_SLAB_BUG_MASK; - return try_release_extent_buffer(page, gfp_flags); + ret = try_release_extent_state(map, tree, page, gfp_flags); + if (!ret) + return 0; + + ret = try_release_extent_buffer(tree, page); + if (ret == 1) { + ClearPagePrivate(page); + set_page_private(page, 0); + page_cache_release(page); + } + + return ret; } static void btree_invalidatepage(struct page *page, unsigned long offset) @@ -972,28 +998,15 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) } } -static int btree_set_page_dirty(struct page *page) -{ - struct extent_buffer *eb; - - BUG_ON(!PagePrivate(page)); - eb = (struct extent_buffer *)page->private; - BUG_ON(!eb); - BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); - BUG_ON(!atomic_read(&eb->refs)); - btrfs_assert_tree_locked(eb); - return __set_page_dirty_nobuffers(page); -} - static const struct address_space_operations btree_aops = { .readpage = btree_readpage, + .writepage = btree_writepage, .writepages = btree_writepages, .releasepage = btree_releasepage, .invalidatepage = btree_invalidatepage, #ifdef CONFIG_MIGRATION .migratepage = btree_migratepage, #endif - .set_page_dirty = btree_set_page_dirty, }; int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, @@ -1036,7 +1049,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) { free_extent_buffer(buf); return -EIO; - } else if (extent_buffer_uptodate(buf)) { + } else if (extent_buffer_uptodate(io_tree, buf, NULL)) { *eb = buf; } else { free_extent_buffer(buf); @@ -1061,20 +1074,20 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, struct extent_buffer *eb; eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, - bytenr, blocksize); + bytenr, blocksize, NULL); return eb; } int btrfs_write_tree_block(struct extent_buffer *buf) { - return filemap_fdatawrite_range(buf->pages[0]->mapping, buf->start, + return filemap_fdatawrite_range(buf->first_page->mapping, buf->start, buf->start + buf->len - 1); } int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) { - return filemap_fdatawait_range(buf->pages[0]->mapping, + return filemap_fdatawait_range(buf->first_page->mapping, buf->start, buf->start + buf->len - 1); } @@ -1089,13 +1102,17 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, return NULL; ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); + + if (ret == 0) + set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); return buf; } -void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_buffer *buf) +int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct extent_buffer *buf) { + struct inode *btree_inode = root->fs_info->btree_inode; if (btrfs_header_generation(buf) == root->fs_info->running_transaction->transid) { btrfs_assert_tree_locked(buf); @@ -1104,27 +1121,23 @@ void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, spin_lock(&root->fs_info->delalloc_lock); if (root->fs_info->dirty_metadata_bytes >= buf->len) root->fs_info->dirty_metadata_bytes -= buf->len; - else { - spin_unlock(&root->fs_info->delalloc_lock); - btrfs_panic(root->fs_info, -EOVERFLOW, - "Can't clear %lu bytes from " - " 
dirty_mdatadata_bytes (%lu)", - buf->len, - root->fs_info->dirty_metadata_bytes); - } + else + WARN_ON(1); spin_unlock(&root->fs_info->delalloc_lock); } /* ugh, clear_extent_buffer_dirty needs to lock the page */ btrfs_set_lock_blocking(buf); - clear_extent_buffer_dirty(buf); + clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, + buf); } + return 0; } -static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, - u32 stripesize, struct btrfs_root *root, - struct btrfs_fs_info *fs_info, - u64 objectid) +static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, + u32 stripesize, struct btrfs_root *root, + struct btrfs_fs_info *fs_info, + u64 objectid) { root->node = NULL; root->commit_root = NULL; @@ -1176,12 +1189,13 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->defrag_running = 0; root->root_key.objectid = objectid; root->anon_dev = 0; + return 0; } -static int __must_check find_and_setup_root(struct btrfs_root *tree_root, - struct btrfs_fs_info *fs_info, - u64 objectid, - struct btrfs_root *root) +static int find_and_setup_root(struct btrfs_root *tree_root, + struct btrfs_fs_info *fs_info, + u64 objectid, + struct btrfs_root *root) { int ret; u32 blocksize; @@ -1194,8 +1208,7 @@ static int __must_check find_and_setup_root(struct btrfs_root *tree_root, &root->root_item, &root->root_key); if (ret > 0) return -ENOENT; - else if (ret < 0) - return ret; + BUG_ON(ret); generation = btrfs_root_generation(&root->root_item); blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); @@ -1364,7 +1377,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), blocksize, generation); root->commit_root = btrfs_root_node(root); - BUG_ON(!root->node); /* -ENOMEM */ + BUG_ON(!root->node); out: if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { root->ref_cows = 1; @@ -1500,6 +1513,41 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) return 0; } +static int bio_ready_for_csum(struct bio *bio) +{ + u64 length = 0; + u64 buf_len = 0; + u64 start = 0; + struct page *page; + struct extent_io_tree *io_tree = NULL; + struct bio_vec *bvec; + int i; + int ret; + + bio_for_each_segment(bvec, bio, i) { + page = bvec->bv_page; + if (page->private == EXTENT_PAGE_PRIVATE) { + length += bvec->bv_len; + continue; + } + if (!page->private) { + length += bvec->bv_len; + continue; + } + length = bvec->bv_len; + buf_len = page->private >> 2; + start = page_offset(page) + bvec->bv_offset; + io_tree = &BTRFS_I(page->mapping->host)->io_tree; + } + /* are we fully contained in this bio? */ + if (buf_len <= length) + return 1; + + ret = extent_range_uptodate(io_tree, start + length, + start + buf_len - 1); + return ret; +} + /* * called by the kthread helper functions to finally call the bio end_io * functions. This is where read checksum verification actually happens @@ -1515,6 +1563,17 @@ static void end_workqueue_fn(struct btrfs_work *work) bio = end_io_wq->bio; fs_info = end_io_wq->info; + /* metadata bio reads are special because the whole tree block must + * be checksummed at once. This makes sure the entire block is in + * ram and up to date before trying to verify things. 
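
/*
 * bio_ready_for_csum() above decides whether the tree block touched by a
 * metadata read bio is fully resident before checksum verification: it walks
 * the bio segments, picks up the block length stashed in page->private, and
 * compares it with the bytes actually covered (the kernel falls back to
 * extent_range_uptodate() when it is not). A standalone sketch of that
 * containment test with simplified types -- no struct bio, and the
 * page-private encoding reduced to a plain length field:
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct segment {
	uint64_t len;     /* bytes this bio segment covers */
	uint64_t buf_len; /* full tree-block length, 0 for non-block pages */
};

/* true when the accumulated segments cover the whole block, i.e. it is
 * safe to checksum the block in one pass without further waiting */
static bool block_ready_for_csum(const struct segment *segs, int n)
{
	uint64_t length = 0, buf_len = 0;

	for (int i = 0; i < n; i++) {
		if (!segs[i].buf_len) {
			length += segs[i].len;
			continue;
		}
		/* the page that carries the block restarts the count */
		length = segs[i].len;
		buf_len = segs[i].buf_len;
	}
	/* are we fully contained in this bio? */
	return buf_len <= length;
}

int main(void)
{
	struct segment one_page[] = { { 4096, 4096 } };
	struct segment partial[]  = { { 4096, 16384 } };

	printf("4k block, 4k seen:  %d\n", block_ready_for_csum(one_page, 1));
	printf("16k block, 4k seen: %d\n", block_ready_for_csum(partial, 1));
	return 0;
}
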
For + * blocksize <= pagesize, it is basically a noop + */ + if (!(bio->bi_rw & REQ_WRITE) && end_io_wq->metadata && + !bio_ready_for_csum(bio)) { + btrfs_queue_worker(&fs_info->endio_meta_workers, + &end_io_wq->work); + return; + } error = end_io_wq->error; bio->bi_private = end_io_wq->private; bio->bi_end_io = end_io_wq->end_io; @@ -1555,10 +1614,9 @@ static int transaction_kthread(void *arg) u64 transid; unsigned long now; unsigned long delay; - bool cannot_commit; + int ret; do { - cannot_commit = false; delay = HZ * 30; vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); mutex_lock(&root->fs_info->transaction_kthread_mutex); @@ -1580,14 +1638,11 @@ static int transaction_kthread(void *arg) transid = cur->transid; spin_unlock(&root->fs_info->trans_lock); - /* If the file system is aborted, this will always fail. */ trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - cannot_commit = true; - goto sleep; - } + BUG_ON(IS_ERR(trans)); if (transid == trans->transid) { - btrfs_commit_transaction(trans, root); + ret = btrfs_commit_transaction(trans, root); + BUG_ON(ret); } else { btrfs_end_transaction(trans, root); } @@ -1598,8 +1653,7 @@ static int transaction_kthread(void *arg) if (!try_to_freeze()) { set_current_state(TASK_INTERRUPTIBLE); if (!kthread_should_stop() && - (!btrfs_transaction_blocked(root->fs_info) || - cannot_commit)) + !btrfs_transaction_blocked(root->fs_info)) schedule_timeout(delay); __set_current_state(TASK_RUNNING); } @@ -1988,7 +2042,6 @@ int open_ctree(struct super_block *sb, RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node); extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, fs_info->btree_inode->i_mapping); - BTRFS_I(fs_info->btree_inode)->io_tree.track_uptodate = 0; extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree); BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; @@ -2031,7 +2084,6 @@ int open_ctree(struct super_block *sb, __setup_root(4096, 4096, 4096, 4096, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); - invalidate_bdev(fs_devices->latest_bdev); bh = btrfs_read_dev_super(fs_devices->latest_bdev); if (!bh) { err = -EINVAL; @@ -2052,12 +2104,7 @@ int open_ctree(struct super_block *sb, /* check FS state, whether FS is broken. */ fs_info->fs_state |= btrfs_super_flags(disk_super); - ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); - if (ret) { - printk(KERN_ERR "btrfs: superblock contains fatal errors\n"); - err = ret; - goto fail_alloc; - } + btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); /* * run through our array of backup supers and setup @@ -2088,55 +2135,10 @@ int open_ctree(struct super_block *sb, goto fail_alloc; } - if (btrfs_super_leafsize(disk_super) != - btrfs_super_nodesize(disk_super)) { - printk(KERN_ERR "BTRFS: couldn't mount because metadata " - "blocksizes don't match. 
node %d leaf %d\n", - btrfs_super_nodesize(disk_super), - btrfs_super_leafsize(disk_super)); - err = -EINVAL; - goto fail_alloc; - } - if (btrfs_super_leafsize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) { - printk(KERN_ERR "BTRFS: couldn't mount because metadata " - "blocksize (%d) was too large\n", - btrfs_super_leafsize(disk_super)); - err = -EINVAL; - goto fail_alloc; - } - features = btrfs_super_incompat_flags(disk_super); features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; - - /* - * flag our filesystem as having big metadata blocks if - * they are bigger than the page size - */ - if (btrfs_super_leafsize(disk_super) > PAGE_CACHE_SIZE) { - if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA)) - printk(KERN_INFO "btrfs flagging fs with big metadata feature\n"); - features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; - } - - nodesize = btrfs_super_nodesize(disk_super); - leafsize = btrfs_super_leafsize(disk_super); - sectorsize = btrfs_super_sectorsize(disk_super); - stripesize = btrfs_super_stripesize(disk_super); - - /* - * mixed block groups end up with duplicate but slightly offset - * extent buffers for the same range. It leads to corruptions - */ - if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && - (sectorsize != leafsize)) { - printk(KERN_WARNING "btrfs: unequal leaf/node/sector sizes " - "are not allowed for mixed block groups on %s\n", - sb->s_id); - goto fail_alloc; - } - btrfs_set_super_incompat_flags(disk_super, features); features = btrfs_super_compat_ro_flags(disk_super) & @@ -2240,6 +2242,10 @@ int open_ctree(struct super_block *sb, fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, 4 * 1024 * 1024 / PAGE_CACHE_SIZE); + nodesize = btrfs_super_nodesize(disk_super); + leafsize = btrfs_super_leafsize(disk_super); + sectorsize = btrfs_super_sectorsize(disk_super); + stripesize = btrfs_super_stripesize(disk_super); tree_root->nodesize = nodesize; tree_root->leafsize = leafsize; tree_root->sectorsize = sectorsize; @@ -2279,7 +2285,7 @@ int open_ctree(struct super_block *sb, chunk_root->node = read_tree_block(chunk_root, btrfs_super_chunk_root(disk_super), blocksize, generation); - BUG_ON(!chunk_root->node); /* -ENOMEM */ + BUG_ON(!chunk_root->node); if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n", sb->s_id); @@ -2419,31 +2425,21 @@ int open_ctree(struct super_block *sb, log_tree_root->node = read_tree_block(tree_root, bytenr, blocksize, generation + 1); - /* returns with log_tree_root freed on success */ ret = btrfs_recover_log_trees(log_tree_root); - if (ret) { - btrfs_error(tree_root->fs_info, ret, - "Failed to recover log tree"); - free_extent_buffer(log_tree_root->node); - kfree(log_tree_root); - goto fail_trans_kthread; - } + BUG_ON(ret); if (sb->s_flags & MS_RDONLY) { - ret = btrfs_commit_super(tree_root); - if (ret) - goto fail_trans_kthread; + ret = btrfs_commit_super(tree_root); + BUG_ON(ret); } } ret = btrfs_find_orphan_roots(tree_root); - if (ret) - goto fail_trans_kthread; + BUG_ON(ret); if (!(sb->s_flags & MS_RDONLY)) { ret = btrfs_cleanup_fs_roots(fs_info); - if (ret) { - } + BUG_ON(ret); ret = btrfs_recover_relocation(tree_root); if (ret < 0) { @@ -2863,8 +2859,6 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) if (total_errors > max_errors) { printk(KERN_ERR "btrfs: %d errors while writing supers\n", total_errors); - - /* This shouldn't happen. 
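
/*
 * The open_ctree() hunk above removes several superblock geometry checks:
 * nodesize must equal leafsize, leafsize must not exceed the maximum
 * metadata block size, and mixed block groups additionally require
 * sectorsize == leafsize (mixed groups place data and metadata in the same
 * extents, so the two allocation units have to agree). A compact standalone
 * sketch of those checks; the 64K limit mirrors
 * BTRFS_MAX_METADATA_BLOCKSIZE and everything else is an approximation:
 */
#include <stdio.h>

#define MAX_METADATA_BLOCKSIZE 65536
#define ERR_INVAL (-22) /* -EINVAL */

struct super_geometry {
	unsigned int nodesize;
	unsigned int leafsize;
	unsigned int sectorsize;
	int mixed_groups; /* BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS set? */
};

static int check_super_geometry(const struct super_geometry *g)
{
	if (g->leafsize != g->nodesize) {
		fprintf(stderr, "blocksizes don't match: node %u leaf %u\n",
			g->nodesize, g->leafsize);
		return ERR_INVAL;
	}
	if (g->leafsize > MAX_METADATA_BLOCKSIZE) {
		fprintf(stderr, "metadata blocksize %u too large\n",
			g->leafsize);
		return ERR_INVAL;
	}
	if (g->mixed_groups && g->sectorsize != g->leafsize) {
		fprintf(stderr, "mixed groups need equal leaf/sector sizes\n");
		return ERR_INVAL;
	}
	return 0;
}

int main(void)
{
	struct super_geometry ok  = { 4096, 4096, 4096, 1 };
	struct super_geometry bad = { 4096, 16384, 4096, 0 };

	printf("ok:  %d\n", check_super_geometry(&ok));
	printf("bad: %d\n", check_super_geometry(&bad));
	return 0;
}
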
FUA is masked off if unsupported */ BUG(); } @@ -2881,9 +2875,9 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) } mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); if (total_errors > max_errors) { - btrfs_error(root->fs_info, -EIO, - "%d errors while writing supers", total_errors); - return -EIO; + printk(KERN_ERR "btrfs: %d errors while writing supers\n", + total_errors); + BUG(); } return 0; } @@ -2897,20 +2891,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, return ret; } -/* Kill all outstanding I/O */ -void btrfs_abort_devices(struct btrfs_root *root) -{ - struct list_head *head; - struct btrfs_device *dev; - mutex_lock(&root->fs_info->fs_devices->device_list_mutex); - head = &root->fs_info->fs_devices->devices; - list_for_each_entry_rcu(dev, head, dev_list) { - blk_abort_queue(dev->bdev->bd_disk->queue); - } - mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); -} - -void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) +int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) { spin_lock(&fs_info->fs_roots_radix_lock); radix_tree_delete(&fs_info->fs_roots_radix, @@ -2923,6 +2904,7 @@ void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) __btrfs_remove_free_space_cache(root->free_ino_pinned); __btrfs_remove_free_space_cache(root->free_ino_ctl); free_fs_root(root); + return 0; } static void free_fs_root(struct btrfs_root *root) @@ -2939,7 +2921,7 @@ static void free_fs_root(struct btrfs_root *root) kfree(root); } -static void del_fs_roots(struct btrfs_fs_info *fs_info) +static int del_fs_roots(struct btrfs_fs_info *fs_info) { int ret; struct btrfs_root *gang[8]; @@ -2968,6 +2950,7 @@ static void del_fs_roots(struct btrfs_fs_info *fs_info) for (i = 0; i < ret; i++) btrfs_free_fs_root(fs_info, gang[i]); } + return 0; } int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) @@ -3016,21 +2999,14 @@ int btrfs_commit_super(struct btrfs_root *root) if (IS_ERR(trans)) return PTR_ERR(trans); ret = btrfs_commit_transaction(trans, root); - if (ret) - return ret; + BUG_ON(ret); /* run commit again to drop the original snapshot */ trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); - ret = btrfs_commit_transaction(trans, root); - if (ret) - return ret; + btrfs_commit_transaction(trans, root); ret = btrfs_write_and_wait_transaction(NULL, root); - if (ret) { - btrfs_error(root->fs_info, ret, - "Failed to sync btree inode to disk."); - return ret; - } + BUG_ON(ret); ret = write_ctree_super(NULL, root, 0); return ret; @@ -3146,9 +3122,10 @@ int close_ctree(struct btrfs_root *root) int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) { int ret; - struct inode *btree_inode = buf->pages[0]->mapping->host; + struct inode *btree_inode = buf->first_page->mapping->host; - ret = extent_buffer_uptodate(buf); + ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf, + NULL); if (!ret) return ret; @@ -3159,13 +3136,16 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) int btrfs_set_buffer_uptodate(struct extent_buffer *buf) { - return set_extent_buffer_uptodate(buf); + struct inode *btree_inode = buf->first_page->mapping->host; + return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, + buf); } void btrfs_mark_buffer_dirty(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; 
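
/*
 * Both btree_writepage() earlier in this file and btrfs_mark_buffer_dirty()
 * here use the same accounting idiom: test_and_set_bit() reports the
 * previous dirty state, and the global dirty_metadata_bytes counter is
 * bumped only on the clean->dirty transition, so re-dirtying a buffer never
 * double-counts it. A minimal userspace rendering of the idiom with C11
 * atomics (the names are hypothetical, not kernel API):
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_ulong dirty_metadata_bytes; /* global dirty-byte counter */

struct buffer {
	atomic_flag dirty; /* stands in for EXTENT_BUFFER_DIRTY */
	unsigned long len;
};

static void mark_dirty(struct buffer *b)
{
	/* test-and-set returns the previous value: true == already dirty */
	if (!atomic_flag_test_and_set(&b->dirty))
		atomic_fetch_add(&dirty_metadata_bytes, b->len);
}

int main(void)
{
	struct buffer b = { ATOMIC_FLAG_INIT, 4096 };

	mark_dirty(&b);
	mark_dirty(&b); /* second call must not add again */
	printf("dirty bytes: %lu\n", atomic_load(&dirty_metadata_bytes));
	return 0;
}
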
u64 transid = btrfs_header_generation(buf); + struct inode *btree_inode = root->fs_info->btree_inode; int was_dirty; btrfs_assert_tree_locked(buf); @@ -3177,7 +3157,8 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) (unsigned long long)root->fs_info->generation); WARN_ON(1); } - was_dirty = set_extent_buffer_dirty(buf); + was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, + buf); if (!was_dirty) { spin_lock(&root->fs_info->delalloc_lock); root->fs_info->dirty_metadata_bytes += buf->len; @@ -3231,8 +3212,12 @@ void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; - return btree_read_extent_buffer_pages(root, buf, 0, parent_transid); + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; + int ret; + ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); + if (ret == 0) + set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); + return ret; } static int btree_lock_page_hook(struct page *page, void *data, @@ -3240,21 +3225,17 @@ static int btree_lock_page_hook(struct page *page, void *data, { struct inode *inode = page->mapping->host; struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_buffer *eb; + unsigned long len; + u64 bytenr = page_offset(page); - /* - * We culled this eb but the page is still hanging out on the mapping, - * carry on. - */ - if (!PagePrivate(page)) + if (page->private == EXTENT_PAGE_PRIVATE) goto out; - eb = (struct extent_buffer *)page->private; - if (!eb) { - WARN_ON(1); - goto out; - } - if (page != eb->pages[0]) + len = page->private >> 2; + eb = find_extent_buffer(io_tree, bytenr, len); + if (!eb) goto out; if (!btrfs_try_tree_write_lock(eb)) { @@ -3273,6 +3254,7 @@ static int btree_lock_page_hook(struct page *page, void *data, } btrfs_tree_unlock(eb); + free_extent_buffer(eb); out: if (!trylock_page(page)) { flush_fn(data); @@ -3281,23 +3263,15 @@ static int btree_lock_page_hook(struct page *page, void *data, return 0; } -static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, +static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, int read_only) { - if (btrfs_super_csum_type(fs_info->super_copy) >= ARRAY_SIZE(btrfs_csum_sizes)) { - printk(KERN_ERR "btrfs: unsupported checksum algorithm\n"); - return -EINVAL; - } - if (read_only) - return 0; + return; - if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { + if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) printk(KERN_WARNING "warning: mount fs with errors, " "running btrfsck is recommended\n"); - } - - return 0; } int btrfs_error_commit_super(struct btrfs_root *root) @@ -3319,7 +3293,7 @@ int btrfs_error_commit_super(struct btrfs_root *root) return ret; } -static void btrfs_destroy_ordered_operations(struct btrfs_root *root) +static int btrfs_destroy_ordered_operations(struct btrfs_root *root) { struct btrfs_inode *btrfs_inode; struct list_head splice; @@ -3341,9 +3315,11 @@ static void btrfs_destroy_ordered_operations(struct btrfs_root *root) spin_unlock(&root->fs_info->ordered_extent_lock); mutex_unlock(&root->fs_info->ordered_operations_mutex); + + return 0; } -static void btrfs_destroy_ordered_extents(struct btrfs_root *root) +static int btrfs_destroy_ordered_extents(struct btrfs_root *root) { struct list_head splice; struct btrfs_ordered_extent *ordered; @@ -3375,10 +3351,12 @@ static void 
btrfs_destroy_ordered_extents(struct btrfs_root *root) } spin_unlock(&root->fs_info->ordered_extent_lock); + + return 0; } -int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, - struct btrfs_root *root) +static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, + struct btrfs_root *root) { struct rb_node *node; struct btrfs_delayed_ref_root *delayed_refs; @@ -3387,7 +3365,6 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, delayed_refs = &trans->delayed_refs; -again: spin_lock(&delayed_refs->lock); if (delayed_refs->num_entries == 0) { spin_unlock(&delayed_refs->lock); @@ -3409,7 +3386,6 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, struct btrfs_delayed_ref_head *head; head = btrfs_delayed_node_to_head(ref); - spin_unlock(&delayed_refs->lock); mutex_lock(&head->mutex); kfree(head->extent_op); delayed_refs->num_heads--; @@ -3417,9 +3393,8 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, delayed_refs->num_heads_ready--; list_del_init(&head->cluster); mutex_unlock(&head->mutex); - btrfs_put_delayed_ref(ref); - goto again; } + spin_unlock(&delayed_refs->lock); btrfs_put_delayed_ref(ref); @@ -3432,7 +3407,7 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, return ret; } -static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) +static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) { struct btrfs_pending_snapshot *snapshot; struct list_head splice; @@ -3450,9 +3425,11 @@ static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) kfree(snapshot); } + + return 0; } -static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root) +static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root) { struct btrfs_inode *btrfs_inode; struct list_head splice; @@ -3472,6 +3449,8 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root) } spin_unlock(&root->fs_info->delalloc_lock); + + return 0; } static int btrfs_destroy_marked_extents(struct btrfs_root *root, @@ -3562,43 +3541,13 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root, return 0; } -void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, - struct btrfs_root *root) -{ - btrfs_destroy_delayed_refs(cur_trans, root); - btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv, - cur_trans->dirty_pages.dirty_bytes); - - /* FIXME: cleanup wait for commit */ - cur_trans->in_commit = 1; - cur_trans->blocked = 1; - if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) - wake_up(&root->fs_info->transaction_blocked_wait); - - cur_trans->blocked = 0; - if (waitqueue_active(&root->fs_info->transaction_wait)) - wake_up(&root->fs_info->transaction_wait); - - cur_trans->commit_done = 1; - if (waitqueue_active(&cur_trans->commit_wait)) - wake_up(&cur_trans->commit_wait); - - btrfs_destroy_pending_snapshots(cur_trans); - - btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages, - EXTENT_DIRTY); - - /* - memset(cur_trans, 0, sizeof(*cur_trans)); - kmem_cache_free(btrfs_transaction_cachep, cur_trans); - */ -} - -int btrfs_cleanup_transaction(struct btrfs_root *root) +static int btrfs_cleanup_transaction(struct btrfs_root *root) { struct btrfs_transaction *t; LIST_HEAD(list); + WARN_ON(1); + mutex_lock(&root->fs_info->transaction_kthread_mutex); spin_lock(&root->fs_info->trans_lock); @@ -3663,17 +3612,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root) return 0; } -static int btree_writepage_io_failed_hook(struct bio *bio, struct page *page, - u64 
start, u64 end, - struct extent_state *state) -{ - struct super_block *sb = page->mapping->host->i_sb; - struct btrfs_fs_info *fs_info = btrfs_sb(sb); - btrfs_error(fs_info, -EIO, - "Error occured while writing out btree at %llu", start); - return -EIO; -} - static struct extent_io_ops btree_extent_io_ops = { .write_cache_pages_lock_hook = btree_lock_page_hook, .readpage_end_io_hook = btree_readpage_end_io_hook, @@ -3681,5 +3619,4 @@ static struct extent_io_ops btree_extent_io_ops = { .submit_bio_hook = btree_submit_bio_hook, /* note we're sharing with inode.c for the merge bio hook */ .merge_bio_hook = btrfs_merge_bio_hook, - .writepage_io_failed_hook = btree_writepage_io_failed_hook, }; diff --git a/trunk/fs/btrfs/disk-io.h b/trunk/fs/btrfs/disk-io.h index a7ace1a2dd12..e4bc4741319b 100644 --- a/trunk/fs/btrfs/disk-io.h +++ b/trunk/fs/btrfs/disk-io.h @@ -44,8 +44,8 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, int mirror_num, struct extent_buffer **eb); struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); -void clean_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct extent_buffer *buf); +int clean_tree_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *buf); int open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices, char *options); @@ -64,7 +64,7 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info); void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); -void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); +int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); void btrfs_mark_buffer_dirty(struct extent_buffer *buf); int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); int btrfs_set_buffer_uptodate(struct extent_buffer *buf); @@ -85,10 +85,6 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); int btrfs_add_log_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_cleanup_transaction(struct btrfs_root *root); -void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans, - struct btrfs_root *root); -void btrfs_abort_devices(struct btrfs_root *root); #ifdef CONFIG_DEBUG_LOCK_ALLOC void btrfs_init_lockdep(void); diff --git a/trunk/fs/btrfs/export.c b/trunk/fs/btrfs/export.c index e887ee62b6d4..5f77166fd01c 100644 --- a/trunk/fs/btrfs/export.c +++ b/trunk/fs/btrfs/export.c @@ -193,7 +193,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child) if (ret < 0) goto fail; - BUG_ON(ret == 0); /* Key with offset of -1 found */ + BUG_ON(ret == 0); if (path->slots[0] == 0) { ret = -ENOENT; goto fail; diff --git a/trunk/fs/btrfs/extent-tree.c b/trunk/fs/btrfs/extent-tree.c index a84420491c11..37e0a800d34e 100644 --- a/trunk/fs/btrfs/extent-tree.c +++ b/trunk/fs/btrfs/extent-tree.c @@ -245,7 +245,7 @@ static int exclude_super_stripes(struct btrfs_root *root, cache->bytes_super += stripe_len; ret = add_excluded_extent(root, cache->key.objectid, stripe_len); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); } for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { @@ -253,13 +253,13 @@ static int exclude_super_stripes(struct btrfs_root *root, ret = btrfs_rmap_block(&root->fs_info->mapping_tree, cache->key.objectid, 
bytenr, 0, &logical, &nr, &stripe_len); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); while (nr--) { cache->bytes_super += stripe_len; ret = add_excluded_extent(root, logical[nr], stripe_len); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); } kfree(logical); @@ -321,7 +321,7 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, total_added += size; ret = btrfs_add_free_space(block_group, start, size); - BUG_ON(ret); /* -ENOMEM or logic error */ + BUG_ON(ret); start = extent_end + 1; } else { break; @@ -332,7 +332,7 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, size = end - start; total_added += size; ret = btrfs_add_free_space(block_group, start, size); - BUG_ON(ret); /* -ENOMEM or logic error */ + BUG_ON(ret); } return total_added; @@ -474,8 +474,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, int ret = 0; caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS); - if (!caching_ctl) - return -ENOMEM; + BUG_ON(!caching_ctl); INIT_LIST_HEAD(&caching_ctl->list); mutex_init(&caching_ctl->mutex); @@ -983,7 +982,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans, ret = btrfs_next_leaf(root, path); if (ret < 0) return ret; - BUG_ON(ret > 0); /* Corruption */ + BUG_ON(ret > 0); leaf = path->nodes[0]; } btrfs_item_key_to_cpu(leaf, &found_key, @@ -1009,9 +1008,9 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans, new_size + extra_size, 1); if (ret < 0) return ret; - BUG_ON(ret); /* Corruption */ + BUG_ON(ret); - btrfs_extend_item(trans, root, path, new_size); + ret = btrfs_extend_item(trans, root, path, new_size); leaf = path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); @@ -1479,11 +1478,7 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, err = ret; goto out; } - if (ret && !insert) { - err = -ENOENT; - goto out; - } - BUG_ON(ret); /* Corruption */ + BUG_ON(ret); leaf = path->nodes[0]; item_size = btrfs_item_size_nr(leaf, path->slots[0]); @@ -1597,13 +1592,13 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, * helper to add new inline back ref */ static noinline_for_stack -void setup_inline_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - struct btrfs_extent_inline_ref *iref, - u64 parent, u64 root_objectid, - u64 owner, u64 offset, int refs_to_add, - struct btrfs_delayed_extent_op *extent_op) +int setup_inline_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_extent_inline_ref *iref, + u64 parent, u64 root_objectid, + u64 owner, u64 offset, int refs_to_add, + struct btrfs_delayed_extent_op *extent_op) { struct extent_buffer *leaf; struct btrfs_extent_item *ei; @@ -1613,6 +1608,7 @@ void setup_inline_extent_backref(struct btrfs_trans_handle *trans, u64 refs; int size; int type; + int ret; leaf = path->nodes[0]; ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); @@ -1621,7 +1617,7 @@ void setup_inline_extent_backref(struct btrfs_trans_handle *trans, type = extent_ref_type(parent, owner); size = btrfs_extent_inline_ref_size(type); - btrfs_extend_item(trans, root, path, size); + ret = btrfs_extend_item(trans, root, path, size); ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(leaf, ei); @@ -1656,6 +1652,7 @@ void setup_inline_extent_backref(struct btrfs_trans_handle *trans, btrfs_set_extent_inline_ref_offset(leaf, iref, 
root_objectid); } btrfs_mark_buffer_dirty(leaf); + return 0; } static int lookup_extent_backref(struct btrfs_trans_handle *trans, @@ -1690,12 +1687,12 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans, * helper to update/remove inline back ref */ static noinline_for_stack -void update_inline_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - struct btrfs_extent_inline_ref *iref, - int refs_to_mod, - struct btrfs_delayed_extent_op *extent_op) +int update_inline_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_extent_inline_ref *iref, + int refs_to_mod, + struct btrfs_delayed_extent_op *extent_op) { struct extent_buffer *leaf; struct btrfs_extent_item *ei; @@ -1706,6 +1703,7 @@ void update_inline_extent_backref(struct btrfs_trans_handle *trans, u32 item_size; int size; int type; + int ret; u64 refs; leaf = path->nodes[0]; @@ -1747,9 +1745,10 @@ void update_inline_extent_backref(struct btrfs_trans_handle *trans, memmove_extent_buffer(leaf, ptr, ptr + size, end - ptr - size); item_size -= size; - btrfs_truncate_item(trans, root, path, item_size, 1); + ret = btrfs_truncate_item(trans, root, path, item_size, 1); } btrfs_mark_buffer_dirty(leaf); + return 0; } static noinline_for_stack @@ -1769,13 +1768,13 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans, root_objectid, owner, offset, 1); if (ret == 0) { BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); - update_inline_extent_backref(trans, root, path, iref, - refs_to_add, extent_op); + ret = update_inline_extent_backref(trans, root, path, iref, + refs_to_add, extent_op); } else if (ret == -ENOENT) { - setup_inline_extent_backref(trans, root, path, iref, parent, - root_objectid, owner, offset, - refs_to_add, extent_op); - ret = 0; + ret = setup_inline_extent_backref(trans, root, path, iref, + parent, root_objectid, + owner, offset, refs_to_add, + extent_op); } return ret; } @@ -1805,12 +1804,12 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_extent_inline_ref *iref, int refs_to_drop, int is_data) { - int ret = 0; + int ret; BUG_ON(!is_data && refs_to_drop != 1); if (iref) { - update_inline_extent_backref(trans, root, path, iref, - -refs_to_drop, NULL); + ret = update_inline_extent_backref(trans, root, path, iref, + -refs_to_drop, NULL); } else if (is_data) { ret = remove_extent_data_ref(trans, root, path, refs_to_drop); } else { @@ -1836,7 +1835,6 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, /* Tell the block device(s) that the sectors can be discarded */ ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD, bytenr, &num_bytes, &bbio, 0); - /* Error condition is -ENOMEM */ if (!ret) { struct btrfs_bio_stripe *stripe = bbio->stripes; int i; @@ -1852,7 +1850,7 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, if (!ret) discarded_bytes += stripe->length; else if (ret != -EOPNOTSUPP) - break; /* Logic errors or -ENOMEM, or -EIO but I don't know how that could happen JDM */ + break; /* * Just in case we get back EOPNOTSUPP for some reason, @@ -1871,7 +1869,6 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, return ret; } -/* Can return -ENOMEM */ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, @@ -1947,8 +1944,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, ret = 
insert_extent_backref(trans, root->fs_info->extent_root, path, bytenr, parent, root_objectid, owner, offset, refs_to_add); - if (ret) - btrfs_abort_transaction(trans, root, ret); + BUG_ON(ret); out: btrfs_free_path(path); return err; @@ -2035,9 +2031,6 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans, int ret; int err = 0; - if (trans->aborted) - return 0; - path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -2135,11 +2128,7 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, struct btrfs_delayed_extent_op *extent_op, int insert_reserved) { - int ret = 0; - - if (trans->aborted) - return 0; - + int ret; if (btrfs_delayed_ref_is_head(node)) { struct btrfs_delayed_ref_head *head; /* @@ -2157,10 +2146,11 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, ret = btrfs_del_csums(trans, root, node->bytenr, node->num_bytes); + BUG_ON(ret); } } mutex_unlock(&head->mutex); - return ret; + return 0; } if (node->type == BTRFS_TREE_BLOCK_REF_KEY || @@ -2207,10 +2197,6 @@ select_delayed_ref(struct btrfs_delayed_ref_head *head) return NULL; } -/* - * Returns 0 on success or if called with an already aborted transaction. - * Returns -ENOMEM or -EIO on failure and will abort the transaction. - */ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct list_head *cluster) @@ -2299,13 +2285,9 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, ret = run_delayed_extent_op(trans, root, ref, extent_op); + BUG_ON(ret); kfree(extent_op); - if (ret) { - printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret); - return ret; - } - goto next; } @@ -2326,16 +2308,11 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, ret = run_one_delayed_ref(trans, root, ref, extent_op, must_insert_reserved); + BUG_ON(ret); btrfs_put_delayed_ref(ref); kfree(extent_op); count++; - - if (ret) { - printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret); - return ret; - } - next: do_chunk_alloc(trans, root->fs_info->extent_root, 2 * 1024 * 1024, @@ -2370,9 +2347,6 @@ static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs, * 0, which means to process everything in the tree at the start * of the run (but not newly added entries), or it can be some target * number you'd like to process. 
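The comment that survives above documents the count argument of btrfs_run_delayed_refs(): zero means "process everything queued when the run starts, but not refs added while running". A minimal userspace sketch of that snapshot semantics follows; struct ref_queue and run_refs() are invented stand-ins for the kernel's delayed-ref machinery, not its API.

    #include <stdio.h>

    /* Toy model of the count semantics described above: count == 0 snapshots
     * the queue length at the start of the run, so refs queued while we work
     * are left for a later run; a nonzero count is a processing target. */
    struct ref_queue {
        unsigned long num_entries;      /* refs currently queued */
    };

    static unsigned long run_refs(struct ref_queue *q, unsigned long count)
    {
        unsigned long processed = 0;

        if (count == 0)
            count = q->num_entries;     /* snapshot; later additions ignored */

        while (count && q->num_entries) {
            q->num_entries--;           /* "run" one delayed ref */
            processed++;
            count--;
        }
        return processed;
    }

    int main(void)
    {
        struct ref_queue q = { .num_entries = 8 };

        printf("ran %lu refs\n", run_refs(&q, 0));  /* runs all 8 */
        return 0;
    }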
- * - * Returns 0 on success or if called with an aborted transaction - * Returns <0 on error and aborts the transaction */ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_root *root, unsigned long count) @@ -2388,10 +2362,6 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, unsigned long num_refs = 0; int consider_waiting; - /* We'll clean this up in btrfs_cleanup_transaction */ - if (trans->aborted) - return 0; - if (root == root->fs_info->extent_root) root = root->fs_info->tree_root; @@ -2449,11 +2419,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, } ret = run_clustered_refs(trans, root, &cluster); - if (ret < 0) { - spin_unlock(&delayed_refs->lock); - btrfs_abort_transaction(trans, root, ret); - return ret; - } + BUG_ON(ret < 0); count -= min_t(unsigned long, ret, count); @@ -2618,7 +2584,7 @@ static noinline int check_committed_ref(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); if (ret < 0) goto out; - BUG_ON(ret == 0); /* Corruption */ + BUG_ON(ret == 0); ret = -ENOENT; if (path->slots[0] == 0) @@ -2772,6 +2738,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, } return 0; fail: + BUG(); return ret; } @@ -2800,7 +2767,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); if (ret < 0) goto fail; - BUG_ON(ret); /* Corruption */ + BUG_ON(ret); leaf = path->nodes[0]; bi = btrfs_item_ptr_offset(leaf, path->slots[0]); @@ -2808,10 +2775,8 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); fail: - if (ret) { - btrfs_abort_transaction(trans, root, ret); + if (ret) return ret; - } return 0; } @@ -2984,8 +2949,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, if (last == 0) { err = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); - if (err) /* File system offline */ - goto out; + BUG_ON(err); } cache = btrfs_lookup_first_block_group(root->fs_info, last); @@ -3012,9 +2976,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, last = cache->key.objectid + cache->key.offset; err = write_one_cache_group(trans, root, path, cache); - if (err) /* File system offline */ - goto out; - + BUG_ON(err); btrfs_put_block_group(cache); } @@ -3027,8 +2989,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, if (last == 0) { err = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); - if (err) /* File system offline */ - goto out; + BUG_ON(err); } cache = btrfs_lookup_first_block_group(root->fs_info, last); @@ -3053,21 +3014,20 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, continue; } - err = btrfs_write_out_cache(root, trans, cache, path); + btrfs_write_out_cache(root, trans, cache, path); /* * If we didn't have an error then the cache state is still * NEED_WRITE, so we can set it to WRITTEN. 
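The two-pass loop above walks dirty block groups and, per the comment, flips disk_cache_state from NEED_WRITE to WRITTEN once the free-space cache is written, so later passes skip the group. A standalone model of that transition; the enum values and struct are illustrative, not btrfs's definitions.

    #include <stdio.h>

    enum disk_cache_state { DC_CLEAR, DC_NEED_WRITE, DC_WRITTEN };

    struct block_group {
        unsigned long long objectid;
        enum disk_cache_state disk_cache_state;
    };

    /* One pass of the loop above: write each cache that still needs it,
     * then mark it written so the next pass can skip it. */
    static void write_dirty_caches(struct block_group *groups, int n)
    {
        for (int i = 0; i < n; i++) {
            if (groups[i].disk_cache_state != DC_NEED_WRITE)
                continue;
            /* btrfs_write_out_cache() would run here */
            groups[i].disk_cache_state = DC_WRITTEN;
            printf("wrote cache for group %llu\n", groups[i].objectid);
        }
    }

    int main(void)
    {
        struct block_group g[2] = {
            { 1024, DC_NEED_WRITE },
            { 2048, DC_WRITTEN },
        };

        write_dirty_caches(g, 2);   /* only group 1024 is written */
        return 0;
    }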
*/ - if (!err && cache->disk_cache_state == BTRFS_DC_NEED_WRITE) + if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE) cache->disk_cache_state = BTRFS_DC_WRITTEN; last = cache->key.objectid + cache->key.offset; btrfs_put_block_group(cache); } -out: btrfs_free_path(path); - return err; + return 0; } int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) @@ -3138,8 +3098,11 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) { - u64 extra_flags = chunk_to_extended(flags) & - BTRFS_EXTENDED_PROFILE_MASK; + u64 extra_flags = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK; + + /* chunk -> extended profile */ + if (extra_flags == 0) + extra_flags = BTRFS_AVAIL_ALLOC_BIT_SINGLE; if (flags & BTRFS_BLOCK_GROUP_DATA) fs_info->avail_data_alloc_bits |= extra_flags; @@ -3149,35 +3112,6 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) fs_info->avail_system_alloc_bits |= extra_flags; } -/* - * returns target flags in extended format or 0 if restripe for this - * chunk_type is not in progress - */ -static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags) -{ - struct btrfs_balance_control *bctl = fs_info->balance_ctl; - u64 target = 0; - - BUG_ON(!mutex_is_locked(&fs_info->volume_mutex) && - !spin_is_locked(&fs_info->balance_lock)); - - if (!bctl) - return 0; - - if (flags & BTRFS_BLOCK_GROUP_DATA && - bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) { - target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target; - } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM && - bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { - target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target; - } else if (flags & BTRFS_BLOCK_GROUP_METADATA && - bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) { - target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target; - } - - return target; -} - /* * @flags: available profiles in extended format (see ctree.h) * @@ -3194,19 +3128,31 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) */ u64 num_devices = root->fs_info->fs_devices->rw_devices + root->fs_info->fs_devices->missing_devices; - u64 target; - /* - * see if restripe for this chunk_type is in progress, if so - * try to reduce to the target profile - */ + /* pick restriper's target profile if it's available */ spin_lock(&root->fs_info->balance_lock); - target = get_restripe_target(root->fs_info, flags); - if (target) { - /* pick target profile only if it's already available */ - if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) { + if (root->fs_info->balance_ctl) { + struct btrfs_balance_control *bctl = root->fs_info->balance_ctl; + u64 tgt = 0; + + if ((flags & BTRFS_BLOCK_GROUP_DATA) && + (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) && + (flags & bctl->data.target)) { + tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target; + } else if ((flags & BTRFS_BLOCK_GROUP_SYSTEM) && + (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) && + (flags & bctl->sys.target)) { + tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target; + } else if ((flags & BTRFS_BLOCK_GROUP_METADATA) && + (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) && + (flags & bctl->meta.target)) { + tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target; + } + + if (tgt) { spin_unlock(&root->fs_info->balance_lock); - return extended_to_chunk(target); + flags = tgt; + goto out; } } spin_unlock(&root->fs_info->balance_lock); @@ -3234,7 +3180,10 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) flags &= ~BTRFS_BLOCK_GROUP_RAID0; } - 
return extended_to_chunk(flags); +out: + /* extended -> chunk profile */ + flags &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE; + return flags; } static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) @@ -3363,7 +3312,8 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) } data_sinfo->bytes_may_use += bytes; trace_btrfs_space_reservation(root->fs_info, "space_info", - data_sinfo->flags, bytes, 1); + (u64)(unsigned long)data_sinfo, + bytes, 1); spin_unlock(&data_sinfo->lock); return 0; @@ -3384,7 +3334,8 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) spin_lock(&data_sinfo->lock); data_sinfo->bytes_may_use -= bytes; trace_btrfs_space_reservation(root->fs_info, "space_info", - data_sinfo->flags, bytes, 0); + (u64)(unsigned long)data_sinfo, + bytes, 0); spin_unlock(&data_sinfo->lock); } @@ -3445,50 +3396,6 @@ static int should_alloc_chunk(struct btrfs_root *root, return 1; } -static u64 get_system_chunk_thresh(struct btrfs_root *root, u64 type) -{ - u64 num_dev; - - if (type & BTRFS_BLOCK_GROUP_RAID10 || - type & BTRFS_BLOCK_GROUP_RAID0) - num_dev = root->fs_info->fs_devices->rw_devices; - else if (type & BTRFS_BLOCK_GROUP_RAID1) - num_dev = 2; - else - num_dev = 1; /* DUP or single */ - - /* metadata for updaing devices and chunk tree */ - return btrfs_calc_trans_metadata_size(root, num_dev + 1); -} - -static void check_system_chunk(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 type) -{ - struct btrfs_space_info *info; - u64 left; - u64 thresh; - - info = __find_space_info(root->fs_info, BTRFS_BLOCK_GROUP_SYSTEM); - spin_lock(&info->lock); - left = info->total_bytes - info->bytes_used - info->bytes_pinned - - info->bytes_reserved - info->bytes_readonly; - spin_unlock(&info->lock); - - thresh = get_system_chunk_thresh(root, type); - if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) { - printk(KERN_INFO "left=%llu, need=%llu, flags=%llu\n", - left, thresh, type); - dump_space_info(info, 0, 0); - } - - if (left < thresh) { - u64 flags; - - flags = btrfs_get_alloc_profile(root->fs_info->chunk_root, 0); - btrfs_alloc_chunk(trans, root, flags); - } -} - static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, u64 flags, int force) @@ -3498,13 +3405,15 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, int wait_for_alloc = 0; int ret = 0; + BUG_ON(!profile_is_valid(flags, 0)); + space_info = __find_space_info(extent_root->fs_info, flags); if (!space_info) { ret = update_space_info(extent_root->fs_info, flags, 0, 0, &space_info); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); } - BUG_ON(!space_info); /* Logic error */ + BUG_ON(!space_info); again: spin_lock(&space_info->lock); @@ -3559,12 +3468,6 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, force_metadata_allocation(fs_info); } - /* - * Check if we have enough space in SYSTEM chunk because we may need - * to update devices. 
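The helper deleted above, check_system_chunk(), sized SYSTEM-chunk headroom by how many devices a new chunk of the given RAID type touches, plus one for the chunk tree itself. The arithmetic as a userspace sketch, with an assumed flat per-device metadata cost standing in for btrfs_calc_trans_metadata_size() and placeholder flag bits:

    #include <stdio.h>
    #include <stdint.h>

    #define GROUP_RAID0   (1u << 0)   /* illustrative bits, not on-disk values */
    #define GROUP_RAID1   (1u << 1)
    #define GROUP_RAID10  (1u << 2)

    /* Mirrors the removed get_system_chunk_thresh(): RAID0/RAID10 scale with
     * the writable device count, RAID1 needs two, DUP or single needs one,
     * and one more unit covers the chunk tree update. */
    static uint64_t system_chunk_thresh(uint32_t type, uint64_t rw_devices,
                                        uint64_t per_dev_metadata)
    {
        uint64_t num_dev;

        if (type & (GROUP_RAID10 | GROUP_RAID0))
            num_dev = rw_devices;
        else if (type & GROUP_RAID1)
            num_dev = 2;
        else
            num_dev = 1;              /* DUP or single */

        return (num_dev + 1) * per_dev_metadata;
    }

    int main(void)
    {
        uint64_t left = 1 << 20;      /* free bytes left in SYSTEM space */
        uint64_t thresh = system_chunk_thresh(GROUP_RAID10, 4, 256 * 1024);

        if (left < thresh)
            printf("would pre-allocate a system chunk (%llu < %llu)\n",
                   (unsigned long long)left, (unsigned long long)thresh);
        return 0;
    }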
- */ - check_system_chunk(trans, extent_root, flags); - ret = btrfs_alloc_chunk(trans, extent_root, flags); if (ret < 0 && ret != -ENOSPC) goto out; @@ -3775,10 +3678,8 @@ static int reserve_metadata_bytes(struct btrfs_root *root, ret = wait_event_interruptible(space_info->wait, !space_info->flush); /* Must have been interrupted, return */ - if (ret) { - printk(KERN_DEBUG "btrfs: %s returning -EINTR\n", __func__); + if (ret) return -EINTR; - } spin_lock(&space_info->lock); } @@ -3799,7 +3700,9 @@ static int reserve_metadata_bytes(struct btrfs_root *root, if (used + orig_bytes <= space_info->total_bytes) { space_info->bytes_may_use += orig_bytes; trace_btrfs_space_reservation(root->fs_info, - "space_info", space_info->flags, orig_bytes, 1); + "space_info", + (u64)(unsigned long)space_info, + orig_bytes, 1); ret = 0; } else { /* @@ -3868,7 +3771,9 @@ static int reserve_metadata_bytes(struct btrfs_root *root, if (used + num_bytes < space_info->total_bytes + avail) { space_info->bytes_may_use += orig_bytes; trace_btrfs_space_reservation(root->fs_info, - "space_info", space_info->flags, orig_bytes, 1); + "space_info", + (u64)(unsigned long)space_info, + orig_bytes, 1); ret = 0; } else { wait_ordered = true; @@ -3931,9 +3836,8 @@ static int reserve_metadata_bytes(struct btrfs_root *root, return ret; } -static struct btrfs_block_rsv *get_block_rsv( - const struct btrfs_trans_handle *trans, - const struct btrfs_root *root) +static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans, + struct btrfs_root *root) { struct btrfs_block_rsv *block_rsv = NULL; @@ -4014,7 +3918,8 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, spin_lock(&space_info->lock); space_info->bytes_may_use -= num_bytes; trace_btrfs_space_reservation(fs_info, "space_info", - space_info->flags, num_bytes, 0); + (u64)(unsigned long)space_info, + num_bytes, 0); space_info->reservation_progress++; spin_unlock(&space_info->lock); } @@ -4232,14 +4137,14 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) block_rsv->reserved += num_bytes; sinfo->bytes_may_use += num_bytes; trace_btrfs_space_reservation(fs_info, "space_info", - sinfo->flags, num_bytes, 1); + (u64)(unsigned long)sinfo, num_bytes, 1); } if (block_rsv->reserved >= block_rsv->size) { num_bytes = block_rsv->reserved - block_rsv->size; sinfo->bytes_may_use -= num_bytes; trace_btrfs_space_reservation(fs_info, "space_info", - sinfo->flags, num_bytes, 0); + (u64)(unsigned long)sinfo, num_bytes, 0); sinfo->reservation_progress++; block_rsv->reserved = block_rsv->size; block_rsv->full = 1; @@ -4293,12 +4198,12 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, return; trace_btrfs_space_reservation(root->fs_info, "transaction", - trans->transid, trans->bytes_reserved, 0); + (u64)(unsigned long)trans, + trans->bytes_reserved, 0); btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved); trans->bytes_reserved = 0; } -/* Can only return 0 or -ENOSPC */ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, struct inode *inode) { @@ -4635,7 +4540,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, while (total) { cache = btrfs_lookup_block_group(info, bytenr); if (!cache) - return -ENOENT; + return -1; if (cache->flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10)) @@ -4738,7 +4643,7 @@ int btrfs_pin_extent(struct btrfs_root *root, struct btrfs_block_group_cache *cache; cache = btrfs_lookup_block_group(root->fs_info, bytenr); - 
BUG_ON(!cache); /* Logic error */ + BUG_ON(!cache); pin_down_extent(root, cache, bytenr, num_bytes, reserved); @@ -4756,7 +4661,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *cache; cache = btrfs_lookup_block_group(root->fs_info, bytenr); - BUG_ON(!cache); /* Logic error */ + BUG_ON(!cache); /* * pull in the free space cache (if any) so that our pin @@ -4801,7 +4706,6 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, { struct btrfs_space_info *space_info = cache->space_info; int ret = 0; - spin_lock(&space_info->lock); spin_lock(&cache->lock); if (reserve != RESERVE_FREE) { @@ -4812,8 +4716,9 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, space_info->bytes_reserved += num_bytes; if (reserve == RESERVE_ALLOC) { trace_btrfs_space_reservation(cache->fs_info, - "space_info", space_info->flags, - num_bytes, 0); + "space_info", + (u64)(unsigned long)space_info, + num_bytes, 0); space_info->bytes_may_use -= num_bytes; } } @@ -4829,7 +4734,7 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, return ret; } -void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, +int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; @@ -4859,6 +4764,7 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, up_write(&fs_info->extent_commit_sem); update_global_block_rsv(fs_info); + return 0; } static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) @@ -4873,7 +4779,7 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) if (cache) btrfs_put_block_group(cache); cache = btrfs_lookup_block_group(fs_info, start); - BUG_ON(!cache); /* Logic error */ + BUG_ON(!cache); } len = cache->key.objectid + cache->key.offset - start; @@ -4910,9 +4816,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, u64 end; int ret; - if (trans->aborted) - return 0; - if (fs_info->pinned_extents == &fs_info->freed_extents[0]) unpin = &fs_info->freed_extents[1]; else @@ -4998,8 +4901,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, ret = remove_extent_backref(trans, extent_root, path, NULL, refs_to_drop, is_data); - if (ret) - goto abort; + BUG_ON(ret); btrfs_release_path(path); path->leave_spinning = 1; @@ -5017,11 +4919,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, btrfs_print_leaf(extent_root, path->nodes[0]); } - if (ret < 0) - goto abort; + BUG_ON(ret); extent_slot = path->slots[0]; } - } else if (ret == -ENOENT) { + } else { btrfs_print_leaf(extent_root, path->nodes[0]); WARN_ON(1); printk(KERN_ERR "btrfs unable to find ref byte nr %llu " @@ -5031,8 +4932,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, (unsigned long long)root_objectid, (unsigned long long)owner_objectid, (unsigned long long)owner_offset); - } else { - goto abort; } leaf = path->nodes[0]; @@ -5042,8 +4941,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, BUG_ON(found_extent || extent_slot != path->slots[0]); ret = convert_extent_item_v0(trans, extent_root, path, owner_objectid, 0); - if (ret < 0) - goto abort; + BUG_ON(ret < 0); btrfs_release_path(path); path->leave_spinning = 1; @@ -5060,8 +4958,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, (unsigned long long)bytenr); btrfs_print_leaf(extent_root, path->nodes[0]); } - if (ret < 0) - goto 
abort; + BUG_ON(ret); extent_slot = path->slots[0]; leaf = path->nodes[0]; item_size = btrfs_item_size_nr(leaf, extent_slot); @@ -5098,8 +4995,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, ret = remove_extent_backref(trans, extent_root, path, iref, refs_to_drop, is_data); - if (ret) - goto abort; + BUG_ON(ret); } } else { if (found_extent) { @@ -5116,27 +5012,23 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, ret = btrfs_del_items(trans, extent_root, path, path->slots[0], num_to_del); - if (ret) - goto abort; + BUG_ON(ret); btrfs_release_path(path); if (is_data) { ret = btrfs_del_csums(trans, root, bytenr, num_bytes); - if (ret) - goto abort; + BUG_ON(ret); + } else { + invalidate_mapping_pages(info->btree_inode->i_mapping, + bytenr >> PAGE_CACHE_SHIFT, + (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); } ret = update_block_group(trans, root, bytenr, num_bytes, 0); - if (ret) - goto abort; + BUG_ON(ret); } -out: btrfs_free_path(path); return ret; - -abort: - btrfs_abort_transaction(trans, extent_root, ret); - goto out; } /* @@ -5232,7 +5124,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, parent, root->root_key.objectid, btrfs_header_level(buf), BTRFS_DROP_DELAYED_REF, NULL, for_cow); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); } if (!last_ref) @@ -5266,7 +5158,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, btrfs_put_block_group(cache); } -/* Can return -ENOMEM */ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 owner, u64 offset, int for_cow) @@ -5288,12 +5179,14 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, num_bytes, parent, root_objectid, (int)owner, BTRFS_DROP_DELAYED_REF, NULL, for_cow); + BUG_ON(ret); } else { ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, num_bytes, parent, root_objectid, owner, offset, BTRFS_DROP_DELAYED_REF, NULL, for_cow); + BUG_ON(ret); } return ret; } @@ -5350,34 +5243,28 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache) return 0; } -static int __get_block_group_index(u64 flags) +static int get_block_group_index(struct btrfs_block_group_cache *cache) { int index; - - if (flags & BTRFS_BLOCK_GROUP_RAID10) + if (cache->flags & BTRFS_BLOCK_GROUP_RAID10) index = 0; - else if (flags & BTRFS_BLOCK_GROUP_RAID1) + else if (cache->flags & BTRFS_BLOCK_GROUP_RAID1) index = 1; - else if (flags & BTRFS_BLOCK_GROUP_DUP) + else if (cache->flags & BTRFS_BLOCK_GROUP_DUP) index = 2; - else if (flags & BTRFS_BLOCK_GROUP_RAID0) + else if (cache->flags & BTRFS_BLOCK_GROUP_RAID0) index = 3; else index = 4; - return index; } -static int get_block_group_index(struct btrfs_block_group_cache *cache) -{ - return __get_block_group_index(cache->flags); -} - enum btrfs_loop_type { - LOOP_CACHING_NOWAIT = 0, - LOOP_CACHING_WAIT = 1, - LOOP_ALLOC_CHUNK = 2, - LOOP_NO_EMPTY_SIZE = 3, + LOOP_FIND_IDEAL = 0, + LOOP_CACHING_NOWAIT = 1, + LOOP_CACHING_WAIT = 2, + LOOP_ALLOC_CHUNK = 3, + LOOP_NO_EMPTY_SIZE = 4, }; /* @@ -5391,6 +5278,7 @@ enum btrfs_loop_type { static noinline int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *orig_root, u64 num_bytes, u64 empty_size, + u64 search_start, u64 search_end, u64 hint_byte, struct btrfs_key *ins, u64 data) { @@ -5399,7 +5287,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_free_cluster *last_ptr = NULL; struct btrfs_block_group_cache *block_group = NULL; 
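get_block_group_index(), restored above to take the cache directly, buckets a block group by RAID profile so the allocator can walk profiles in a fixed order (raid10, raid1, dup, raid0, single). The same mapping as self-contained C; the flag bits are placeholders rather than the on-disk values:

    #include <stdio.h>

    #define BG_RAID10 (1u << 0)   /* placeholder bits */
    #define BG_RAID1  (1u << 1)
    #define BG_DUP    (1u << 2)
    #define BG_RAID0  (1u << 3)

    /* Same ordering as the hunk above. */
    static int block_group_index(unsigned flags)
    {
        if (flags & BG_RAID10)
            return 0;
        if (flags & BG_RAID1)
            return 1;
        if (flags & BG_DUP)
            return 2;
        if (flags & BG_RAID0)
            return 3;
        return 4;                 /* single */
    }

    int main(void)
    {
        printf("raid1 -> %d, single -> %d\n",
               block_group_index(BG_RAID1), block_group_index(0));
        return 0;
    }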
struct btrfs_block_group_cache *used_block_group; - u64 search_start = 0; int empty_cluster = 2 * 1024 * 1024; int allowed_chunk_alloc = 0; int done_chunk_alloc = 0; @@ -5413,6 +5300,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, bool failed_alloc = false; bool use_cluster = true; bool have_caching_bg = false; + u64 ideal_cache_percent = 0; + u64 ideal_cache_offset = 0; WARN_ON(num_bytes < root->sectorsize); btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); @@ -5462,6 +5351,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, empty_cluster = 0; if (search_start == hint_byte) { +ideal_cache: block_group = btrfs_lookup_block_group(root->fs_info, search_start); used_block_group = block_group; @@ -5473,7 +5363,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, * picked out then we don't care that the block group is cached. */ if (block_group && block_group_bits(block_group, data) && - block_group->cached != BTRFS_CACHE_NO) { + (block_group->cached != BTRFS_CACHE_NO || + search_start == ideal_cache_offset)) { down_read(&space_info->groups_sem); if (list_empty(&block_group->list) || block_group->ro) { @@ -5527,13 +5418,44 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, have_block_group: cached = block_group_cache_done(block_group); if (unlikely(!cached)) { + u64 free_percent; + found_uncached_bg = true; ret = cache_block_group(block_group, trans, - orig_root, 0); - BUG_ON(ret < 0); - ret = 0; + orig_root, 1); + if (block_group->cached == BTRFS_CACHE_FINISHED) + goto alloc; + + free_percent = btrfs_block_group_used(&block_group->item); + free_percent *= 100; + free_percent = div64_u64(free_percent, + block_group->key.offset); + free_percent = 100 - free_percent; + if (free_percent > ideal_cache_percent && + likely(!block_group->ro)) { + ideal_cache_offset = block_group->key.objectid; + ideal_cache_percent = free_percent; + } + + /* + * The caching workers are limited to 2 threads, so we + * can queue as much work as we care to. + */ + if (loop > LOOP_FIND_IDEAL) { + ret = cache_block_group(block_group, trans, + orig_root, 0); + BUG_ON(ret); + } + + /* + * If loop is set for cached only, try the next block + * group. + */ + if (loop == LOOP_FIND_IDEAL) + goto loop; } +alloc: if (unlikely(block_group->ro)) goto loop; @@ -5684,6 +5606,11 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, } checks: search_start = stripe_align(root, offset); + /* move on to the next group */ + if (search_start + num_bytes >= search_end) { + btrfs_add_free_space(used_block_group, offset, num_bytes); + goto loop; + } /* move on to the next group */ if (search_start + num_bytes > @@ -5734,7 +5661,9 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES) goto search; - /* + /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for + * for them to make caching progress. 
Also + * determine the best possible bg to cache * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking * caching kthreads as we move along * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching @@ -5744,17 +5673,50 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, */ if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) { index = 0; + if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { + found_uncached_bg = false; + loop++; + if (!ideal_cache_percent) + goto search; + + /* + * 1 of the following 2 things have happened so far + * + * 1) We found an ideal block group for caching that + * is mostly full and will cache quickly, so we might + * as well wait for it. + * + * 2) We searched for cached only and we didn't find + * anything, and we didn't start any caching kthreads + * either, so chances are we will loop through and + * start a couple caching kthreads, and then come back + * around and just wait for them. This will be slower + * because we will have 2 caching kthreads reading at + * the same time when we could have just started one + * and waited for it to get far enough to give us an + * allocation, so go ahead and go to the wait caching + * loop. + */ + loop = LOOP_CACHING_WAIT; + search_start = ideal_cache_offset; + ideal_cache_percent = 0; + goto ideal_cache; + } else if (loop == LOOP_FIND_IDEAL) { + /* + * Didn't find a uncached bg, wait on anything we find + * next. + */ + loop = LOOP_CACHING_WAIT; + goto search; + } + loop++; + if (loop == LOOP_ALLOC_CHUNK) { if (allowed_chunk_alloc) { ret = do_chunk_alloc(trans, root, num_bytes + 2 * 1024 * 1024, data, CHUNK_ALLOC_LIMITED); - if (ret < 0) { - btrfs_abort_transaction(trans, - root, ret); - goto out; - } allowed_chunk_alloc = 0; if (ret == 1) done_chunk_alloc = 1; @@ -5783,7 +5745,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, } else if (ins->objectid) { ret = 0; } -out: return ret; } @@ -5837,10 +5798,12 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_bytes, u64 min_alloc_size, u64 empty_size, u64 hint_byte, - struct btrfs_key *ins, u64 data) + u64 search_end, struct btrfs_key *ins, + u64 data) { bool final_tried = false; int ret; + u64 search_start = 0; data = btrfs_get_alloc_profile(root, data); again: @@ -5848,31 +5811,23 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, * the only place that sets empty_size is btrfs_realloc_node, which * is not called recursively on allocations */ - if (empty_size || root->ref_cows) { + if (empty_size || root->ref_cows) ret = do_chunk_alloc(trans, root->fs_info->extent_root, num_bytes + 2 * 1024 * 1024, data, CHUNK_ALLOC_NO_FORCE); - if (ret < 0 && ret != -ENOSPC) { - btrfs_abort_transaction(trans, root, ret); - return ret; - } - } WARN_ON(num_bytes < root->sectorsize); ret = find_free_extent(trans, root, num_bytes, empty_size, - hint_byte, ins, data); + search_start, search_end, hint_byte, + ins, data); if (ret == -ENOSPC) { if (!final_tried) { num_bytes = num_bytes >> 1; num_bytes = num_bytes & ~(root->sectorsize - 1); num_bytes = max(num_bytes, min_alloc_size); - ret = do_chunk_alloc(trans, root->fs_info->extent_root, + do_chunk_alloc(trans, root->fs_info->extent_root, num_bytes, data, CHUNK_ALLOC_FORCE); - if (ret < 0 && ret != -ENOSPC) { - btrfs_abort_transaction(trans, root, ret); - return ret; - } if (num_bytes == min_alloc_size) final_tried = true; goto again; @@ -5883,8 +5838,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, 
printk(KERN_ERR "btrfs allocation failed flags %llu, " "wanted %llu\n", (unsigned long long)data, (unsigned long long)num_bytes); - if (sinfo) - dump_space_info(sinfo, num_bytes, 1); + dump_space_info(sinfo, num_bytes, 1); } } @@ -5963,10 +5917,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, path->leave_spinning = 1; ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, ins, size); - if (ret) { - btrfs_free_path(path); - return ret; - } + BUG_ON(ret); leaf = path->nodes[0]; extent_item = btrfs_item_ptr(leaf, path->slots[0], @@ -5996,7 +5947,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, btrfs_free_path(path); ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); - if (ret) { /* -ENOENT, logic error */ + if (ret) { printk(KERN_ERR "btrfs update block group failed for %llu " "%llu\n", (unsigned long long)ins->objectid, (unsigned long long)ins->offset); @@ -6027,10 +5978,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, path->leave_spinning = 1; ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, ins, size); - if (ret) { - btrfs_free_path(path); - return ret; - } + BUG_ON(ret); leaf = path->nodes[0]; extent_item = btrfs_item_ptr(leaf, path->slots[0], @@ -6060,7 +6008,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, btrfs_free_path(path); ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); - if (ret) { /* -ENOENT, logic error */ + if (ret) { printk(KERN_ERR "btrfs update block group failed for %llu " "%llu\n", (unsigned long long)ins->objectid, (unsigned long long)ins->offset); @@ -6108,28 +6056,28 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, if (!caching_ctl) { BUG_ON(!block_group_cache_done(block_group)); ret = btrfs_remove_free_space(block_group, start, num_bytes); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); } else { mutex_lock(&caching_ctl->mutex); if (start >= caching_ctl->progress) { ret = add_excluded_extent(root, start, num_bytes); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); } else if (start + num_bytes <= caching_ctl->progress) { ret = btrfs_remove_free_space(block_group, start, num_bytes); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); } else { num_bytes = caching_ctl->progress - start; ret = btrfs_remove_free_space(block_group, start, num_bytes); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); start = caching_ctl->progress; num_bytes = ins->objectid + ins->offset - caching_ctl->progress; ret = add_excluded_extent(root, start, num_bytes); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); } mutex_unlock(&caching_ctl->mutex); @@ -6138,7 +6086,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, ret = btrfs_update_reserved_bytes(block_group, ins->offset, RESERVE_ALLOC_NO_ACCOUNT); - BUG_ON(ret); /* logic error */ + BUG_ON(ret); btrfs_put_block_group(block_group); ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 0, owner, offset, ins, 1); @@ -6159,7 +6107,6 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); btrfs_tree_lock(buf); clean_tree_block(trans, root, buf); - clear_bit(EXTENT_BUFFER_STALE, &buf->bflags); btrfs_set_lock_blocking(buf); btrfs_set_buffer_uptodate(buf); @@ -6267,7 +6214,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return ERR_CAST(block_rsv); ret = btrfs_reserve_extent(trans, root, blocksize, blocksize, - empty_size, 
hint, &ins, 0); + empty_size, hint, (u64)-1, &ins, 0); if (ret) { unuse_block_rsv(root->fs_info, block_rsv, blocksize); return ERR_PTR(ret); @@ -6275,7 +6222,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize, level); - BUG_ON(IS_ERR(buf)); /* -ENOMEM */ + BUG_ON(IS_ERR(buf)); if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { if (parent == 0) @@ -6287,7 +6234,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { struct btrfs_delayed_extent_op *extent_op; extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); - BUG_ON(!extent_op); /* -ENOMEM */ + BUG_ON(!extent_op); if (key) memcpy(&extent_op->key, key, sizeof(extent_op->key)); else @@ -6302,7 +6249,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, ins.offset, parent, root_objectid, level, BTRFS_ADD_DELAYED_EXTENT, extent_op, for_cow); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); } return buf; } @@ -6372,9 +6319,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, /* We don't lock the tree block, it's OK to be racy here */ ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, &refs, &flags); - /* We don't care about errors in readahead. */ - if (ret < 0) - continue; + BUG_ON(ret); BUG_ON(refs == 0); if (wc->stage == DROP_REFERENCE) { @@ -6441,9 +6386,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, eb->start, eb->len, &wc->refs[level], &wc->flags[level]); - BUG_ON(ret == -ENOMEM); - if (ret) - return ret; + BUG_ON(ret); BUG_ON(wc->refs[level] == 0); } @@ -6462,12 +6405,12 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, if (!(wc->flags[level] & flag)) { BUG_ON(!path->locks[level]); ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); ret = btrfs_set_disk_extent_flags(trans, root, eb->start, eb->len, flag, 0); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); wc->flags[level] |= flag; } @@ -6539,11 +6482,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, &wc->refs[level - 1], &wc->flags[level - 1]); - if (ret < 0) { - btrfs_tree_unlock(next); - return ret; - } - + BUG_ON(ret); BUG_ON(wc->refs[level - 1] == 0); *lookup_info = 0; @@ -6612,7 +6551,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, root->root_key.objectid, level - 1, 0, 0); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); } btrfs_tree_unlock(next); free_extent_buffer(next); @@ -6670,10 +6609,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, eb->start, eb->len, &wc->refs[level], &wc->flags[level]); - if (ret < 0) { - btrfs_tree_unlock_rw(eb, path->locks[level]); - return ret; - } + BUG_ON(ret); BUG_ON(wc->refs[level] == 0); if (wc->refs[level] == 1) { btrfs_tree_unlock_rw(eb, path->locks[level]); @@ -6693,7 +6629,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, else ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); } /* make block locked assertion in clean_tree_block happy */ if (!path->locks[level] && @@ -6802,7 +6738,7 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, * also make sure 
backrefs for the shared block and all lower level * blocks are properly updated. */ -int btrfs_drop_snapshot(struct btrfs_root *root, +void btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_block_rsv *block_rsv, int update_ref, int for_reloc) { @@ -6830,10 +6766,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, } trans = btrfs_start_transaction(tree_root, 0); - if (IS_ERR(trans)) { - err = PTR_ERR(trans); - goto out_free; - } + BUG_ON(IS_ERR(trans)); if (block_rsv) trans->block_rsv = block_rsv; @@ -6858,7 +6791,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, path->lowest_level = 0; if (ret < 0) { err = ret; - goto out_end_trans; + goto out_free; } WARN_ON(ret > 0); @@ -6878,10 +6811,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, path->nodes[level]->len, &wc->refs[level], &wc->flags[level]); - if (ret < 0) { - err = ret; - goto out_end_trans; - } + BUG_ON(ret); BUG_ON(wc->refs[level] == 0); if (level == root_item->drop_level) @@ -6932,40 +6862,26 @@ int btrfs_drop_snapshot(struct btrfs_root *root, ret = btrfs_update_root(trans, tree_root, &root->root_key, root_item); - if (ret) { - btrfs_abort_transaction(trans, tree_root, ret); - err = ret; - goto out_end_trans; - } + BUG_ON(ret); btrfs_end_transaction_throttle(trans, tree_root); trans = btrfs_start_transaction(tree_root, 0); - if (IS_ERR(trans)) { - err = PTR_ERR(trans); - goto out_free; - } + BUG_ON(IS_ERR(trans)); if (block_rsv) trans->block_rsv = block_rsv; } } btrfs_release_path(path); - if (err) - goto out_end_trans; + BUG_ON(err); ret = btrfs_del_root(trans, tree_root, &root->root_key); - if (ret) { - btrfs_abort_transaction(trans, tree_root, ret); - goto out_end_trans; - } + BUG_ON(ret); if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { ret = btrfs_find_last_root(tree_root, root->root_key.objectid, NULL, NULL); - if (ret < 0) { - btrfs_abort_transaction(trans, tree_root, ret); - err = ret; - goto out_end_trans; - } else if (ret > 0) { + BUG_ON(ret < 0); + if (ret > 0) { /* if we fail to delete the orphan item this time * around, it'll get picked up the next time. 
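The drop-snapshot path above works in bounded transactions: walk part of the tree, record drop_level in the root item, end the transaction, and start a new one, so an interrupted drop can resume where it left off. A toy version of that checkpoint-and-restart shape; nothing here is the kernel's API:

    #include <stdio.h>

    struct progress {
        int level;                /* plays the role of drop_level */
    };

    /* One bounded batch of work; returns 1 when the walk is finished. */
    static int do_batch(struct progress *p)
    {
        if (p->level == 0)
            return 1;
        p->level--;               /* one level's worth of walking */
        return 0;
    }

    int main(void)
    {
        struct progress p = { .level = 4 };
        int done = 0;

        while (!done) {
            /* start_transaction() would go here */
            done = do_batch(&p);
            if (!done)
                printf("checkpoint: level %d\n", p.level); /* root item update */
            /* end_transaction() would go here */
        }
        printf("drop finished\n");
        return 0;
    }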
* @@ -6983,15 +6899,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root, free_extent_buffer(root->commit_root); kfree(root); } -out_end_trans: - btrfs_end_transaction_throttle(trans, tree_root); out_free: + btrfs_end_transaction_throttle(trans, tree_root); kfree(wc); btrfs_free_path(path); out: if (err) btrfs_std_error(root->fs_info, err); - return err; + return; } /* @@ -7068,15 +6983,31 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) { u64 num_devices; - u64 stripped; + u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | + BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; - /* - * if restripe for this chunk_type is on pick target profile and - * return, otherwise do the usual balance - */ - stripped = get_restripe_target(root->fs_info, flags); - if (stripped) - return extended_to_chunk(stripped); + if (root->fs_info->balance_ctl) { + struct btrfs_balance_control *bctl = root->fs_info->balance_ctl; + u64 tgt = 0; + + /* pick restriper's target profile and return */ + if (flags & BTRFS_BLOCK_GROUP_DATA && + bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) { + tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target; + } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM && + bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { + tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target; + } else if (flags & BTRFS_BLOCK_GROUP_METADATA && + bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) { + tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target; + } + + if (tgt) { + /* extended -> chunk profile */ + tgt &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE; + return tgt; + } + } /* * we add in the count of missing devices because we want @@ -7086,9 +7017,6 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) num_devices = root->fs_info->fs_devices->rw_devices + root->fs_info->fs_devices->missing_devices; - stripped = BTRFS_BLOCK_GROUP_RAID0 | - BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; - if (num_devices == 1) { stripped |= BTRFS_BLOCK_GROUP_DUP; stripped = flags & ~stripped; @@ -7101,6 +7029,7 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) if (flags & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10)) return stripped | BTRFS_BLOCK_GROUP_DUP; + return flags; } else { /* they already had raid on here, just return */ if (flags & stripped) @@ -7113,9 +7042,9 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) if (flags & BTRFS_BLOCK_GROUP_DUP) return stripped | BTRFS_BLOCK_GROUP_RAID1; - /* this is drive concat, leave it alone */ + /* turn single device chunks into raid0 */ + return stripped | BTRFS_BLOCK_GROUP_RAID0; } - return flags; } @@ -7174,16 +7103,12 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, BUG_ON(cache->ro); trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) - return PTR_ERR(trans); + BUG_ON(IS_ERR(trans)); alloc_flags = update_block_group_flags(root, cache->flags); - if (alloc_flags != cache->flags) { - ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, - CHUNK_ALLOC_FORCE); - if (ret < 0) - goto out; - } + if (alloc_flags != cache->flags) + do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, + CHUNK_ALLOC_FORCE); ret = set_block_group_ro(cache, 0); if (!ret) @@ -7263,7 +7188,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo) return free_bytes; } -void btrfs_set_block_group_rw(struct btrfs_root *root, +int btrfs_set_block_group_rw(struct btrfs_root *root, struct btrfs_block_group_cache *cache) { struct 
btrfs_space_info *sinfo = cache->space_info; @@ -7279,6 +7204,7 @@ void btrfs_set_block_group_rw(struct btrfs_root *root, cache->ro = 0; spin_unlock(&cache->lock); spin_unlock(&sinfo->lock); + return 0; } /* @@ -7296,7 +7222,6 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) u64 min_free; u64 dev_min = 1; u64 dev_nr = 0; - u64 target; int index; int full = 0; int ret = 0; @@ -7337,11 +7262,13 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) /* * ok we don't have enough space, but maybe we have free space on our * devices to allocate new chunks for relocation, so loop through our - * alloc devices and guess if we have enough space. if this block - * group is going to be restriped, run checks against the target - * profile instead of the current one. + * alloc devices and guess if we have enough space. However, if we + * were marked as full, then we know there aren't enough chunks, and we + * can just return. */ ret = -1; + if (full) + goto out; /* * index: @@ -7351,20 +7278,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) * 3: raid0 * 4: single */ - target = get_restripe_target(root->fs_info, block_group->flags); - if (target) { - index = __get_block_group_index(extended_to_chunk(target)); - } else { - /* - * this is just a balance, so if we were marked as full - * we know there is no space for a new chunk - */ - if (full) - goto out; - - index = get_block_group_index(block_group); - } - + index = get_block_group_index(block_group); if (index == 0) { dev_min = 4; /* Divide by 2 */ @@ -7658,7 +7572,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) ret = update_space_info(info, cache->flags, found_key.offset, btrfs_block_group_used(&cache->item), &space_info); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); cache->space_info = space_info; spin_lock(&cache->space_info->lock); cache->space_info->bytes_readonly += cache->bytes_super; @@ -7667,7 +7581,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) __link_block_group(space_info, cache); ret = btrfs_add_block_group_cache(root->fs_info, cache); - BUG_ON(ret); /* Logic error */ + BUG_ON(ret); set_avail_alloc_bits(root->fs_info, cache->flags); if (btrfs_chunk_readonly(root, cache->key.objectid)) @@ -7749,7 +7663,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, &cache->space_info); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); update_global_block_rsv(root->fs_info); spin_lock(&cache->space_info->lock); @@ -7759,14 +7673,11 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, __link_block_group(cache->space_info, cache); ret = btrfs_add_block_group_cache(root->fs_info, cache); - BUG_ON(ret); /* Logic error */ + BUG_ON(ret); ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, sizeof(cache->item)); - if (ret) { - btrfs_abort_transaction(trans, extent_root, ret); - return ret; - } + BUG_ON(ret); set_avail_alloc_bits(extent_root->fs_info, type); @@ -7775,8 +7686,11 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) { - u64 extra_flags = chunk_to_extended(flags) & - BTRFS_EXTENDED_PROFILE_MASK; + u64 extra_flags = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK; + + /* chunk -> extended profile */ + if (extra_flags == 0) + extra_flags = BTRFS_AVAIL_ALLOC_BIT_SINGLE; if (flags & BTRFS_BLOCK_GROUP_DATA) fs_info->avail_data_alloc_bits &= ~extra_flags; @@ -7844,10 +7758,7 @@ int btrfs_remove_block_group(struct 
btrfs_trans_handle *trans, inode = lookup_free_space_inode(tree_root, block_group, path); if (!IS_ERR(inode)) { ret = btrfs_orphan_add(trans, inode); - if (ret) { - btrfs_add_delayed_iput(inode); - goto out; - } + BUG_ON(ret); clear_nlink(inode); /* One for the block groups ref */ spin_lock(&block_group->lock); diff --git a/trunk/fs/btrfs/extent_io.c b/trunk/fs/btrfs/extent_io.c index 8d904dd7ea9f..2862454bcdb3 100644 --- a/trunk/fs/btrfs/extent_io.c +++ b/trunk/fs/btrfs/extent_io.c @@ -19,7 +19,6 @@ #include "btrfs_inode.h" #include "volumes.h" #include "check-integrity.h" -#include "locking.h" static struct kmem_cache *extent_state_cache; static struct kmem_cache *extent_buffer_cache; @@ -54,13 +53,6 @@ struct extent_page_data { unsigned int sync_io:1; }; -static noinline void flush_write_bio(void *data); -static inline struct btrfs_fs_info * -tree_fs_info(struct extent_io_tree *tree) -{ - return btrfs_sb(tree->mapping->host->i_sb); -} - int __init extent_io_init(void) { extent_state_cache = kmem_cache_create("extent_state", @@ -144,7 +136,6 @@ static struct extent_state *alloc_extent_state(gfp_t mask) #endif atomic_set(&state->refs, 1); init_waitqueue_head(&state->wq); - trace_alloc_extent_state(state, mask, _RET_IP_); return state; } @@ -162,7 +153,6 @@ void free_extent_state(struct extent_state *state) list_del(&state->leak_list); spin_unlock_irqrestore(&leak_lock, flags); #endif - trace_free_extent_state(state, _RET_IP_); kmem_cache_free(extent_state_cache, state); } } @@ -449,13 +439,6 @@ alloc_extent_state_atomic(struct extent_state *prealloc) return prealloc; } -void extent_io_tree_panic(struct extent_io_tree *tree, int err) -{ - btrfs_panic(tree_fs_info(tree), err, "Locking error: " - "Extent tree was modified by another " - "thread while locked."); -} - /* * clear some bits on a range in the tree. This may require splitting * or inserting elements in the tree, so the gfp mask is used to @@ -466,7 +449,8 @@ void extent_io_tree_panic(struct extent_io_tree *tree, int err) * * the range [start, end] is inclusive. * - * This takes the tree lock, and returns 0 on success and < 0 on error. + * This takes the tree lock, and returns < 0 on error, > 0 if any of the + * bits were already set, or zero if none of the bits were already set. 
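The restored comment above states clear_extent_bit()'s old contract: < 0 on error, > 0 if any of the bits were actually set, 0 otherwise, accumulated below via set |= clear_state_bit(). A flat-array model of that return value; the real tree splits extent_state records at range boundaries, and the array is only a stand-in:

    #include <stdio.h>

    #define N_UNITS 8

    /* Clear `bits` over the inclusive range [start, end] and report (> 0)
     * whether any of them were actually set beforehand. */
    static int clear_range_bits(unsigned *state, int start, int end,
                                unsigned bits)
    {
        int set = 0;

        for (int i = start; i <= end; i++) {
            if (state[i] & bits)
                set = 1;
            state[i] &= ~bits;
        }
        return set;
    }

    int main(void)
    {
        unsigned state[N_UNITS] = { 0, 0, 0x4, 0, 0, 0, 0, 0 };

        printf("first clear: %d\n", clear_range_bits(state, 0, 7, 0x4));
        printf("second clear: %d\n", clear_range_bits(state, 0, 7, 0x4));
        return 0;
    }

The first call returns 1 because unit 2 had the bit; the second returns 0.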
*/ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, int wake, int delete, @@ -480,6 +464,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, struct rb_node *node; u64 last_end; int err; + int set = 0; int clear = 0; if (delete) @@ -557,14 +542,12 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, prealloc = alloc_extent_state_atomic(prealloc); BUG_ON(!prealloc); err = split_state(tree, state, prealloc, start); - if (err) - extent_io_tree_panic(tree, err); - + BUG_ON(err == -EEXIST); prealloc = NULL; if (err) goto out; if (state->end <= end) { - clear_state_bit(tree, state, &bits, wake); + set |= clear_state_bit(tree, state, &bits, wake); if (last_end == (u64)-1) goto out; start = last_end + 1; @@ -581,19 +564,17 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, prealloc = alloc_extent_state_atomic(prealloc); BUG_ON(!prealloc); err = split_state(tree, state, prealloc, end + 1); - if (err) - extent_io_tree_panic(tree, err); - + BUG_ON(err == -EEXIST); if (wake) wake_up(&state->wq); - clear_state_bit(tree, prealloc, &bits, wake); + set |= clear_state_bit(tree, prealloc, &bits, wake); prealloc = NULL; goto out; } - clear_state_bit(tree, state, &bits, wake); + set |= clear_state_bit(tree, state, &bits, wake); next: if (last_end == (u64)-1) goto out; @@ -610,7 +591,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, if (prealloc) free_extent_state(prealloc); - return 0; + return set; search_again: if (start > end) @@ -621,8 +602,8 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, goto again; } -static void wait_on_state(struct extent_io_tree *tree, - struct extent_state *state) +static int wait_on_state(struct extent_io_tree *tree, + struct extent_state *state) __releases(tree->lock) __acquires(tree->lock) { @@ -632,6 +613,7 @@ static void wait_on_state(struct extent_io_tree *tree, schedule(); spin_lock(&tree->lock); finish_wait(&state->wq, &wait); + return 0; } /* @@ -639,7 +621,7 @@ static void wait_on_state(struct extent_io_tree *tree, * The range [start, end] is inclusive. * The tree lock is taken by this function */ -void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits) +int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits) { struct extent_state *state; struct rb_node *node; @@ -676,6 +658,7 @@ void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits) } out: spin_unlock(&tree->lock); + return 0; } static void set_state_bits(struct extent_io_tree *tree, @@ -723,10 +706,9 @@ static void uncache_state(struct extent_state **cached_ptr) * [start, end] is inclusive This takes the tree lock. 
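The exclusive_bits parameter of set_extent_bit(), visible in the hunk that follows, makes the set fail with -EEXIST and report failed_start when part of the range is already locked; lock_extent_bits() further down uses exactly that to wait on the contended range and retry. A small model of the contract over a per-unit state array, illustrative only:

    #include <errno.h>
    #include <stdio.h>

    #define LOCKED  0x1
    #define N_UNITS 8

    /* Fail without side effects if any unit in [start, end] already holds
     * the exclusive bit, reporting where the collision starts. */
    static int set_bits_exclusive(unsigned *state, int start, int end,
                                  unsigned bits, int *failed_start)
    {
        for (int i = start; i <= end; i++) {
            if (state[i] & bits) {
                *failed_start = i;
                return -EEXIST;
            }
        }
        for (int i = start; i <= end; i++)
            state[i] |= bits;
        return 0;
    }

    int main(void)
    {
        unsigned state[N_UNITS] = { 0 };
        int failed = -1;

        set_bits_exclusive(state, 2, 5, LOCKED, &failed);
        if (set_bits_exclusive(state, 4, 7, LOCKED, &failed) == -EEXIST)
            printf("range locked starting at %d\n", failed);  /* prints 4 */
        return 0;
    }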
*/ -static int __must_check -__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, - int bits, int exclusive_bits, u64 *failed_start, - struct extent_state **cached_state, gfp_t mask) +int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int exclusive_bits, u64 *failed_start, + struct extent_state **cached_state, gfp_t mask) { struct extent_state *state; struct extent_state *prealloc = NULL; @@ -760,10 +742,8 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, prealloc = alloc_extent_state_atomic(prealloc); BUG_ON(!prealloc); err = insert_state(tree, prealloc, start, end, &bits); - if (err) - extent_io_tree_panic(tree, err); - prealloc = NULL; + BUG_ON(err == -EEXIST); goto out; } state = rb_entry(node, struct extent_state, rb_node); @@ -829,9 +809,7 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, prealloc = alloc_extent_state_atomic(prealloc); BUG_ON(!prealloc); err = split_state(tree, state, prealloc, start); - if (err) - extent_io_tree_panic(tree, err); - + BUG_ON(err == -EEXIST); prealloc = NULL; if (err) goto out; @@ -868,9 +846,12 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, */ err = insert_state(tree, prealloc, start, this_end, &bits); - if (err) - extent_io_tree_panic(tree, err); - + BUG_ON(err == -EEXIST); + if (err) { + free_extent_state(prealloc); + prealloc = NULL; + goto out; + } cache_state(prealloc, cached_state); prealloc = NULL; start = this_end + 1; @@ -892,8 +873,7 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, prealloc = alloc_extent_state_atomic(prealloc); BUG_ON(!prealloc); err = split_state(tree, state, prealloc, end + 1); - if (err) - extent_io_tree_panic(tree, err); + BUG_ON(err == -EEXIST); set_state_bits(tree, prealloc, &bits); cache_state(prealloc, cached_state); @@ -920,15 +900,6 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, goto again; } -int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, - u64 *failed_start, struct extent_state **cached_state, - gfp_t mask) -{ - return __set_extent_bit(tree, start, end, bits, 0, failed_start, - cached_state, mask); -} - - /** * convert_extent - convert all bits in a given range from one bit to another * @tree: the io tree to search @@ -975,8 +946,7 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, } err = insert_state(tree, prealloc, start, end, &bits); prealloc = NULL; - if (err) - extent_io_tree_panic(tree, err); + BUG_ON(err == -EEXIST); goto out; } state = rb_entry(node, struct extent_state, rb_node); @@ -1032,8 +1002,7 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, goto out; } err = split_state(tree, state, prealloc, start); - if (err) - extent_io_tree_panic(tree, err); + BUG_ON(err == -EEXIST); prealloc = NULL; if (err) goto out; @@ -1072,8 +1041,12 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, */ err = insert_state(tree, prealloc, start, this_end, &bits); - if (err) - extent_io_tree_panic(tree, err); + BUG_ON(err == -EEXIST); + if (err) { + free_extent_state(prealloc); + prealloc = NULL; + goto out; + } prealloc = NULL; start = this_end + 1; goto search_again; @@ -1092,8 +1065,7 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, } err = split_state(tree, state, prealloc, end + 1); - if (err) - extent_io_tree_panic(tree, err); + BUG_ON(err == -EEXIST); set_state_bits(tree, prealloc, &bits); clear_state_bit(tree, prealloc, &clear_bits, 0); @@ 
-1123,14 +1095,14 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { - return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL, + return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, NULL, mask); } int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, gfp_t mask) { - return set_extent_bit(tree, start, end, bits, NULL, + return set_extent_bit(tree, start, end, bits, 0, NULL, NULL, mask); } @@ -1145,7 +1117,7 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, { return set_extent_bit(tree, start, end, EXTENT_DELALLOC | EXTENT_UPTODATE, - NULL, cached_state, mask); + 0, NULL, cached_state, mask); } int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, @@ -1159,7 +1131,7 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { - return set_extent_bit(tree, start, end, EXTENT_NEW, NULL, + return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, NULL, mask); } @@ -1167,7 +1139,7 @@ int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state **cached_state, gfp_t mask) { return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, - cached_state, mask); + NULL, cached_state, mask); } static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, @@ -1183,40 +1155,42 @@ static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, * us if waiting is desired. */ int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, - int bits, struct extent_state **cached_state) + int bits, struct extent_state **cached_state, gfp_t mask) { int err; u64 failed_start; while (1) { - err = __set_extent_bit(tree, start, end, EXTENT_LOCKED | bits, - EXTENT_LOCKED, &failed_start, - cached_state, GFP_NOFS); - if (err == -EEXIST) { + err = set_extent_bit(tree, start, end, EXTENT_LOCKED | bits, + EXTENT_LOCKED, &failed_start, + cached_state, mask); + if (err == -EEXIST && (mask & __GFP_WAIT)) { wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); start = failed_start; - } else + } else { break; + } WARN_ON(start > end); } return err; } -int lock_extent(struct extent_io_tree *tree, u64 start, u64 end) +int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { - return lock_extent_bits(tree, start, end, 0, NULL); + return lock_extent_bits(tree, start, end, 0, NULL, mask); } -int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end) +int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) { int err; u64 failed_start; - err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED, - &failed_start, NULL, GFP_NOFS); + err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED, + &failed_start, NULL, mask); if (err == -EEXIST) { if (failed_start > start) clear_extent_bit(tree, start, failed_start - 1, - EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS); + EXTENT_LOCKED, 1, 0, NULL, mask); return 0; } return 1; @@ -1229,10 +1203,10 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, mask); } -int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end) +int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, - GFP_NOFS); + mask); } /* @@ -1246,7 +1220,7 @@ static int 
set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) while (index <= end_index) { page = find_get_page(tree->mapping, index); - BUG_ON(!page); /* Pages should be in the extent_io_tree */ + BUG_ON(!page); set_page_writeback(page); page_cache_release(page); index++; @@ -1369,9 +1343,9 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree, return found; } -static noinline void __unlock_for_delalloc(struct inode *inode, - struct page *locked_page, - u64 start, u64 end) +static noinline int __unlock_for_delalloc(struct inode *inode, + struct page *locked_page, + u64 start, u64 end) { int ret; struct page *pages[16]; @@ -1381,7 +1355,7 @@ static noinline void __unlock_for_delalloc(struct inode *inode, int i; if (index == locked_page->index && end_index == index) - return; + return 0; while (nr_pages > 0) { ret = find_get_pages_contig(inode->i_mapping, index, @@ -1396,6 +1370,7 @@ static noinline void __unlock_for_delalloc(struct inode *inode, index += ret; cond_resched(); } + return 0; } static noinline int lock_delalloc_pages(struct inode *inode, @@ -1525,10 +1500,11 @@ static noinline u64 find_lock_delalloc_range(struct inode *inode, goto out_failed; } } - BUG_ON(ret); /* Only valid values are 0 and -EAGAIN */ + BUG_ON(ret); /* step three, lock the state bits for the whole range */ - lock_extent_bits(tree, delalloc_start, delalloc_end, 0, &cached_state); + lock_extent_bits(tree, delalloc_start, delalloc_end, + 0, &cached_state, GFP_NOFS); /* then test to make sure it is all still delalloc */ ret = test_range_bit(tree, delalloc_start, delalloc_end, @@ -1785,34 +1761,39 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, * helper function to set a given page up to date if all the * extents in the tree for that page are up to date */ -static void check_page_uptodate(struct extent_io_tree *tree, struct page *page) +static int check_page_uptodate(struct extent_io_tree *tree, + struct page *page) { u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 end = start + PAGE_CACHE_SIZE - 1; if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL)) SetPageUptodate(page); + return 0; } /* * helper function to unlock a page if all the extents in the tree * for that page are unlocked */ -static void check_page_locked(struct extent_io_tree *tree, struct page *page) +static int check_page_locked(struct extent_io_tree *tree, + struct page *page) { u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 end = start + PAGE_CACHE_SIZE - 1; if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) unlock_page(page); + return 0; } /* * helper function to end page writeback if all the extents * in the tree for that page are done with writeback */ -static void check_page_writeback(struct extent_io_tree *tree, - struct page *page) +static int check_page_writeback(struct extent_io_tree *tree, + struct page *page) { end_page_writeback(page); + return 0; } /* @@ -1931,26 +1912,6 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, return 0; } -int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, - int mirror_num) -{ - struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; - u64 start = eb->start; - unsigned long i, num_pages = num_extent_pages(eb->start, eb->len); - int ret; - - for (i = 0; i < num_pages; i++) { - struct page *p = extent_buffer_page(eb, i); - ret = repair_io_failure(map_tree, start, PAGE_CACHE_SIZE, - start, p, mirror_num); - if (ret) - break; - start += PAGE_CACHE_SIZE; - } - - return ret; 
-} - /* * each time an IO finishes, we do a fast check in the IO failure tree * to see if we need to process or clean up an io_failure_record @@ -2297,7 +2258,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err) u64 start; u64 end; int whole_page; - int failed_mirror; int ret; if (err) @@ -2344,16 +2304,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err) else clean_io_failure(start, page); } - - if (!uptodate) + if (!uptodate) { + int failed_mirror; failed_mirror = (int)(unsigned long)bio->bi_bdev; - - if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { - ret = tree->ops->readpage_io_failed_hook(page, failed_mirror); - if (!ret && !err && - test_bit(BIO_UPTODATE, &bio->bi_flags)) - uptodate = 1; - } else if (!uptodate) { /* * The generic bio_readpage_error handles errors the * following way: If possible, new read requests are @@ -2367,6 +2320,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) ret = bio_readpage_error(bio, page, start, end, failed_mirror, NULL); if (ret == 0) { +error_handled: uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); if (err) @@ -2374,9 +2328,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err) uncache_state(&cached); continue; } + if (tree->ops && tree->ops->readpage_io_failed_hook) { + ret = tree->ops->readpage_io_failed_hook( + bio, page, start, end, + failed_mirror, state); + if (ret == 0) + goto error_handled; + } } - if (uptodate && tree->track_uptodate) { + if (uptodate) { set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC); } @@ -2425,12 +2386,8 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, return bio; } -/* - * Since writes are async, they will only return -ENOMEM. - * Reads can return the full range of I/O error conditions. 
- */ -static int __must_check submit_one_bio(int rw, struct bio *bio, - int mirror_num, unsigned long bio_flags) +static int submit_one_bio(int rw, struct bio *bio, int mirror_num, + unsigned long bio_flags) { int ret = 0; struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; @@ -2456,19 +2413,6 @@ static int __must_check submit_one_bio(int rw, struct bio *bio, return ret; } -static int merge_bio(struct extent_io_tree *tree, struct page *page, - unsigned long offset, size_t size, struct bio *bio, - unsigned long bio_flags) -{ - int ret = 0; - if (tree->ops && tree->ops->merge_bio_hook) - ret = tree->ops->merge_bio_hook(page, offset, size, bio, - bio_flags); - BUG_ON(ret < 0); - return ret; - -} - static int submit_extent_page(int rw, struct extent_io_tree *tree, struct page *page, sector_t sector, size_t size, unsigned long offset, @@ -2497,12 +2441,12 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, sector; if (prev_bio_flags != bio_flags || !contig || - merge_bio(tree, page, offset, page_size, bio, bio_flags) || + (tree->ops && tree->ops->merge_bio_hook && + tree->ops->merge_bio_hook(page, offset, page_size, bio, + bio_flags)) || bio_add_page(bio, page, page_size, offset) < page_size) { ret = submit_one_bio(rw, bio, mirror_num, prev_bio_flags); - if (ret < 0) - return ret; bio = NULL; } else { return 0; @@ -2529,31 +2473,25 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, return ret; } -void attach_extent_buffer_page(struct extent_buffer *eb, struct page *page) +void set_page_extent_mapped(struct page *page) { if (!PagePrivate(page)) { SetPagePrivate(page); page_cache_get(page); - set_page_private(page, (unsigned long)eb); - } else { - WARN_ON(page->private != (unsigned long)eb); + set_page_private(page, EXTENT_PAGE_PRIVATE); } } -void set_page_extent_mapped(struct page *page) +static void set_page_extent_head(struct page *page, unsigned long len) { - if (!PagePrivate(page)) { - SetPagePrivate(page); - page_cache_get(page); - set_page_private(page, EXTENT_PAGE_PRIVATE); - } + WARN_ON(!PagePrivate(page)); + set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); } /* * basic readpage implementation. 
Locked extent state structs are inserted * into the tree that are removed when the IO is done (by the end_io * handlers) - * XXX JDM: This needs looking at to ensure proper page locking */ static int __extent_read_full_page(struct extent_io_tree *tree, struct page *page, @@ -2593,11 +2531,11 @@ static int __extent_read_full_page(struct extent_io_tree *tree, end = page_end; while (1) { - lock_extent(tree, start, end); + lock_extent(tree, start, end, GFP_NOFS); ordered = btrfs_lookup_ordered_extent(inode, start); if (!ordered) break; - unlock_extent(tree, start, end); + unlock_extent(tree, start, end, GFP_NOFS); btrfs_start_ordered_extent(inode, ordered, 1); btrfs_put_ordered_extent(ordered); } @@ -2634,7 +2572,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, end - cur + 1, 0); if (IS_ERR_OR_NULL(em)) { SetPageError(page); - unlock_extent(tree, cur, end); + unlock_extent(tree, cur, end, GFP_NOFS); break; } extent_offset = cur - em->start; @@ -2686,7 +2624,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1, NULL)) { check_page_uptodate(tree, page); - unlock_extent(tree, cur, cur + iosize - 1); + unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); cur = cur + iosize; pg_offset += iosize; continue; @@ -2696,7 +2634,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, */ if (block_start == EXTENT_MAP_INLINE) { SetPageError(page); - unlock_extent(tree, cur, cur + iosize - 1); + unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); cur = cur + iosize; pg_offset += iosize; continue; @@ -2716,7 +2654,6 @@ static int __extent_read_full_page(struct extent_io_tree *tree, end_bio_extent_readpage, mirror_num, *bio_flags, this_bio_flag); - BUG_ON(ret == -ENOMEM); nr++; *bio_flags = this_bio_flag; } @@ -2858,11 +2795,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, delalloc_end, &page_started, &nr_written); - /* File system has been set read-only */ - if (ret) { - SetPageError(page); - goto done; - } + BUG_ON(ret); /* * delalloc_end is already one less than the total * length, so we don't subtract one from @@ -3035,275 +2968,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, return 0; } -static int eb_wait(void *word) -{ - io_schedule(); - return 0; -} - -static void wait_on_extent_buffer_writeback(struct extent_buffer *eb) -{ - wait_on_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK, eb_wait, - TASK_UNINTERRUPTIBLE); -} - -static int lock_extent_buffer_for_io(struct extent_buffer *eb, - struct btrfs_fs_info *fs_info, - struct extent_page_data *epd) -{ - unsigned long i, num_pages; - int flush = 0; - int ret = 0; - - if (!btrfs_try_tree_write_lock(eb)) { - flush = 1; - flush_write_bio(epd); - btrfs_tree_lock(eb); - } - - if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) { - btrfs_tree_unlock(eb); - if (!epd->sync_io) - return 0; - if (!flush) { - flush_write_bio(epd); - flush = 1; - } - while (1) { - wait_on_extent_buffer_writeback(eb); - btrfs_tree_lock(eb); - if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) - break; - btrfs_tree_unlock(eb); - } - } - - if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { - set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); - btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); - spin_lock(&fs_info->delalloc_lock); - if (fs_info->dirty_metadata_bytes >= eb->len) - fs_info->dirty_metadata_bytes -= eb->len; - else - WARN_ON(1); - spin_unlock(&fs_info->delalloc_lock); - ret = 1; - } - - 
btrfs_tree_unlock(eb); - - if (!ret) - return ret; - - num_pages = num_extent_pages(eb->start, eb->len); - for (i = 0; i < num_pages; i++) { - struct page *p = extent_buffer_page(eb, i); - - if (!trylock_page(p)) { - if (!flush) { - flush_write_bio(epd); - flush = 1; - } - lock_page(p); - } - } - - return ret; -} - -static void end_extent_buffer_writeback(struct extent_buffer *eb) -{ - clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); - smp_mb__after_clear_bit(); - wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK); -} - -static void end_bio_extent_buffer_writepage(struct bio *bio, int err) -{ - int uptodate = err == 0; - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct extent_buffer *eb; - int done; - - do { - struct page *page = bvec->bv_page; - - bvec--; - eb = (struct extent_buffer *)page->private; - BUG_ON(!eb); - done = atomic_dec_and_test(&eb->io_pages); - - if (!uptodate || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) { - set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); - ClearPageUptodate(page); - SetPageError(page); - } - - end_page_writeback(page); - - if (!done) - continue; - - end_extent_buffer_writeback(eb); - } while (bvec >= bio->bi_io_vec); - - bio_put(bio); - -} - -static int write_one_eb(struct extent_buffer *eb, - struct btrfs_fs_info *fs_info, - struct writeback_control *wbc, - struct extent_page_data *epd) -{ - struct block_device *bdev = fs_info->fs_devices->latest_bdev; - u64 offset = eb->start; - unsigned long i, num_pages; - int rw = (epd->sync_io ? WRITE_SYNC : WRITE); - int ret; - - clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); - num_pages = num_extent_pages(eb->start, eb->len); - atomic_set(&eb->io_pages, num_pages); - for (i = 0; i < num_pages; i++) { - struct page *p = extent_buffer_page(eb, i); - - clear_page_dirty_for_io(p); - set_page_writeback(p); - ret = submit_extent_page(rw, eb->tree, p, offset >> 9, - PAGE_CACHE_SIZE, 0, bdev, &epd->bio, - -1, end_bio_extent_buffer_writepage, - 0, 0, 0); - if (ret) { - set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); - SetPageError(p); - if (atomic_sub_and_test(num_pages - i, &eb->io_pages)) - end_extent_buffer_writeback(eb); - ret = -EIO; - break; - } - offset += PAGE_CACHE_SIZE; - update_nr_written(p, wbc, 1); - unlock_page(p); - } - - if (unlikely(ret)) { - for (; i < num_pages; i++) { - struct page *p = extent_buffer_page(eb, i); - unlock_page(p); - } - } - - return ret; -} - -int btree_write_cache_pages(struct address_space *mapping, - struct writeback_control *wbc) -{ - struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree; - struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info; - struct extent_buffer *eb, *prev_eb = NULL; - struct extent_page_data epd = { - .bio = NULL, - .tree = tree, - .extent_locked = 0, - .sync_io = wbc->sync_mode == WB_SYNC_ALL, - }; - int ret = 0; - int done = 0; - int nr_to_write_done = 0; - struct pagevec pvec; - int nr_pages; - pgoff_t index; - pgoff_t end; /* Inclusive */ - int scanned = 0; - int tag; - - pagevec_init(&pvec, 0); - if (wbc->range_cyclic) { - index = mapping->writeback_index; /* Start from prev offset */ - end = -1; - } else { - index = wbc->range_start >> PAGE_CACHE_SHIFT; - end = wbc->range_end >> PAGE_CACHE_SHIFT; - scanned = 1; - } - if (wbc->sync_mode == WB_SYNC_ALL) - tag = PAGECACHE_TAG_TOWRITE; - else - tag = PAGECACHE_TAG_DIRTY; -retry: - if (wbc->sync_mode == WB_SYNC_ALL) - tag_pages_for_writeback(mapping, index, end); - while (!done && !nr_to_write_done && (index <= end) && - (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, 
tag, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { - unsigned i; - - scanned = 1; - for (i = 0; i < nr_pages; i++) { - struct page *page = pvec.pages[i]; - - if (!PagePrivate(page)) - continue; - - if (!wbc->range_cyclic && page->index > end) { - done = 1; - break; - } - - eb = (struct extent_buffer *)page->private; - if (!eb) { - WARN_ON(1); - continue; - } - - if (eb == prev_eb) - continue; - - if (!atomic_inc_not_zero(&eb->refs)) { - WARN_ON(1); - continue; - } - - prev_eb = eb; - ret = lock_extent_buffer_for_io(eb, fs_info, &epd); - if (!ret) { - free_extent_buffer(eb); - continue; - } - - ret = write_one_eb(eb, fs_info, wbc, &epd); - if (ret) { - done = 1; - free_extent_buffer(eb); - break; - } - free_extent_buffer(eb); - - /* - * the filesystem may choose to bump up nr_to_write. - * We have to make sure to honor the new nr_to_write - * at any time - */ - nr_to_write_done = wbc->nr_to_write <= 0; - } - pagevec_release(&pvec); - cond_resched(); - } - if (!scanned && !done) { - /* - * We hit the last page and there is more work to be done: wrap - * back to the start of the file - */ - scanned = 1; - index = 0; - goto retry; - } - flush_write_bio(&epd); - return ret; -} - /** * write_cache_pages - walk the list of dirty pages of the given address space and write all of them. * @mapping: address space structure to write @@ -3435,14 +3099,10 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, static void flush_epd_write_bio(struct extent_page_data *epd) { if (epd->bio) { - int rw = WRITE; - int ret; - if (epd->sync_io) - rw = WRITE_SYNC; - - ret = submit_one_bio(rw, epd->bio, 0, 0); - BUG_ON(ret < 0); /* -ENOMEM */ + if (epd->sync_io) + submit_one_bio(WRITE_SYNC, epd->bio, 0, 0); + else + submit_one_bio(WRITE, epd->bio, 0, 0); epd->bio = NULL; } } @@ -3559,7 +3219,7 @@ int extent_readpages(struct extent_io_tree *tree, } BUG_ON(!list_empty(pages)); if (bio) - return submit_one_bio(READ, bio, 0, bio_flags); + submit_one_bio(READ, bio, 0, bio_flags); return 0; } @@ -3580,7 +3240,7 @@ int extent_invalidatepage(struct extent_io_tree *tree, if (start > end) return 0; - lock_extent_bits(tree, start, end, 0, &cached_state); + lock_extent_bits(tree, start, end, 0, &cached_state, GFP_NOFS); wait_on_page_writeback(page); clear_extent_bit(tree, start, end, EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | @@ -3794,7 +3454,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, } lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, - &cached_state); + &cached_state, GFP_NOFS); em = get_extent_skip_holes(inode, start, last_for_get_extent, get_extent); @@ -3888,7 +3548,26 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, inline struct page *extent_buffer_page(struct extent_buffer *eb, unsigned long i) { - return eb->pages[i]; + struct page *p; + struct address_space *mapping; + + if (i == 0) + return eb->first_page; + i += eb->start >> PAGE_CACHE_SHIFT; + mapping = eb->first_page->mapping; + if (!mapping) + return NULL; + + /* + * extent_buffer_page is only called after pinning the page + * by increasing the reference count. So we know the page must + * be in the radix tree.
+ */ + rcu_read_lock(); + p = radix_tree_lookup(&mapping->page_tree, i); + rcu_read_unlock(); + + return p; } inline unsigned long num_extent_pages(u64 start, u64 len) @@ -3897,19 +3576,6 @@ inline unsigned long num_extent_pages(u64 start, u64 len) (start >> PAGE_CACHE_SHIFT); } -static void __free_extent_buffer(struct extent_buffer *eb) -{ -#if LEAK_DEBUG - unsigned long flags; - spin_lock_irqsave(&leak_lock, flags); - list_del(&eb->leak_list); - spin_unlock_irqrestore(&leak_lock, flags); -#endif - if (eb->pages && eb->pages != eb->inline_pages) - kfree(eb->pages); - kmem_cache_free(extent_buffer_cache, eb); -} - static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, u64 start, unsigned long len, @@ -3925,7 +3591,6 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, return NULL; eb->start = start; eb->len = len; - eb->tree = tree; rwlock_init(&eb->lock); atomic_set(&eb->write_locks, 0); atomic_set(&eb->read_locks, 0); @@ -3942,32 +3607,20 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, list_add(&eb->leak_list, &buffers); spin_unlock_irqrestore(&leak_lock, flags); #endif - spin_lock_init(&eb->refs_lock); atomic_set(&eb->refs, 1); - atomic_set(&eb->io_pages, 0); - - if (len > MAX_INLINE_EXTENT_BUFFER_SIZE) { - struct page **pages; - int num_pages = (len + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; - pages = kzalloc(num_pages, mask); - if (!pages) { - __free_extent_buffer(eb); - return NULL; - } - eb->pages = pages; - } else { - eb->pages = eb->inline_pages; - } return eb; } -static int extent_buffer_under_io(struct extent_buffer *eb) +static void __free_extent_buffer(struct extent_buffer *eb) { - return (atomic_read(&eb->io_pages) || - test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) || - test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); +#if LEAK_DEBUG + unsigned long flags; + spin_lock_irqsave(&leak_lock, flags); + list_del(&eb->leak_list); + spin_unlock_irqrestore(&leak_lock, flags); +#endif + kmem_cache_free(extent_buffer_cache, eb); } /* @@ -3979,7 +3632,8 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, unsigned long index; struct page *page; - BUG_ON(extent_buffer_under_io(eb)); + if (!eb->first_page) + return; index = num_extent_pages(eb->start, eb->len); if (start_idx >= index) @@ -3988,34 +3642,8 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, do { index--; page = extent_buffer_page(eb, index); - if (page) { - spin_lock(&page->mapping->private_lock); - /* - * We do this since we'll remove the pages after we've - * removed the eb from the radix tree, so we could race - * and have this page now attached to the new eb. So - * only clear page_private if it's still connected to - * this eb. - */ - if (PagePrivate(page) && - page->private == (unsigned long)eb) { - BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); - BUG_ON(PageDirty(page)); - BUG_ON(PageWriteback(page)); - /* - * We need to make sure we haven't be attached - * to a new eb. - */ - ClearPagePrivate(page); - set_page_private(page, 0); - /* One for the page private */ - page_cache_release(page); - } - spin_unlock(&page->mapping->private_lock); - - /* One for when we alloced the page */ + if (page) page_cache_release(page); - } } while (index != start_idx); } @@ -4028,50 +3656,9 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) __free_extent_buffer(eb); } -static void check_buffer_tree_ref(struct extent_buffer *eb) -{ - /* the ref bit is tricky. 
We have to make sure it is set - * if we have the buffer dirty. Otherwise the - * code to free a buffer can end up dropping a dirty - * page - * - * Once the ref bit is set, it won't go away while the - * buffer is dirty or in writeback, and it also won't - * go away while we have the reference count on the - * eb bumped. - * - * We can't just set the ref bit without bumping the - * ref on the eb because free_extent_buffer might - * see the ref bit and try to clear it. If this happens - * free_extent_buffer might end up dropping our original - * ref by mistake and freeing the page before we are able - * to add one more ref. - * - * So bump the ref count first, then set the bit. If someone - * beat us to it, drop the ref we added. - */ - if (!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) { - atomic_inc(&eb->refs); - if (test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) - atomic_dec(&eb->refs); - } -} - -static void mark_extent_buffer_accessed(struct extent_buffer *eb) -{ - unsigned long num_pages, i; - - check_buffer_tree_ref(eb); - - num_pages = num_extent_pages(eb->start, eb->len); - for (i = 0; i < num_pages; i++) { - struct page *p = extent_buffer_page(eb, i); - mark_page_accessed(p); - } -} - struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, - u64 start, unsigned long len) + u64 start, unsigned long len, + struct page *page0) { unsigned long num_pages = num_extent_pages(start, len); unsigned long i; @@ -4087,7 +3674,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); if (eb && atomic_inc_not_zero(&eb->refs)) { rcu_read_unlock(); - mark_extent_buffer_accessed(eb); + mark_page_accessed(eb->first_page); return eb; } rcu_read_unlock(); @@ -4096,43 +3683,32 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, if (!eb) return NULL; - for (i = 0; i < num_pages; i++, index++) { + if (page0) { + eb->first_page = page0; + i = 1; + index++; + page_cache_get(page0); + mark_page_accessed(page0); + set_page_extent_mapped(page0); + set_page_extent_head(page0, len); + uptodate = PageUptodate(page0); + } else { + i = 0; + } + for (; i < num_pages; i++, index++) { p = find_or_create_page(mapping, index, GFP_NOFS); if (!p) { WARN_ON(1); goto free_eb; } - - spin_lock(&mapping->private_lock); - if (PagePrivate(p)) { - /* - * We could have already allocated an eb for this page - * and attached one so lets see if we can get a ref on - * the existing eb, and if we can we know it's good and - * we can just return that one, else we know we can just - * overwrite page->private. - */ - exists = (struct extent_buffer *)p->private; - if (atomic_inc_not_zero(&exists->refs)) { - spin_unlock(&mapping->private_lock); - unlock_page(p); - mark_extent_buffer_accessed(exists); - goto free_eb; - } - - /* - * Do this so attach doesn't complain and we need to - * drop the ref the old guy had. 
- */ - ClearPagePrivate(p); - WARN_ON(PageDirty(p)); - page_cache_release(p); - } - attach_extent_buffer_page(eb, p); - spin_unlock(&mapping->private_lock); - WARN_ON(PageDirty(p)); + set_page_extent_mapped(p); mark_page_accessed(p); - eb->pages[i] = p; + if (i == 0) { + eb->first_page = p; + set_page_extent_head(p, len); + } else { + set_page_private(p, EXTENT_PAGE_PRIVATE); + } if (!PageUptodate(p)) uptodate = 0; @@ -4140,10 +3716,12 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, * see below about how we avoid a nasty race with release page * and why we unlock later */ + if (i != 0) + unlock_page(p); } if (uptodate) set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); -again: + ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); if (ret) goto free_eb; @@ -4153,21 +3731,14 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, if (ret == -EEXIST) { exists = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); - if (!atomic_inc_not_zero(&exists->refs)) { - spin_unlock(&tree->buffer_lock); - radix_tree_preload_end(); - exists = NULL; - goto again; - } + /* add one reference for the caller */ + atomic_inc(&exists->refs); spin_unlock(&tree->buffer_lock); radix_tree_preload_end(); - mark_extent_buffer_accessed(exists); goto free_eb; } /* add one reference for the tree */ - spin_lock(&eb->refs_lock); - check_buffer_tree_ref(eb); - spin_unlock(&eb->refs_lock); + atomic_inc(&eb->refs); spin_unlock(&tree->buffer_lock); radix_tree_preload_end(); @@ -4180,20 +3751,15 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, * after the extent buffer is in the radix tree so * it doesn't get lost */ - SetPageChecked(eb->pages[0]); - for (i = 1; i < num_pages; i++) { - p = extent_buffer_page(eb, i); - ClearPageChecked(p); - unlock_page(p); - } - unlock_page(eb->pages[0]); + set_page_extent_mapped(eb->first_page); + set_page_extent_head(eb->first_page, eb->len); + if (!page0) + unlock_page(eb->first_page); return eb; free_eb: - for (i = 0; i < num_pages; i++) { - if (eb->pages[i]) - unlock_page(eb->pages[i]); - } + if (eb->first_page && !page0) + unlock_page(eb->first_page); if (!atomic_dec_and_test(&eb->refs)) return exists; @@ -4210,7 +3776,7 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); if (eb && atomic_inc_not_zero(&eb->refs)) { rcu_read_unlock(); - mark_extent_buffer_accessed(eb); + mark_page_accessed(eb->first_page); return eb; } rcu_read_unlock(); @@ -4218,71 +3784,19 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, return NULL; } -static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head) -{ - struct extent_buffer *eb = - container_of(head, struct extent_buffer, rcu_head); - - __free_extent_buffer(eb); -} - -/* Expects to have eb->eb_lock already held */ -static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask) -{ - WARN_ON(atomic_read(&eb->refs) == 0); - if (atomic_dec_and_test(&eb->refs)) { - struct extent_io_tree *tree = eb->tree; - - spin_unlock(&eb->refs_lock); - - spin_lock(&tree->buffer_lock); - radix_tree_delete(&tree->buffer, - eb->start >> PAGE_CACHE_SHIFT); - spin_unlock(&tree->buffer_lock); - - /* Should be safe to release our pages at this point */ - btrfs_release_extent_buffer_page(eb, 0); - - call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); - return; - } - spin_unlock(&eb->refs_lock); -} - void free_extent_buffer(struct extent_buffer *eb) { if (!eb) return; - 
spin_lock(&eb->refs_lock); - if (atomic_read(&eb->refs) == 2 && - test_bit(EXTENT_BUFFER_STALE, &eb->bflags) && - !extent_buffer_under_io(eb) && - test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) - atomic_dec(&eb->refs); - - /* - * I know this is terrible, but it's temporary until we stop tracking - * the uptodate bits and such for the extent buffers. - */ - release_extent_buffer(eb, GFP_ATOMIC); -} - -void free_extent_buffer_stale(struct extent_buffer *eb) -{ - if (!eb) + if (!atomic_dec_and_test(&eb->refs)) return; - spin_lock(&eb->refs_lock); - set_bit(EXTENT_BUFFER_STALE, &eb->bflags); - - if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) && - test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) - atomic_dec(&eb->refs); - release_extent_buffer(eb, GFP_NOFS); + WARN_ON(1); } -void clear_extent_buffer_dirty(struct extent_buffer *eb) +int clear_extent_buffer_dirty(struct extent_io_tree *tree, + struct extent_buffer *eb) { unsigned long i; unsigned long num_pages; @@ -4298,6 +3812,10 @@ void clear_extent_buffer_dirty(struct extent_buffer *eb) lock_page(page); WARN_ON(!PagePrivate(page)); + set_page_extent_mapped(page); + if (i == 0) + set_page_extent_head(page, eb->len); + clear_page_dirty_for_io(page); spin_lock_irq(&page->mapping->tree_lock); if (!PageDirty(page)) { @@ -4309,29 +3827,24 @@ void clear_extent_buffer_dirty(struct extent_buffer *eb) ClearPageError(page); unlock_page(page); } - WARN_ON(atomic_read(&eb->refs) == 0); + return 0; } -int set_extent_buffer_dirty(struct extent_buffer *eb) +int set_extent_buffer_dirty(struct extent_io_tree *tree, + struct extent_buffer *eb) { unsigned long i; unsigned long num_pages; int was_dirty = 0; - check_buffer_tree_ref(eb); - was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); - num_pages = num_extent_pages(eb->start, eb->len); - WARN_ON(atomic_read(&eb->refs) == 0); - WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)); - for (i = 0; i < num_pages; i++) - set_page_dirty(extent_buffer_page(eb, i)); + __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); return was_dirty; } -static int range_straddles_pages(u64 start, u64 len) +static int __eb_straddles_pages(u64 start, u64 len) { if (len < PAGE_CACHE_SIZE) return 1; @@ -4342,14 +3855,25 @@ static int range_straddles_pages(u64 start, u64 len) return 0; } -int clear_extent_buffer_uptodate(struct extent_buffer *eb) +static int eb_straddles_pages(struct extent_buffer *eb) +{ + return __eb_straddles_pages(eb->start, eb->len); +} + +int clear_extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb, + struct extent_state **cached_state) { unsigned long i; struct page *page; unsigned long num_pages; - clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); num_pages = num_extent_pages(eb->start, eb->len); + clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); + + clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, + cached_state, GFP_NOFS); + for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); if (page) @@ -4358,16 +3882,27 @@ int clear_extent_buffer_uptodate(struct extent_buffer *eb) return 0; } -int set_extent_buffer_uptodate(struct extent_buffer *eb) +int set_extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb) { unsigned long i; struct page *page; unsigned long num_pages; - set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); num_pages = num_extent_pages(eb->start, eb->len); + + if (eb_straddles_pages(eb)) { + set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, + NULL, GFP_NOFS); + } for 
(i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); + if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || + ((i == num_pages - 1) && + ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { + check_page_uptodate(tree, page); + continue; + } SetPageUptodate(page); } return 0; @@ -4382,7 +3917,7 @@ int extent_range_uptodate(struct extent_io_tree *tree, int uptodate; unsigned long index; - if (range_straddles_pages(start, end - start + 1)) { + if (__eb_straddles_pages(start, end - start + 1)) { ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL); if (ret) @@ -4404,9 +3939,35 @@ int extent_range_uptodate(struct extent_io_tree *tree, return pg_uptodate; } -int extent_buffer_uptodate(struct extent_buffer *eb) +int extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb, + struct extent_state *cached_state) { - return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); + int ret = 0; + unsigned long num_pages; + unsigned long i; + struct page *page; + int pg_uptodate = 1; + + if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) + return 1; + + if (eb_straddles_pages(eb)) { + ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, + EXTENT_UPTODATE, 1, cached_state); + if (ret) + return ret; + } + + num_pages = num_extent_pages(eb->start, eb->len); + for (i = 0; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + if (!PageUptodate(page)) { + pg_uptodate = 0; + break; + } + } + return pg_uptodate; } int read_extent_buffer_pages(struct extent_io_tree *tree, @@ -4420,14 +3981,21 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, int ret = 0; int locked_pages = 0; int all_uptodate = 1; + int inc_all_pages = 0; unsigned long num_pages; - unsigned long num_reads = 0; struct bio *bio = NULL; unsigned long bio_flags = 0; if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) return 0; + if (eb_straddles_pages(eb)) { + if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, + EXTENT_UPTODATE, 1, NULL)) { + return 0; + } + } + if (start) { WARN_ON(start < eb->start); start_i = (start >> PAGE_CACHE_SHIFT) - @@ -4446,10 +4014,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, lock_page(page); } locked_pages++; - if (!PageUptodate(page)) { - num_reads++; + if (!PageUptodate(page)) all_uptodate = 0; - } } if (all_uptodate) { if (start_i == 0) @@ -4457,12 +4023,20 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, goto unlock_exit; } - clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); - eb->failed_mirror = 0; - atomic_set(&eb->io_pages, num_reads); for (i = start_i; i < num_pages; i++) { page = extent_buffer_page(eb, i); + + WARN_ON(!PagePrivate(page)); + + set_page_extent_mapped(page); + if (i == 0) + set_page_extent_head(page, eb->len); + + if (inc_all_pages) + page_cache_get(page); if (!PageUptodate(page)) { + if (start_i == 0) + inc_all_pages = 1; ClearPageError(page); err = __extent_read_full_page(tree, page, get_extent, &bio, @@ -4474,11 +4048,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, } } - if (bio) { - err = submit_one_bio(READ, bio, mirror_num, bio_flags); - if (err) - return err; - } + if (bio) + submit_one_bio(READ, bio, mirror_num, bio_flags); if (ret || wait != WAIT_COMPLETE) return ret; @@ -4490,6 +4061,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, ret = -EIO; } + if (!ret) + set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); return ret; unlock_exit: @@ -4731,20 +4304,15 @@ static void copy_pages(struct page *dst_page, struct page *src_page, { char *dst_kaddr = page_address(dst_page); 
char *src_kaddr; - int must_memmove = 0; if (dst_page != src_page) { src_kaddr = page_address(src_page); } else { src_kaddr = dst_kaddr; - if (areas_overlap(src_off, dst_off, len)) - must_memmove = 1; + BUG_ON(areas_overlap(src_off, dst_off, len)); } - if (must_memmove) - memmove(dst_kaddr + dst_off, src_kaddr + src_off, len); - else - memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); + memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); } void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, @@ -4814,7 +4382,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, "len %lu len %lu\n", dst_offset, len, dst->len); BUG_ON(1); } - if (dst_offset < src_offset) { + if (!areas_overlap(src_offset, dst_offset, len)) { memcpy_extent_buffer(dst, dst_offset, src_offset, len); return; } @@ -4840,48 +4408,47 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, } } -int try_release_extent_buffer(struct page *page, gfp_t mask) +static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head) { - struct extent_buffer *eb; + struct extent_buffer *eb = + container_of(head, struct extent_buffer, rcu_head); - /* - * We need to make sure nobody is attaching this page to an eb right - * now. - */ - spin_lock(&page->mapping->private_lock); - if (!PagePrivate(page)) { - spin_unlock(&page->mapping->private_lock); - return 1; - } + btrfs_release_extent_buffer(eb); +} - eb = (struct extent_buffer *)page->private; - BUG_ON(!eb); +int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) +{ + u64 start = page_offset(page); + struct extent_buffer *eb; + int ret = 1; - /* - * This is a little awful but should be ok, we need to make sure that - * the eb doesn't disappear out from under us while we're looking at - * this page. - */ - spin_lock(&eb->refs_lock); - if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) { - spin_unlock(&eb->refs_lock); - spin_unlock(&page->mapping->private_lock); - return 0; + spin_lock(&tree->buffer_lock); + eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); + if (!eb) { + spin_unlock(&tree->buffer_lock); + return ret; } - spin_unlock(&page->mapping->private_lock); - if ((mask & GFP_NOFS) == GFP_NOFS) - mask = GFP_NOFS; + if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { + ret = 0; + goto out; + } /* - * If tree ref isn't set then we know the ref on this eb is a real ref, - * so just return, this page will likely be freed soon anyway. + * set @eb->refs to 0 if it is already 1, and then release the @eb. + * Or go back.
*/ - if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) { - spin_unlock(&eb->refs_lock); - return 0; + if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) { + ret = 0; + goto out; } - release_extent_buffer(eb, mask); - return 1; + radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT); +out: + spin_unlock(&tree->buffer_lock); + + /* at this point we can safely release the extent buffer */ + if (atomic_read(&eb->refs) == 0) + call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); + return ret; } diff --git a/trunk/fs/btrfs/extent_io.h b/trunk/fs/btrfs/extent_io.h index faf10eb57f75..cecc3518c121 100644 --- a/trunk/fs/btrfs/extent_io.h +++ b/trunk/fs/btrfs/extent_io.h @@ -35,10 +35,6 @@ #define EXTENT_BUFFER_DIRTY 2 #define EXTENT_BUFFER_CORRUPT 3 #define EXTENT_BUFFER_READAHEAD 4 /* this got triggered by readahead */ -#define EXTENT_BUFFER_TREE_REF 5 -#define EXTENT_BUFFER_STALE 6 -#define EXTENT_BUFFER_WRITEBACK 7 -#define EXTENT_BUFFER_IOERR 8 /* these are flags for extent_clear_unlock_delalloc */ #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 @@ -58,7 +54,6 @@ #define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3 struct extent_state; -struct btrfs_root; typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, struct bio *bio, int mirror_num, @@ -74,7 +69,9 @@ struct extent_io_ops { size_t size, struct bio *bio, unsigned long bio_flags); int (*readpage_io_hook)(struct page *page, u64 start, u64 end); - int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); + int (*readpage_io_failed_hook)(struct bio *bio, struct page *page, + u64 start, u64 end, int failed_mirror, + struct extent_state *state); int (*writepage_io_failed_hook)(struct bio *bio, struct page *page, u64 start, u64 end, struct extent_state *state); @@ -100,7 +97,6 @@ struct extent_io_tree { struct radix_tree_root buffer; struct address_space *mapping; u64 dirty_bytes; - int track_uptodate; spinlock_t lock; spinlock_t buffer_lock; struct extent_io_ops *ops; @@ -123,21 +119,16 @@ struct extent_state { struct list_head leak_list; }; -#define INLINE_EXTENT_BUFFER_PAGES 16 -#define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES * PAGE_CACHE_SIZE) struct extent_buffer { u64 start; unsigned long len; unsigned long map_start; unsigned long map_len; + struct page *first_page; unsigned long bflags; - struct extent_io_tree *tree; - spinlock_t refs_lock; - atomic_t refs; - atomic_t io_pages; - int failed_mirror; struct list_head leak_list; struct rcu_head rcu_head; + atomic_t refs; pid_t lock_owner; /* count of read lock holders on the extent buffer */ @@ -161,9 +152,6 @@ struct extent_buffer { * to unlock */ wait_queue_head_t read_lock_wq; - wait_queue_head_t lock_wq; - struct page *inline_pages[INLINE_EXTENT_BUFFER_PAGES]; - struct page **pages; }; static inline void extent_set_compress_type(unsigned long *bio_flags, @@ -190,17 +178,18 @@ void extent_io_tree_init(struct extent_io_tree *tree, int try_release_extent_mapping(struct extent_map_tree *map, struct extent_io_tree *tree, struct page *page, gfp_t mask); -int try_release_extent_buffer(struct page *page, gfp_t mask); +int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page); int try_release_extent_state(struct extent_map_tree *map, struct extent_io_tree *tree, struct page *page, gfp_t mask); -int lock_extent(struct extent_io_tree *tree, u64 start, u64 end); +int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, - int bits, struct extent_state 
**cached); -int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end); + int bits, struct extent_state **cached, gfp_t mask); +int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state **cached, gfp_t mask); -int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end); +int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); int extent_read_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, int mirror_num); int __init extent_io_init(void); @@ -221,7 +210,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, gfp_t mask); int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, - int bits, u64 *failed_start, + int bits, int exclusive_bits, u64 *failed_start, struct extent_state **cached_state, gfp_t mask); int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state **cached_state, gfp_t mask); @@ -251,8 +240,6 @@ int extent_writepages(struct extent_io_tree *tree, struct address_space *mapping, get_extent_t *get_extent, struct writeback_control *wbc); -int btree_write_cache_pages(struct address_space *mapping, - struct writeback_control *wbc); int extent_readpages(struct extent_io_tree *tree, struct address_space *mapping, struct list_head *pages, unsigned nr_pages, @@ -264,11 +251,11 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); void set_page_extent_mapped(struct page *page); struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, - u64 start, unsigned long len); + u64 start, unsigned long len, + struct page *page0); struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, u64 start, unsigned long len); void free_extent_buffer(struct extent_buffer *eb); -void free_extent_buffer_stale(struct extent_buffer *eb); #define WAIT_NONE 0 #define WAIT_COMPLETE 1 #define WAIT_PAGE_LOCK 2 @@ -300,12 +287,19 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, unsigned long src_offset, unsigned long len); void memset_extent_buffer(struct extent_buffer *eb, char c, unsigned long start, unsigned long len); -void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits); -void clear_extent_buffer_dirty(struct extent_buffer *eb); -int set_extent_buffer_dirty(struct extent_buffer *eb); -int set_extent_buffer_uptodate(struct extent_buffer *eb); -int clear_extent_buffer_uptodate(struct extent_buffer *eb); -int extent_buffer_uptodate(struct extent_buffer *eb); +int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits); +int clear_extent_buffer_dirty(struct extent_io_tree *tree, + struct extent_buffer *eb); +int set_extent_buffer_dirty(struct extent_io_tree *tree, + struct extent_buffer *eb); +int set_extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb); +int clear_extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb, + struct extent_state **cached_state); +int extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb, + struct extent_state *cached_state); int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, unsigned long min_len, char **map, unsigned long *map_start, @@ -326,6 +320,4 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 
start, u64 length, u64 logical, struct page *page, int mirror_num); int end_extent_writepage(struct page *page, int err, u64 start, u64 end); -int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, - int mirror_num); #endif diff --git a/trunk/fs/btrfs/file-item.c b/trunk/fs/btrfs/file-item.c index 5d158d320233..078b4fd54500 100644 --- a/trunk/fs/btrfs/file-item.c +++ b/trunk/fs/btrfs/file-item.c @@ -25,12 +25,10 @@ #include "transaction.h" #include "print-tree.h" -#define __MAX_CSUM_ITEMS(r, size) ((((BTRFS_LEAF_DATA_SIZE(r) - \ +#define MAX_CSUM_ITEMS(r, size) ((((BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item) * 2) / \ size) - 1)) -#define MAX_CSUM_ITEMS(r, size) (min(__MAX_CSUM_ITEMS(r, size), PAGE_CACHE_SIZE)) - #define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \ sizeof(struct btrfs_ordered_sum)) / \ sizeof(struct btrfs_sector_sum) * \ @@ -61,7 +59,7 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, sizeof(*item)); if (ret < 0) goto out; - BUG_ON(ret); /* Can't happen */ + BUG_ON(ret); leaf = path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); @@ -286,7 +284,6 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, struct btrfs_ordered_sum *sums; struct btrfs_sector_sum *sector_sum; struct btrfs_csum_item *item; - LIST_HEAD(tmplist); unsigned long offset; int ret; size_t size; @@ -361,10 +358,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, MAX_ORDERED_SUM_BYTES(root)); sums = kzalloc(btrfs_ordered_sum_size(root, size), GFP_NOFS); - if (!sums) { - ret = -ENOMEM; - goto fail; - } + BUG_ON(!sums); sector_sum = sums->sums; sums->bytenr = start; @@ -386,19 +380,12 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, offset += csum_size; sector_sum++; } - list_add_tail(&sums->list, &tmplist); + list_add_tail(&sums->list, list); } path->slots[0]++; } ret = 0; fail: - while (ret < 0 && !list_empty(&tmplist)) { - sums = list_entry(&tmplist, struct btrfs_ordered_sum, list); - list_del(&sums->list); - kfree(sums); - } - list_splice_tail(&tmplist, list); - btrfs_free_path(path); return ret; } @@ -433,7 +420,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, offset = page_offset(bvec->bv_page) + bvec->bv_offset; ordered = btrfs_lookup_ordered_extent(inode, offset); - BUG_ON(!ordered); /* Logic error */ + BUG_ON(!ordered); sums->bytenr = ordered->start; while (bio_index < bio->bi_vcnt) { @@ -452,11 +439,11 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left), GFP_NOFS); - BUG_ON(!sums); /* -ENOMEM */ + BUG_ON(!sums); sector_sum = sums->sums; sums->len = bytes_left; ordered = btrfs_lookup_ordered_extent(inode, offset); - BUG_ON(!ordered); /* Logic error */ + BUG_ON(!ordered); sums->bytenr = ordered->start; } @@ -496,17 +483,18 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, * This calls btrfs_truncate_item with the correct args based on the * overlap, and fixes up the key as required. 
*/ -static noinline void truncate_one_csum(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - struct btrfs_key *key, - u64 bytenr, u64 len) +static noinline int truncate_one_csum(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *key, + u64 bytenr, u64 len) { struct extent_buffer *leaf; u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); u64 csum_end; u64 end_byte = bytenr + len; u32 blocksize_bits = root->fs_info->sb->s_blocksize_bits; + int ret; leaf = path->nodes[0]; csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size; @@ -522,7 +510,7 @@ static noinline void truncate_one_csum(struct btrfs_trans_handle *trans, */ u32 new_size = (bytenr - key->offset) >> blocksize_bits; new_size *= csum_size; - btrfs_truncate_item(trans, root, path, new_size, 1); + ret = btrfs_truncate_item(trans, root, path, new_size, 1); } else if (key->offset >= bytenr && csum_end > end_byte && end_byte > key->offset) { /* @@ -534,13 +522,15 @@ static noinline void truncate_one_csum(struct btrfs_trans_handle *trans, u32 new_size = (csum_end - end_byte) >> blocksize_bits; new_size *= csum_size; - btrfs_truncate_item(trans, root, path, new_size, 0); + ret = btrfs_truncate_item(trans, root, path, new_size, 0); key->offset = end_byte; - btrfs_set_item_key_safe(trans, root, path, key); + ret = btrfs_set_item_key_safe(trans, root, path, key); + BUG_ON(ret); } else { BUG(); } + return 0; } /* @@ -645,14 +635,13 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, * item changed size or key */ ret = btrfs_split_item(trans, root, path, &key, offset); - if (ret && ret != -EAGAIN) { - btrfs_abort_transaction(trans, root, ret); - goto out; - } + BUG_ON(ret && ret != -EAGAIN); key.offset = end_byte - 1; } else { - truncate_one_csum(trans, root, path, &key, bytenr, len); + ret = truncate_one_csum(trans, root, path, + &key, bytenr, len); + BUG_ON(ret); if (key.offset < bytenr) break; } @@ -783,7 +772,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, if (diff != csum_size) goto insert; - btrfs_extend_item(trans, root, path, diff); + ret = btrfs_extend_item(trans, root, path, diff); goto csum; } diff --git a/trunk/fs/btrfs/file.c b/trunk/fs/btrfs/file.c index d83260d7498f..e8d06b6b9194 100644 --- a/trunk/fs/btrfs/file.c +++ b/trunk/fs/btrfs/file.c @@ -452,7 +452,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, split = alloc_extent_map(); if (!split2) split2 = alloc_extent_map(); - BUG_ON(!split || !split2); /* -ENOMEM */ + BUG_ON(!split || !split2); write_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); @@ -494,7 +494,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, split->flags = flags; split->compress_type = em->compress_type; ret = add_extent_mapping(em_tree, split); - BUG_ON(ret); /* Logic error */ + BUG_ON(ret); free_extent_map(split); split = split2; split2 = NULL; @@ -520,7 +520,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, } ret = add_extent_mapping(em_tree, split); - BUG_ON(ret); /* Logic error */ + BUG_ON(ret); free_extent_map(split); split = NULL; } @@ -679,7 +679,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, root->root_key.objectid, new_key.objectid, start - extent_offset, 0); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); *hint_byte = disk_bytenr; } key.offset = start; @@ -754,7 +754,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct 
inode *inode, root->root_key.objectid, key.objectid, key.offset - extent_offset, 0); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); inode_sub_bytes(inode, extent_end - key.offset); *hint_byte = disk_bytenr; @@ -770,10 +770,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, ret = btrfs_del_items(trans, root, path, del_slot, del_nr); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - goto out; - } + BUG_ON(ret); del_nr = 0; del_slot = 0; @@ -785,13 +782,11 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, BUG_ON(1); } - if (!ret && del_nr > 0) { + if (del_nr > 0) { ret = btrfs_del_items(trans, root, path, del_slot, del_nr); - if (ret) - btrfs_abort_transaction(trans, root, ret); + BUG_ON(ret); } -out: btrfs_free_path(path); return ret; } @@ -949,10 +944,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, btrfs_release_path(path); goto again; } - if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); - goto out; - } + BUG_ON(ret < 0); leaf = path->nodes[0]; fi = btrfs_item_ptr(leaf, path->slots[0] - 1, @@ -971,7 +963,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, root->root_key.objectid, ino, orig_offset, 0); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); if (split == start) { key.offset = start; @@ -998,7 +990,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 0, root->root_key.objectid, ino, orig_offset, 0); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); } other_start = 0; other_end = start; @@ -1015,7 +1007,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 0, root->root_key.objectid, ino, orig_offset, 0); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); } if (del_nr == 0) { fi = btrfs_item_ptr(leaf, path->slots[0], @@ -1033,10 +1025,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); ret = btrfs_del_items(trans, root, path, del_slot, del_nr); - if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); - goto out; - } + BUG_ON(ret); } out: btrfs_free_path(path); @@ -1116,7 +1105,8 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, if (start_pos < inode->i_size) { struct btrfs_ordered_extent *ordered; lock_extent_bits(&BTRFS_I(inode)->io_tree, - start_pos, last_pos - 1, 0, &cached_state); + start_pos, last_pos - 1, 0, &cached_state, + GFP_NOFS); ordered = btrfs_lookup_first_ordered_extent(inode, last_pos - 1); if (ordered && @@ -1648,7 +1638,7 @@ static long btrfs_fallocate(struct file *file, int mode, * transaction */ lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, - locked_end, 0, &cached_state); + locked_end, 0, &cached_state, GFP_NOFS); ordered = btrfs_lookup_first_ordered_extent(inode, alloc_end - 1); if (ordered && @@ -1677,13 +1667,7 @@ static long btrfs_fallocate(struct file *file, int mode, em = btrfs_get_extent(inode, NULL, 0, cur_offset, alloc_end - cur_offset, 0); - if (IS_ERR_OR_NULL(em)) { - if (!em) - ret = -ENOMEM; - else - ret = PTR_ERR(em); - break; - } + BUG_ON(IS_ERR_OR_NULL(em)); last_byte = min(extent_map_end(em), alloc_end); actual_end = min_t(u64, extent_map_end(em), offset + len); last_byte = (last_byte + mask) & ~mask; @@ -1753,7 +1737,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin) return -ENXIO; 
lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0, - &cached_state); + &cached_state, GFP_NOFS); /* * Delalloc is such a pain. If we have a hole and we have pending diff --git a/trunk/fs/btrfs/free-space-cache.c b/trunk/fs/btrfs/free-space-cache.c index e88330d3df52..b02e379b14c7 100644 --- a/trunk/fs/btrfs/free-space-cache.c +++ b/trunk/fs/btrfs/free-space-cache.c @@ -230,13 +230,11 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, if (ret) { trans->block_rsv = rsv; - btrfs_abort_transaction(trans, root, ret); + WARN_ON(1); return ret; } ret = btrfs_update_inode(trans, root, inode); - if (ret) - btrfs_abort_transaction(trans, root, ret); trans->block_rsv = rsv; return ret; @@ -871,7 +869,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, io_ctl_prepare_pages(&io_ctl, inode, 0); lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, - 0, &cached_state); + 0, &cached_state, GFP_NOFS); node = rb_first(&ctl->free_space_offset); if (!node && cluster) { @@ -1950,14 +1948,14 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, */ ret = btrfs_add_free_space(block_group, old_start, offset - old_start); - WARN_ON(ret); /* -ENOMEM */ + WARN_ON(ret); goto out; } ret = remove_from_bitmap(ctl, info, &offset, &bytes); if (ret == -EAGAIN) goto again; - BUG_ON(ret); /* logic error */ + BUG_ON(ret); out_lock: spin_unlock(&ctl->tree_lock); out: @@ -2348,7 +2346,7 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, rb_erase(&entry->offset_index, &ctl->free_space_offset); ret = tree_insert_offset(&cluster->root, entry->offset, &entry->offset_index, 1); - BUG_ON(ret); /* -EEXIST; Logic error */ + BUG_ON(ret); trace_btrfs_setup_cluster(block_group, cluster, total_found * block_group->sectorsize, 1); @@ -2441,7 +2439,7 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, ret = tree_insert_offset(&cluster->root, entry->offset, &entry->offset_index, 0); total_size += entry->bytes; - BUG_ON(ret); /* -EEXIST; Logic error */ + BUG_ON(ret); } while (node && entry != last); cluster->max_size = max_extent; @@ -2832,7 +2830,6 @@ u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root) int ret; ret = search_bitmap(ctl, entry, &offset, &count); - /* Logic error; Should be empty if it can't find anything */ BUG_ON(ret); ino = offset; diff --git a/trunk/fs/btrfs/inode-item.c b/trunk/fs/btrfs/inode-item.c index a13cf1a96c73..baa74f3db691 100644 --- a/trunk/fs/btrfs/inode-item.c +++ b/trunk/fs/btrfs/inode-item.c @@ -19,7 +19,6 @@ #include "ctree.h" #include "disk-io.h" #include "transaction.h" -#include "print-tree.h" static int find_name_in_backref(struct btrfs_path *path, const char *name, int name_len, struct btrfs_inode_ref **ref_ret) @@ -129,14 +128,13 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, item_start = btrfs_item_ptr_offset(leaf, path->slots[0]); memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, item_size - (ptr + sub_item_len - item_start)); - btrfs_truncate_item(trans, root, path, + ret = btrfs_truncate_item(trans, root, path, item_size - sub_item_len, 1); out: btrfs_free_path(path); return ret; } -/* Will return 0, -ENOMEM, -EMLINK, or -EEXIST or anything from the CoW path */ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, @@ -167,7 +165,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, goto out; old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); - 
btrfs_extend_item(trans, root, path, ins_len); + ret = btrfs_extend_item(trans, root, path, ins_len); ref = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_ref); ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size); diff --git a/trunk/fs/btrfs/inode-map.c b/trunk/fs/btrfs/inode-map.c index b1a1c929ba80..ee15d88b33d2 100644 --- a/trunk/fs/btrfs/inode-map.c +++ b/trunk/fs/btrfs/inode-map.c @@ -178,7 +178,7 @@ static void start_caching(struct btrfs_root *root) tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n", root->root_key.objectid); - BUG_ON(IS_ERR(tsk)); /* -ENOMEM */ + BUG_ON(IS_ERR(tsk)); } int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid) @@ -271,7 +271,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) break; info = rb_entry(n, struct btrfs_free_space, offset_index); - BUG_ON(info->bitmap); /* Logic error */ + BUG_ON(info->bitmap); if (info->offset > root->cache_progress) goto free; @@ -439,16 +439,17 @@ int btrfs_save_ino_cache(struct btrfs_root *root, if (ret) goto out; trace_btrfs_space_reservation(root->fs_info, "ino_cache", - trans->transid, trans->bytes_reserved, 1); + (u64)(unsigned long)trans, + trans->bytes_reserved, 1); again: inode = lookup_free_ino_inode(root, path); - if (IS_ERR(inode) && (PTR_ERR(inode) != -ENOENT || retry)) { + if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { ret = PTR_ERR(inode); goto out_release; } if (IS_ERR(inode)) { - BUG_ON(retry); /* Logic error */ + BUG_ON(retry); retry = true; ret = create_free_ino_inode(root, trans, path); @@ -459,17 +460,12 @@ int btrfs_save_ino_cache(struct btrfs_root *root, BTRFS_I(inode)->generation = 0; ret = btrfs_update_inode(trans, root, inode); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - goto out_put; - } + WARN_ON(ret); if (i_size_read(inode) > 0) { ret = btrfs_truncate_free_space_cache(root, trans, path, inode); - if (ret) { - btrfs_abort_transaction(trans, root, ret); + if (ret) goto out_put; - } } spin_lock(&root->cache_lock); @@ -506,7 +502,8 @@ int btrfs_save_ino_cache(struct btrfs_root *root, iput(inode); out_release: trace_btrfs_space_reservation(root->fs_info, "ino_cache", - trans->transid, trans->bytes_reserved, 0); + (u64)(unsigned long)trans, + trans->bytes_reserved, 0); btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved); out: trans->block_rsv = rsv; @@ -535,7 +532,7 @@ static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid) ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); if (ret < 0) goto error; - BUG_ON(ret == 0); /* Corruption */ + BUG_ON(ret == 0); if (path->slots[0] > 0) { slot = path->slots[0] - 1; l = path->nodes[0]; diff --git a/trunk/fs/btrfs/inode.c b/trunk/fs/btrfs/inode.c index 115bc05e42b0..3a0b5c1f9d31 100644 --- a/trunk/fs/btrfs/inode.c +++ b/trunk/fs/btrfs/inode.c @@ -150,6 +150,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, inode_add_bytes(inode, size); ret = btrfs_insert_empty_item(trans, root, path, &key, datasize); + BUG_ON(ret); if (ret) { err = ret; goto fail; @@ -205,9 +206,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, * could end up racing with unlink. 
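/*
 * [editor's sketch, not part of the patch] The inode-map.c hunks above
 * swap the "ino_cache" tracepoint key between trans->transid and the
 * handle's address, cast through unsigned long so the conversion is
 * warning-free on both 32- and 64-bit builds.  A stand-in illustration
 * (struct trans and trace_reservation are invented for this sketch):
 */
#include <stdint.h>
#include <stdio.h>

struct trans { uint64_t transid; };

static void trace_reservation(uint64_t key, uint64_t bytes, int reserve)
{
    printf("ino_cache key=%llu bytes=%llu reserve=%d\n",
           (unsigned long long)key, (unsigned long long)bytes, reserve);
}

int main(void)
{
    struct trans t = { .transid = 4711 };

    /* keyed by handle address: unique per handle, opaque in a trace log */
    trace_reservation((uint64_t)(unsigned long)&t, 8192, 1);
    /* keyed by transaction id: stable and human-readable in a capture */
    trace_reservation(t.transid, 8192, 0);
    return 0;
}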
*/ BTRFS_I(inode)->disk_i_size = inode->i_size; - ret = btrfs_update_inode(trans, root, inode); + btrfs_update_inode(trans, root, inode); - return ret; + return 0; fail: btrfs_free_path(path); return err; @@ -249,18 +250,14 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, ret = btrfs_drop_extents(trans, inode, start, aligned_end, &hint_byte, 1); - if (ret) - return ret; + BUG_ON(ret); if (isize > actual_end) inline_len = min_t(u64, isize, actual_end); ret = insert_inline_extent(trans, root, inode, start, inline_len, compressed_size, compress_type, compressed_pages); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - return ret; - } + BUG_ON(ret); btrfs_delalloc_release_metadata(inode, end + 1 - start); btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); return 0; @@ -296,7 +293,7 @@ static noinline int add_async_extent(struct async_cow *cow, struct async_extent *async_extent; async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS); - BUG_ON(!async_extent); /* -ENOMEM */ + BUG_ON(!async_extent); async_extent->start = start; async_extent->ram_size = ram_size; async_extent->compressed_size = compressed_size; @@ -347,9 +344,8 @@ static noinline int compress_file_range(struct inode *inode, int will_compress; int compress_type = root->fs_info->compress_type; - /* if this is a small write inside eof, kick off a defrag */ - if ((end - start + 1) < 16 * 1024 && - (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) + /* if this is a small write inside eof, kick off a defragbot */ + if (end <= BTRFS_I(inode)->disk_i_size && (end - start + 1) < 16 * 1024) btrfs_add_inode_defrag(NULL, inode); actual_end = min_t(u64, isize, end + 1); @@ -437,11 +433,7 @@ static noinline int compress_file_range(struct inode *inode, cont: if (start == 0) { trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - trans = NULL; - goto cleanup_and_out; - } + BUG_ON(IS_ERR(trans)); trans->block_rsv = &root->fs_info->delalloc_block_rsv; /* lets try to make an inline extent */ @@ -458,11 +450,11 @@ static noinline int compress_file_range(struct inode *inode, total_compressed, compress_type, pages); } - if (ret <= 0) { + if (ret == 0) { /* - * inline extent creation worked or returned error, - * we don't need to create any more async work items. - * Unlock and free up our temp pages. + * inline extent creation worked, we don't need + * to create any more async work items. Unlock + * and free up our temp pages. 
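/*
 * [editor's sketch, not part of the patch] compress_file_range() above
 * toggles between two "small write inside EOF, queue for auto-defrag"
 * predicates.  A self-contained comparison of the two forms shown in the
 * hunk (names invented; disk_i_size stands in for
 * BTRFS_I(inode)->disk_i_size):
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SMALL_WRITE (16 * 1024)

/* the form the diff removes: any small write that is not a pure append */
static bool small_inside_eof_a(uint64_t start, uint64_t end,
                               uint64_t disk_i_size)
{
    return (end - start + 1) < SMALL_WRITE &&
           (start > 0 || end + 1 < disk_i_size);
}

/* the form the diff restores: a small write ending at or before EOF */
static bool small_inside_eof_b(uint64_t start, uint64_t end,
                               uint64_t disk_i_size)
{
    return end <= disk_i_size && (end - start + 1) < SMALL_WRITE;
}

int main(void)
{
    /* rewriting all of a 4K file: the two forms disagree */
    printf("a=%d b=%d\n",
           small_inside_eof_a(0, 4095, 4096),
           small_inside_eof_b(0, 4095, 4096));
    return 0;
}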
*/ extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, @@ -555,7 +547,7 @@ static noinline int compress_file_range(struct inode *inode, } out: - return ret; + return 0; free_pages_out: for (i = 0; i < nr_pages_ret; i++) { @@ -565,20 +557,6 @@ static noinline int compress_file_range(struct inode *inode, kfree(pages); goto out; - -cleanup_and_out: - extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, - start, end, NULL, - EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_DIRTY | - EXTENT_CLEAR_DELALLOC | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); - if (!trans || IS_ERR(trans)) - btrfs_error(root->fs_info, ret, "Failed to join transaction"); - else - btrfs_abort_transaction(trans, root, ret); - goto free_pages_out; } /* @@ -619,7 +597,7 @@ static noinline int submit_compressed_extents(struct inode *inode, lock_extent(io_tree, async_extent->start, async_extent->start + - async_extent->ram_size - 1); + async_extent->ram_size - 1, GFP_NOFS); /* allocate blocks */ ret = cow_file_range(inode, async_cow->locked_page, @@ -628,8 +606,6 @@ static noinline int submit_compressed_extents(struct inode *inode, async_extent->ram_size - 1, &page_started, &nr_written, 0); - /* JDM XXX */ - /* * if page_started, cow_file_range inserted an * inline extent and took care of all the unlocking @@ -649,21 +625,18 @@ static noinline int submit_compressed_extents(struct inode *inode, } lock_extent(io_tree, async_extent->start, - async_extent->start + async_extent->ram_size - 1); + async_extent->start + async_extent->ram_size - 1, + GFP_NOFS); trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - } else { - trans->block_rsv = &root->fs_info->delalloc_block_rsv; - ret = btrfs_reserve_extent(trans, root, + BUG_ON(IS_ERR(trans)); + trans->block_rsv = &root->fs_info->delalloc_block_rsv; + ret = btrfs_reserve_extent(trans, root, async_extent->compressed_size, async_extent->compressed_size, - 0, alloc_hint, &ins, 1); - if (ret) - btrfs_abort_transaction(trans, root, ret); - btrfs_end_transaction(trans, root); - } + 0, alloc_hint, + (u64)-1, &ins, 1); + btrfs_end_transaction(trans, root); if (ret) { int i; @@ -676,10 +649,8 @@ static noinline int submit_compressed_extents(struct inode *inode, async_extent->pages = NULL; unlock_extent(io_tree, async_extent->start, async_extent->start + - async_extent->ram_size - 1); - if (ret == -ENOSPC) - goto retry; - goto out_free; /* JDM: Requeue? */ + async_extent->ram_size - 1, GFP_NOFS); + goto retry; } /* @@ -691,7 +662,7 @@ static noinline int submit_compressed_extents(struct inode *inode, async_extent->ram_size - 1, 0); em = alloc_extent_map(); - BUG_ON(!em); /* -ENOMEM */ + BUG_ON(!em); em->start = async_extent->start; em->len = async_extent->ram_size; em->orig_start = em->start; @@ -723,7 +694,7 @@ static noinline int submit_compressed_extents(struct inode *inode, ins.offset, BTRFS_ORDERED_COMPRESSED, async_extent->compress_type); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); /* * clear dirty, set writeback and unlock the pages. 
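/*
 * [editor's sketch, not part of the patch] On a failed extent
 * reservation, submit_compressed_extents() above toggles between
 * retrying only on -ENOSPC (dropping the compressed pages and falling
 * back to an uncompressed write) and retrying on any error.  A toy
 * version of the selective policy (reserve() is invented):
 */
#include <errno.h>
#include <stdio.h>

static int reserve(int attempt) { return attempt == 0 ? -ENOSPC : 0; }

int main(void)
{
    int attempt = 0, ret;

    for (;;) {
        ret = reserve(attempt++);
        if (!ret)
            break;              /* reservation succeeded */
        if (ret != -ENOSPC) {   /* selective: only ENOSPC retries */
            fprintf(stderr, "hard failure: %d\n", ret);
            return 1;
        }
        /* fall back (e.g. give up on compression) and try again */
    }
    printf("reserved after %d attempt(s)\n", attempt);
    return 0;
}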
@@ -745,17 +716,13 @@ static noinline int submit_compressed_extents(struct inode *inode, ins.offset, async_extent->pages, async_extent->nr_pages); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); alloc_hint = ins.objectid + ins.offset; kfree(async_extent); cond_resched(); } - ret = 0; -out: - return ret; -out_free: - kfree(async_extent); - goto out; + + return 0; } static u64 get_extent_allocation_hint(struct inode *inode, u64 start, @@ -824,18 +791,7 @@ static noinline int cow_file_range(struct inode *inode, BUG_ON(btrfs_is_free_space_inode(root, inode)); trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - start, end, NULL, - EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); - return PTR_ERR(trans); - } + BUG_ON(IS_ERR(trans)); trans->block_rsv = &root->fs_info->delalloc_block_rsv; num_bytes = (end - start + blocksize) & ~(blocksize - 1); @@ -844,8 +800,7 @@ static noinline int cow_file_range(struct inode *inode, ret = 0; /* if this is a small write inside eof, kick off defrag */ - if (num_bytes < 64 * 1024 && - (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) + if (end <= BTRFS_I(inode)->disk_i_size && num_bytes < 64 * 1024) btrfs_add_inode_defrag(trans, inode); if (start == 0) { @@ -866,10 +821,8 @@ static noinline int cow_file_range(struct inode *inode, *nr_written = *nr_written + (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; *page_started = 1; + ret = 0; goto out; - } else if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); - goto out_unlock; } } @@ -885,14 +838,11 @@ static noinline int cow_file_range(struct inode *inode, cur_alloc_size = disk_num_bytes; ret = btrfs_reserve_extent(trans, root, cur_alloc_size, root->sectorsize, 0, alloc_hint, - &ins, 1); - if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); - goto out_unlock; - } + (u64)-1, &ins, 1); + BUG_ON(ret); em = alloc_extent_map(); - BUG_ON(!em); /* -ENOMEM */ + BUG_ON(!em); em->start = start; em->orig_start = em->start; ram_size = ins.offset; @@ -918,16 +868,13 @@ static noinline int cow_file_range(struct inode *inode, cur_alloc_size = ins.offset; ret = btrfs_add_ordered_extent(inode, start, ins.objectid, ram_size, cur_alloc_size, 0); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) { ret = btrfs_reloc_clone_csums(inode, start, cur_alloc_size); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - goto out_unlock; - } + BUG_ON(ret); } if (disk_num_bytes < cur_alloc_size) @@ -952,23 +899,11 @@ static noinline int cow_file_range(struct inode *inode, alloc_hint = ins.objectid + ins.offset; start += cur_alloc_size; } - ret = 0; out: + ret = 0; btrfs_end_transaction(trans, root); return ret; -out_unlock: - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - start, end, NULL, - EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); - - goto out; } /* @@ -1034,7 +969,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, 1, 0, NULL, GFP_NOFS); while (start < end) { async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); - BUG_ON(!async_cow); /* -ENOMEM */ + BUG_ON(!async_cow); async_cow->inode = inode; async_cow->root = root; async_cow->locked_page = locked_page; @@ -1125,7 +1060,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, 
u64 disk_bytenr; u64 num_bytes; int extent_type; - int ret, err; + int ret; int type; int nocow; int check_prev = 1; @@ -1143,11 +1078,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, else trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - btrfs_free_path(path); - return PTR_ERR(trans); - } - + BUG_ON(IS_ERR(trans)); trans->block_rsv = &root->fs_info->delalloc_block_rsv; cow_start = (u64)-1; @@ -1155,10 +1086,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, while (1) { ret = btrfs_lookup_file_extent(trans, root, path, ino, cur_offset, 0); - if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); - goto error; - } + BUG_ON(ret < 0); if (ret > 0 && path->slots[0] > 0 && check_prev) { leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, @@ -1172,10 +1100,8 @@ static noinline int run_delalloc_nocow(struct inode *inode, leaf = path->nodes[0]; if (path->slots[0] >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(root, path); - if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); - goto error; - } + if (ret < 0) + BUG_ON(1); if (ret > 0) break; leaf = path->nodes[0]; @@ -1263,10 +1189,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, ret = cow_file_range(inode, locked_page, cow_start, found_key.offset - 1, page_started, nr_written, 1); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - goto error; - } + BUG_ON(ret); cow_start = (u64)-1; } @@ -1275,7 +1198,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, struct extent_map_tree *em_tree; em_tree = &BTRFS_I(inode)->extent_tree; em = alloc_extent_map(); - BUG_ON(!em); /* -ENOMEM */ + BUG_ON(!em); em->start = cur_offset; em->orig_start = em->start; em->len = num_bytes; @@ -1301,16 +1224,13 @@ static noinline int run_delalloc_nocow(struct inode *inode, ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr, num_bytes, num_bytes, type); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) { ret = btrfs_reloc_clone_csums(inode, cur_offset, num_bytes); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - goto error; - } + BUG_ON(ret); } extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, @@ -1329,23 +1249,18 @@ static noinline int run_delalloc_nocow(struct inode *inode, if (cow_start != (u64)-1) { ret = cow_file_range(inode, locked_page, cow_start, end, page_started, nr_written, 1); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - goto error; - } + BUG_ON(ret); } -error: if (nolock) { - err = btrfs_end_transaction_nolock(trans, root); + ret = btrfs_end_transaction_nolock(trans, root); + BUG_ON(ret); } else { - err = btrfs_end_transaction(trans, root); + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); } - if (!ret) - ret = err; - btrfs_free_path(path); - return ret; + return 0; } /* @@ -1510,11 +1425,10 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, map_length = length; ret = btrfs_map_block(map_tree, READ, logical, &map_length, NULL, 0); - /* Will always return 0 or 1 with map_multi == NULL */ - BUG_ON(ret < 0); + if (map_length < length + size) return 1; - return 0; + return ret; } /* @@ -1534,7 +1448,7 @@ static int __btrfs_submit_bio_start(struct inode *inode, int rw, int ret = 0; ret = btrfs_csum_one_bio(root, inode, bio, 0, 0); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); return 0; } @@ -1565,16 +1479,14 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, struct btrfs_root *root = 
BTRFS_I(inode)->root; int ret = 0; int skip_sum; - int metadata = 0; skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; if (btrfs_is_free_space_inode(root, inode)) - metadata = 2; - - ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata); - if (ret) - return ret; + ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2); + else + ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); + BUG_ON(ret); if (!(rw & REQ_WRITE)) { if (bio_flags & EXTENT_BIO_COMPRESSED) { @@ -1659,7 +1571,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work) page_end = page_offset(page) + PAGE_CACHE_SIZE - 1; lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 0, - &cached_state); + &cached_state, GFP_NOFS); /* already ordered? We're done */ if (PagePrivate2(page)) @@ -1763,15 +1675,13 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, */ ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes, &hint, 0); - if (ret) - goto out; + BUG_ON(ret); ins.objectid = btrfs_ino(inode); ins.offset = file_pos; ins.type = BTRFS_EXTENT_DATA_KEY; ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi)); - if (ret) - goto out; + BUG_ON(ret); leaf = path->nodes[0]; fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); @@ -1799,10 +1709,10 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, ret = btrfs_alloc_reserved_file_extent(trans, root, root->root_key.objectid, btrfs_ino(inode), file_pos, &ins); -out: + BUG_ON(ret); btrfs_free_path(path); - return ret; + return 0; } /* @@ -1830,41 +1740,35 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) end - start + 1); if (!ret) return 0; - BUG_ON(!ordered_extent); /* Logic error */ + BUG_ON(!ordered_extent); nolock = btrfs_is_free_space_inode(root, inode); if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { - BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */ + BUG_ON(!list_empty(&ordered_extent->list)); ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); if (!ret) { if (nolock) trans = btrfs_join_transaction_nolock(root); else trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) - return PTR_ERR(trans); + BUG_ON(IS_ERR(trans)); trans->block_rsv = &root->fs_info->delalloc_block_rsv; ret = btrfs_update_inode_fallback(trans, root, inode); - if (ret) /* -ENOMEM or corruption */ - btrfs_abort_transaction(trans, root, ret); + BUG_ON(ret); } goto out; } lock_extent_bits(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, - 0, &cached_state); + 0, &cached_state, GFP_NOFS); if (nolock) trans = btrfs_join_transaction_nolock(root); else trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - trans = NULL; - goto out_unlock; - } + BUG_ON(IS_ERR(trans)); trans->block_rsv = &root->fs_info->delalloc_block_rsv; if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) @@ -1875,6 +1779,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len); + BUG_ON(ret); } else { BUG_ON(root == root->fs_info->tree_root); ret = insert_reserved_file_extent(trans, inode, @@ -1888,14 +1793,11 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) unpin_extent_cache(&BTRFS_I(inode)->extent_tree, ordered_extent->file_offset, ordered_extent->len); + BUG_ON(ret); } unlock_extent_cached(io_tree, ordered_extent->file_offset, 
ordered_extent->file_offset + ordered_extent->len - 1, &cached_state, GFP_NOFS); - if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); - goto out; - } add_pending_csums(trans, inode, ordered_extent->file_offset, &ordered_extent->list); @@ -1903,10 +1805,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { ret = btrfs_update_inode_fallback(trans, root, inode); - if (ret) { /* -ENOMEM or corruption */ - btrfs_abort_transaction(trans, root, ret); - goto out; - } + BUG_ON(ret); } ret = 0; out: @@ -1925,11 +1824,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) btrfs_put_ordered_extent(ordered_extent); return 0; -out_unlock: - unlock_extent_cached(io_tree, ordered_extent->file_offset, - ordered_extent->file_offset + - ordered_extent->len - 1, &cached_state, GFP_NOFS); - goto out; } static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, @@ -2011,8 +1905,6 @@ struct delayed_iput { struct inode *inode; }; -/* JDM: If this is fs-wide, why can't we add a pointer to - * btrfs_inode instead and avoid the allocation? */ void btrfs_add_delayed_iput(struct inode *inode) { struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; @@ -2159,27 +2051,20 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) /* grab metadata reservation from transaction handle */ if (reserve) { ret = btrfs_orphan_reserve_metadata(trans, inode); - BUG_ON(ret); /* -ENOSPC in reservation; Logic error? JDM */ + BUG_ON(ret); } /* insert an orphan item to track this unlinked/truncated file */ if (insert >= 1) { ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); - if (ret && ret != -EEXIST) { - btrfs_abort_transaction(trans, root, ret); - return ret; - } - ret = 0; + BUG_ON(ret && ret != -EEXIST); } /* insert an orphan item to track subvolume contains orphan files */ if (insert >= 2) { ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root, root->root_key.objectid); - if (ret && ret != -EEXIST) { - btrfs_abort_transaction(trans, root, ret); - return ret; - } + BUG_ON(ret); } return 0; } @@ -2209,7 +2094,7 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) if (trans && delete_item) { ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode)); - BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ + BUG_ON(ret); } if (release_rsv) @@ -2343,7 +2228,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) } ret = btrfs_del_orphan_item(trans, root, found_key.objectid); - BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ + BUG_ON(ret); btrfs_end_transaction(trans, root); continue; } @@ -2725,22 +2610,16 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, printk(KERN_INFO "btrfs failed to delete reference to %.*s, " "inode %llu parent %llu\n", name_len, name, (unsigned long long)ino, (unsigned long long)dir_ino); - btrfs_abort_transaction(trans, root, ret); goto err; } ret = btrfs_delete_delayed_dir_index(trans, root, dir, index); - if (ret) { - btrfs_abort_transaction(trans, root, ret); + if (ret) goto err; - } ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode, dir_ino); - if (ret != 0 && ret != -ENOENT) { - btrfs_abort_transaction(trans, root, ret); - goto err; - } + BUG_ON(ret != 0 && ret != -ENOENT); ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir, index); @@ -2898,7 +2777,7 @@ 
static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, err = ret; goto out; } - BUG_ON(ret == 0); /* Corruption */ + BUG_ON(ret == 0); if (check_path_shared(root, path)) goto out; btrfs_release_path(path); @@ -2931,7 +2810,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, err = PTR_ERR(ref); goto out; } - BUG_ON(!ref); /* Logic error */ + BUG_ON(!ref); if (check_path_shared(root, path)) goto out; index = btrfs_inode_ref_index(path->nodes[0], ref); @@ -3038,42 +2917,23 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, di = btrfs_lookup_dir_item(trans, root, path, dir_ino, name, name_len, -1); - if (IS_ERR_OR_NULL(di)) { - if (!di) - ret = -ENOENT; - else - ret = PTR_ERR(di); - goto out; - } + BUG_ON(IS_ERR_OR_NULL(di)); leaf = path->nodes[0]; btrfs_dir_item_key_to_cpu(leaf, di, &key); WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); ret = btrfs_delete_one_dir_name(trans, root, path, di); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - goto out; - } + BUG_ON(ret); btrfs_release_path(path); ret = btrfs_del_root_ref(trans, root->fs_info->tree_root, objectid, root->root_key.objectid, dir_ino, &index, name, name_len); if (ret < 0) { - if (ret != -ENOENT) { - btrfs_abort_transaction(trans, root, ret); - goto out; - } + BUG_ON(ret != -ENOENT); di = btrfs_search_dir_index_item(root, path, dir_ino, name, name_len); - if (IS_ERR_OR_NULL(di)) { - if (!di) - ret = -ENOENT; - else - ret = PTR_ERR(di); - btrfs_abort_transaction(trans, root, ret); - goto out; - } + BUG_ON(IS_ERR_OR_NULL(di)); leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); @@ -3083,19 +2943,15 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, btrfs_release_path(path); ret = btrfs_delete_delayed_dir_index(trans, root, dir, index); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - goto out; - } + BUG_ON(ret); btrfs_i_size_write(dir, dir->i_size - name_len * 2); dir->i_mtime = dir->i_ctime = CURRENT_TIME; ret = btrfs_update_inode(trans, root, dir); - if (ret) - btrfs_abort_transaction(trans, root, ret); -out: + BUG_ON(ret); + btrfs_free_path(path); - return ret; + return 0; } static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) @@ -3305,8 +3161,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, } size = btrfs_file_extent_calc_inline_size(size); - btrfs_truncate_item(trans, root, path, - size, 1); + ret = btrfs_truncate_item(trans, root, path, + size, 1); } else if (root->ref_cows) { inode_sub_bytes(inode, item_end + 1 - found_key.offset); @@ -3354,11 +3210,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, ret = btrfs_del_items(trans, root, path, pending_del_slot, pending_del_nr); - if (ret) { - btrfs_abort_transaction(trans, - root, ret); - goto error; - } + BUG_ON(ret); pending_del_nr = 0; } btrfs_release_path(path); @@ -3371,10 +3223,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, if (pending_del_nr) { ret = btrfs_del_items(trans, root, path, pending_del_slot, pending_del_nr); - if (ret) - btrfs_abort_transaction(trans, root, ret); + BUG_ON(ret); } -error: btrfs_free_path(path); return err; } @@ -3432,7 +3282,8 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) } wait_on_page_writeback(page); - lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state); + lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state, + GFP_NOFS); set_page_extent_mapped(page); ordered = 
btrfs_lookup_ordered_extent(inode, page_start); @@ -3508,7 +3359,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) btrfs_wait_ordered_range(inode, hole_start, block_end - hole_start); lock_extent_bits(io_tree, hole_start, block_end - 1, 0, - &cached_state); + &cached_state, GFP_NOFS); ordered = btrfs_lookup_ordered_extent(inode, hole_start); if (!ordered) break; @@ -3521,10 +3372,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) while (1) { em = btrfs_get_extent(inode, NULL, 0, cur_offset, block_end - cur_offset, 0); - if (IS_ERR(em)) { - err = PTR_ERR(em); - break; - } + BUG_ON(IS_ERR_OR_NULL(em)); last_byte = min(extent_map_end(em), block_end); last_byte = (last_byte + mask) & ~mask; if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { @@ -3541,7 +3389,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) cur_offset + hole_size, &hint_byte, 1); if (err) { - btrfs_abort_transaction(trans, root, err); + btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); break; } @@ -3551,7 +3399,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) 0, hole_size, 0, hole_size, 0, 0, 0); if (err) { - btrfs_abort_transaction(trans, root, err); + btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); break; } @@ -3931,7 +3779,7 @@ static void inode_tree_del(struct inode *inode) } } -void btrfs_invalidate_inodes(struct btrfs_root *root) +int btrfs_invalidate_inodes(struct btrfs_root *root) { struct rb_node *node; struct rb_node *prev; @@ -3991,6 +3839,7 @@ void btrfs_invalidate_inodes(struct btrfs_root *root) node = rb_next(node); } spin_unlock(&root->inode_lock); + return 0; } static int btrfs_init_locked_inode(struct inode *inode, void *p) @@ -4732,26 +4581,18 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, parent_ino, index); } - /* Nothing to clean up yet */ - if (ret) - return ret; + if (ret == 0) { + ret = btrfs_insert_dir_item(trans, root, name, name_len, + parent_inode, &key, + btrfs_inode_type(inode), index); + if (ret) + goto fail_dir_item; - ret = btrfs_insert_dir_item(trans, root, name, name_len, - parent_inode, &key, - btrfs_inode_type(inode), index); - if (ret == -EEXIST) - goto fail_dir_item; - else if (ret) { - btrfs_abort_transaction(trans, root, ret); - return ret; + btrfs_i_size_write(parent_inode, parent_inode->i_size + + name_len * 2); + parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; + ret = btrfs_update_inode(trans, root, parent_inode); } - - btrfs_i_size_write(parent_inode, parent_inode->i_size + - name_len * 2); - parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; - ret = btrfs_update_inode(trans, root, parent_inode); - if (ret) - btrfs_abort_transaction(trans, root, ret); return ret; fail_dir_item: @@ -4965,8 +4806,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, } else { struct dentry *parent = dentry->d_parent; err = btrfs_update_inode(trans, root, inode); - if (err) - goto fail; + BUG_ON(err); d_instantiate(dentry, inode); btrfs_log_new_name(trans, inode, NULL, parent); } @@ -5297,7 +5137,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, ret = uncompress_inline(path, inode, page, pg_offset, extent_offset, item); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); } else { map = kmap(page); read_extent_buffer(leaf, map + pg_offset, ptr, @@ -5412,7 +5252,6 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, free_extent_map(em); return 
ERR_PTR(err); } - BUG_ON(!em); /* Error is always set */ return em; } @@ -5575,7 +5414,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, alloc_hint = get_extent_allocation_hint(inode, start, len); ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0, - alloc_hint, &ins, 1); + alloc_hint, (u64)-1, &ins, 1); if (ret) { em = ERR_PTR(ret); goto out; @@ -5763,7 +5602,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, free_extent_map(em); /* DIO will do one hole at a time, so just unlock a sector */ unlock_extent(&BTRFS_I(inode)->io_tree, start, - start + root->sectorsize - 1); + start + root->sectorsize - 1, GFP_NOFS); return 0; } @@ -5904,7 +5743,7 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) } while (bvec <= bvec_end); unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, - dip->logical_offset + dip->bytes - 1); + dip->logical_offset + dip->bytes - 1, GFP_NOFS); bio->bi_private = dip->private; kfree(dip->csums); @@ -5955,7 +5794,7 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset, ordered->file_offset + ordered->len - 1, 0, - &cached_state); + &cached_state, GFP_NOFS); if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) { ret = btrfs_mark_extent_written(trans, inode, @@ -6029,7 +5868,7 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, int ret; struct btrfs_root *root = BTRFS_I(inode)->root; ret = btrfs_csum_one_bio(root, inode, bio, offset, 1); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); return 0; } @@ -6370,7 +6209,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, while (1) { lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, - 0, &cached_state); + 0, &cached_state, GFP_NOFS); /* * We're concerned with the entire range that we're going to be * doing DIO to, so we need to make sure theres no ordered @@ -6394,7 +6233,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, if (writing) { write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING; ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, - EXTENT_DELALLOC, NULL, &cached_state, + EXTENT_DELALLOC, 0, NULL, &cached_state, GFP_NOFS); if (ret) { clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, @@ -6524,7 +6363,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) btrfs_releasepage(page, GFP_NOFS); return; } - lock_extent_bits(tree, page_start, page_end, 0, &cached_state); + lock_extent_bits(tree, page_start, page_end, 0, &cached_state, + GFP_NOFS); ordered = btrfs_lookup_ordered_extent(page->mapping->host, page_offset(page)); if (ordered) { @@ -6546,7 +6386,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) } btrfs_put_ordered_extent(ordered); cached_state = NULL; - lock_extent_bits(tree, page_start, page_end, 0, &cached_state); + lock_extent_bits(tree, page_start, page_end, 0, &cached_state, + GFP_NOFS); } clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | @@ -6621,7 +6462,8 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) } wait_on_page_writeback(page); - lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state); + lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state, + GFP_NOFS); set_page_extent_mapped(page); /* @@ -6895,9 +6737,10 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, btrfs_i_size_write(inode, 0); err = 
btrfs_update_inode(trans, new_root, inode); + BUG_ON(err); iput(inode); - return err; + return 0; } struct inode *btrfs_alloc_inode(struct super_block *sb) @@ -6940,8 +6783,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) extent_map_tree_init(&ei->extent_tree); extent_io_tree_init(&ei->io_tree, &inode->i_data); extent_io_tree_init(&ei->io_failure_tree, &inode->i_data); - ei->io_tree.track_uptodate = 1; - ei->io_failure_tree.track_uptodate = 1; mutex_init(&ei->log_mutex); mutex_init(&ei->delalloc_mutex); btrfs_ordered_inode_tree_init(&ei->ordered_tree); @@ -7231,10 +7072,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (!ret) ret = btrfs_update_inode(trans, root, old_inode); } - if (ret) { - btrfs_abort_transaction(trans, root, ret); - goto out_fail; - } + BUG_ON(ret); if (new_inode) { new_inode->i_ctime = CURRENT_TIME; @@ -7252,14 +7090,11 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, new_dentry->d_name.name, new_dentry->d_name.len); } - if (!ret && new_inode->i_nlink == 0) { + BUG_ON(ret); + if (new_inode->i_nlink == 0) { ret = btrfs_orphan_add(trans, new_dentry->d_inode); BUG_ON(ret); } - if (ret) { - btrfs_abort_transaction(trans, root, ret); - goto out_fail; - } } fixup_inode_flags(new_dir, old_inode); @@ -7267,10 +7102,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, ret = btrfs_add_link(trans, new_dir, old_inode, new_dentry->d_name.name, new_dentry->d_name.len, 0, index); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - goto out_fail; - } + BUG_ON(ret); if (old_ino != BTRFS_FIRST_FREE_OBJECTID) { struct dentry *parent = new_dentry->d_parent; @@ -7483,7 +7315,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, } ret = btrfs_reserve_extent(trans, root, num_bytes, min_size, - 0, *alloc_hint, &ins, 1); + 0, *alloc_hint, (u64)-1, &ins, 1); if (ret) { if (own_trans) btrfs_end_transaction(trans, root); @@ -7495,12 +7327,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, ins.offset, ins.offset, ins.offset, 0, 0, 0, BTRFS_FILE_EXTENT_PREALLOC); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - if (own_trans) - btrfs_end_transaction(trans, root); - break; - } + BUG_ON(ret); btrfs_drop_extent_cache(inode, cur_offset, cur_offset + ins.offset -1, 0); @@ -7522,13 +7349,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, } ret = btrfs_update_inode(trans, root, inode); - - if (ret) { - btrfs_abort_transaction(trans, root, ret); - if (own_trans) - btrfs_end_transaction(trans, root); - break; - } + BUG_ON(ret); if (own_trans) btrfs_end_transaction(trans, root); diff --git a/trunk/fs/btrfs/ioctl.c b/trunk/fs/btrfs/ioctl.c index 18cc23d164a8..d8b54715c2de 100644 --- a/trunk/fs/btrfs/ioctl.c +++ b/trunk/fs/btrfs/ioctl.c @@ -425,37 +425,22 @@ static noinline int create_subvol(struct btrfs_root *root, key.offset = (u64)-1; new_root = btrfs_read_fs_root_no_name(root->fs_info, &key); - if (IS_ERR(new_root)) { - btrfs_abort_transaction(trans, root, PTR_ERR(new_root)); - ret = PTR_ERR(new_root); - goto fail; - } + BUG_ON(IS_ERR(new_root)); btrfs_record_root_in_trans(trans, new_root); ret = btrfs_create_subvol_root(trans, new_root, new_dirid); - if (ret) { - /* We potentially lose an unused inode item here */ - btrfs_abort_transaction(trans, root, ret); - goto fail; - } - /* * insert the directory item */ ret = btrfs_set_inode_index(dir, &index); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - goto fail; - 
} + BUG_ON(ret); ret = btrfs_insert_dir_item(trans, root, name, namelen, dir, &key, BTRFS_FT_DIR, index); - if (ret) { - btrfs_abort_transaction(trans, root, ret); + if (ret) goto fail; - } btrfs_i_size_write(dir, dir->i_size + namelen * 2); ret = btrfs_update_inode(trans, root, dir); @@ -784,31 +769,6 @@ static int find_new_extents(struct btrfs_root *root, return -ENOENT; } -/* - * Validaty check of prev em and next em: - * 1) no prev/next em - * 2) prev/next em is an hole/inline extent - */ -static int check_adjacent_extents(struct inode *inode, struct extent_map *em) -{ - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - struct extent_map *prev = NULL, *next = NULL; - int ret = 0; - - read_lock(&em_tree->lock); - prev = lookup_extent_mapping(em_tree, em->start - 1, (u64)-1); - next = lookup_extent_mapping(em_tree, em->start + em->len, (u64)-1); - read_unlock(&em_tree->lock); - - if ((!prev || prev->block_start >= EXTENT_MAP_LAST_BYTE) && - (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)) - ret = 1; - free_extent_map(prev); - free_extent_map(next); - - return ret; -} - static int should_defrag_range(struct inode *inode, u64 start, u64 len, int thresh, u64 *last_len, u64 *skip, u64 *defrag_end) @@ -837,25 +797,17 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, if (!em) { /* get the big lock and read metadata off disk */ - lock_extent(io_tree, start, start + len - 1); + lock_extent(io_tree, start, start + len - 1, GFP_NOFS); em = btrfs_get_extent(inode, NULL, 0, start, len, 0); - unlock_extent(io_tree, start, start + len - 1); + unlock_extent(io_tree, start, start + len - 1, GFP_NOFS); if (IS_ERR(em)) return 0; } /* this will cover holes, and inline extents */ - if (em->block_start >= EXTENT_MAP_LAST_BYTE) { + if (em->block_start >= EXTENT_MAP_LAST_BYTE) ret = 0; - goto out; - } - - /* If we have nothing to merge with us, just skip. */ - if (check_adjacent_extents(inode, em)) { - ret = 0; - goto out; - } /* * we hit a real extent, if it is big don't bother defragging it again @@ -863,7 +815,6 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh) ret = 0; -out: /* * last_len ends up being a counter of how many bytes we've defragged. 
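/*
 * [editor's sketch, not part of the patch] The deleted
 * check_adjacent_extents() above skipped defragging an extent whose
 * neighbours are all holes or inline extents, since there is nothing
 * contiguous to merge it with.  Its core predicate, restated standalone
 * (the sentinel value and struct layout here are stand-ins):
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EXTENT_MAP_LAST_BYTE ((uint64_t)-4)    /* stand-in sentinel */

struct extent_map { uint64_t start, len, block_start; };

/* true when neither neighbour is a real on-disk extent */
static bool nothing_to_merge_with(const struct extent_map *prev,
                                  const struct extent_map *next)
{
    return (!prev || prev->block_start >= EXTENT_MAP_LAST_BYTE) &&
           (!next || next->block_start >= EXTENT_MAP_LAST_BYTE);
}

int main(void)
{
    struct extent_map hole = { 0, 4096, EXTENT_MAP_LAST_BYTE };
    struct extent_map real = { 8192, 4096, 12345 };

    printf("holes only: %d\n", nothing_to_merge_with(&hole, NULL));
    printf("real next:  %d\n", nothing_to_merge_with(&hole, &real));
    return 0;
}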
* every time we choose not to defrag an extent, we reset *last_len @@ -905,7 +856,6 @@ static int cluster_pages_for_defrag(struct inode *inode, u64 isize = i_size_read(inode); u64 page_start; u64 page_end; - u64 page_cnt; int ret; int i; int i_done; @@ -914,21 +864,19 @@ static int cluster_pages_for_defrag(struct inode *inode, struct extent_io_tree *tree; gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); - file_end = (isize - 1) >> PAGE_CACHE_SHIFT; - if (!isize || start_index > file_end) + if (isize == 0) return 0; - - page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1); + file_end = (isize - 1) >> PAGE_CACHE_SHIFT; ret = btrfs_delalloc_reserve_space(inode, - page_cnt << PAGE_CACHE_SHIFT); + num_pages << PAGE_CACHE_SHIFT); if (ret) return ret; i_done = 0; tree = &BTRFS_I(inode)->io_tree; /* step one, lock all the pages */ - for (i = 0; i < page_cnt; i++) { + for (i = 0; i < num_pages; i++) { struct page *page; again: page = find_or_create_page(inode->i_mapping, @@ -939,10 +887,10 @@ static int cluster_pages_for_defrag(struct inode *inode, page_start = page_offset(page); page_end = page_start + PAGE_CACHE_SIZE - 1; while (1) { - lock_extent(tree, page_start, page_end); + lock_extent(tree, page_start, page_end, GFP_NOFS); ordered = btrfs_lookup_ordered_extent(inode, page_start); - unlock_extent(tree, page_start, page_end); + unlock_extent(tree, page_start, page_end, GFP_NOFS); if (!ordered) break; @@ -950,15 +898,6 @@ static int cluster_pages_for_defrag(struct inode *inode, btrfs_start_ordered_extent(inode, ordered, 1); btrfs_put_ordered_extent(ordered); lock_page(page); - /* - * we unlocked the page above, so we need check if - * it was released or not. - */ - if (page->mapping != inode->i_mapping) { - unlock_page(page); - page_cache_release(page); - goto again; - } } if (!PageUptodate(page)) { @@ -972,6 +911,15 @@ static int cluster_pages_for_defrag(struct inode *inode, } } + isize = i_size_read(inode); + file_end = (isize - 1) >> PAGE_CACHE_SHIFT; + if (!isize || page->index > file_end) { + /* whoops, we blew past eof, skip this page */ + unlock_page(page); + page_cache_release(page); + break; + } + if (page->mapping != inode->i_mapping) { unlock_page(page); page_cache_release(page); @@ -998,18 +946,19 @@ static int cluster_pages_for_defrag(struct inode *inode, page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE; lock_extent_bits(&BTRFS_I(inode)->io_tree, - page_start, page_end - 1, 0, &cached_state); + page_start, page_end - 1, 0, &cached_state, + GFP_NOFS); clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, 0, 0, &cached_state, GFP_NOFS); - if (i_done != page_cnt) { + if (i_done != num_pages) { spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->outstanding_extents++; spin_unlock(&BTRFS_I(inode)->lock); btrfs_delalloc_release_space(inode, - (page_cnt - i_done) << PAGE_CACHE_SHIFT); + (num_pages - i_done) << PAGE_CACHE_SHIFT); } @@ -1034,7 +983,7 @@ static int cluster_pages_for_defrag(struct inode *inode, unlock_page(pages[i]); page_cache_release(pages[i]); } - btrfs_delalloc_release_space(inode, page_cnt << PAGE_CACHE_SHIFT); + btrfs_delalloc_release_space(inode, num_pages << PAGE_CACHE_SHIFT); return ret; } @@ -1140,9 +1089,12 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, if (!(inode->i_sb->s_flags & MS_ACTIVE)) break; - if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, - PAGE_CACHE_SIZE, extent_thresh, - &last_len, &skip, &defrag_end)) { + if 
(!newer_than && + !should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, + PAGE_CACHE_SIZE, + extent_thresh, + &last_len, &skip, + &defrag_end)) { unsigned long next; /* * the should_defrag function tells us how much to skip @@ -1171,24 +1123,17 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, ra_index += max_cluster; } - mutex_lock(&inode->i_mutex); ret = cluster_pages_for_defrag(inode, pages, i, cluster); - if (ret < 0) { - mutex_unlock(&inode->i_mutex); + if (ret < 0) goto out_ra; - } defrag_count += ret; balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret); - mutex_unlock(&inode->i_mutex); if (newer_than) { if (newer_off == (u64)-1) break; - if (ret > 0) - i += ret; - newer_off = max(newer_off + 1, (u64)i << PAGE_CACHE_SHIFT); @@ -2021,11 +1966,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, dest->root_key.objectid, dentry->d_name.name, dentry->d_name.len); - if (ret) { - err = ret; - btrfs_abort_transaction(trans, root, ret); - goto out_end_trans; - } + BUG_ON(ret); btrfs_record_root_in_trans(trans, dest); @@ -2038,16 +1979,11 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root, dest->root_key.objectid); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - err = ret; - goto out_end_trans; - } + BUG_ON(ret); } -out_end_trans: + ret = btrfs_end_transaction(trans, root); - if (ret && !err) - err = ret; + BUG_ON(ret); inode->i_flags |= S_DEAD; out_up_write: up_write(&root->fs_info->subvol_sem); @@ -2390,13 +2326,13 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, another, and lock file content */ while (1) { struct btrfs_ordered_extent *ordered; - lock_extent(&BTRFS_I(src)->io_tree, off, off+len); + lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); ordered = btrfs_lookup_first_ordered_extent(src, off+len); if (!ordered && !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len, EXTENT_DELALLOC, 0, NULL)) break; - unlock_extent(&BTRFS_I(src)->io_tree, off, off+len); + unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); if (ordered) btrfs_put_ordered_extent(ordered); btrfs_wait_ordered_range(src, off, len); @@ -2511,21 +2447,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, new_key.offset, new_key.offset + datal, &hint_byte, 1); - if (ret) { - btrfs_abort_transaction(trans, root, - ret); - btrfs_end_transaction(trans, root); - goto out; - } + BUG_ON(ret); ret = btrfs_insert_empty_item(trans, root, path, &new_key, size); - if (ret) { - btrfs_abort_transaction(trans, root, - ret); - btrfs_end_transaction(trans, root); - goto out; - } + BUG_ON(ret); leaf = path->nodes[0]; slot = path->slots[0]; @@ -2552,15 +2478,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, btrfs_ino(inode), new_key.offset - datao, 0); - if (ret) { - btrfs_abort_transaction(trans, - root, - ret); - btrfs_end_transaction(trans, - root); - goto out; - - } + BUG_ON(ret); } } else if (type == BTRFS_FILE_EXTENT_INLINE) { u64 skip = 0; @@ -2585,21 +2503,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, new_key.offset, new_key.offset + datal, &hint_byte, 1); - if (ret) { - btrfs_abort_transaction(trans, root, - ret); - btrfs_end_transaction(trans, root); - goto out; - } + BUG_ON(ret); ret = btrfs_insert_empty_item(trans, root, path, &new_key, size); - if (ret) { - btrfs_abort_transaction(trans, root, - ret); - btrfs_end_transaction(trans, root); - 
goto out; - } + BUG_ON(ret); if (skip) { u32 start = @@ -2633,12 +2541,8 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, btrfs_i_size_write(inode, endoff); ret = btrfs_update_inode(trans, root, inode); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - btrfs_end_transaction(trans, root); - goto out; - } - ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + btrfs_end_transaction(trans, root); } next: btrfs_release_path(path); @@ -2647,7 +2551,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, ret = 0; out: btrfs_release_path(path); - unlock_extent(&BTRFS_I(src)->io_tree, off, off+len); + unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); out_unlock: mutex_unlock(&src->i_mutex); mutex_unlock(&inode->i_mutex); @@ -3162,8 +3066,8 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, goto out; extent_item_pos = loi->logical - key.objectid; - ret = iterate_extent_inodes(root->fs_info, key.objectid, - extent_item_pos, 0, build_ino_list, + ret = iterate_extent_inodes(root->fs_info, path, key.objectid, + extent_item_pos, build_ino_list, inodes); if (ret < 0) diff --git a/trunk/fs/btrfs/locking.c b/trunk/fs/btrfs/locking.c index 272f911203ff..5e178d8f7167 100644 --- a/trunk/fs/btrfs/locking.c +++ b/trunk/fs/btrfs/locking.c @@ -208,7 +208,7 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb) * take a spinning write lock. This will wait for both * blocking readers or writers */ -void btrfs_tree_lock(struct extent_buffer *eb) +int btrfs_tree_lock(struct extent_buffer *eb) { again: wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0); @@ -230,12 +230,13 @@ void btrfs_tree_lock(struct extent_buffer *eb) atomic_inc(&eb->spinning_writers); atomic_inc(&eb->write_locks); eb->lock_owner = current->pid; + return 0; } /* * drop a spinning or a blocking write lock. 
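/*
 * [editor's sketch, not part of the patch] The locking.c hunk above
 * moves btrfs_tree_lock() back from void to an int that can only ever
 * be 0 (btrfs_tree_unlock and the locking.h prototypes follow suit
 * below).  A userspace illustration of what the two signatures ask of
 * callers, with a pthread mutex standing in for the tree lock:
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;

/* "int that is always 0": callers either ignore the value or
 * write an error path that can never run */
static int tree_lock_int(void)
{
    pthread_mutex_lock(&tree_lock);
    return 0;
}

/* void: "cannot fail" becomes part of the signature itself */
static void tree_lock_void(void)
{
    pthread_mutex_lock(&tree_lock);
}

int main(void)
{
    if (tree_lock_int())        /* dead branch by construction */
        fprintf(stderr, "unreachable\n");
    pthread_mutex_unlock(&tree_lock);

    tree_lock_void();           /* nothing to check, nothing to forget */
    pthread_mutex_unlock(&tree_lock);
    return 0;
}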
*/ -void btrfs_tree_unlock(struct extent_buffer *eb) +int btrfs_tree_unlock(struct extent_buffer *eb) { int blockers = atomic_read(&eb->blocking_writers); @@ -254,6 +255,7 @@ void btrfs_tree_unlock(struct extent_buffer *eb) atomic_dec(&eb->spinning_writers); write_unlock(&eb->lock); } + return 0; } void btrfs_assert_tree_locked(struct extent_buffer *eb) diff --git a/trunk/fs/btrfs/locking.h b/trunk/fs/btrfs/locking.h index ca52681e5f40..17247ddb81a0 100644 --- a/trunk/fs/btrfs/locking.h +++ b/trunk/fs/btrfs/locking.h @@ -24,8 +24,8 @@ #define BTRFS_WRITE_LOCK_BLOCKING 3 #define BTRFS_READ_LOCK_BLOCKING 4 -void btrfs_tree_lock(struct extent_buffer *eb); -void btrfs_tree_unlock(struct extent_buffer *eb); +int btrfs_tree_lock(struct extent_buffer *eb); +int btrfs_tree_unlock(struct extent_buffer *eb); int btrfs_try_spin_lock(struct extent_buffer *eb); void btrfs_tree_read_lock(struct extent_buffer *eb); diff --git a/trunk/fs/btrfs/ordered-data.c b/trunk/fs/btrfs/ordered-data.c index bbf6d0d9aebe..a1c940425307 100644 --- a/trunk/fs/btrfs/ordered-data.c +++ b/trunk/fs/btrfs/ordered-data.c @@ -59,14 +59,6 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset, return NULL; } -static void ordered_data_tree_panic(struct inode *inode, int errno, - u64 offset) -{ - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - btrfs_panic(fs_info, errno, "Inconsistency in ordered tree at offset " - "%llu\n", (unsigned long long)offset); -} - /* * look for a given offset in the tree, and if it can't be found return the * first lesser offset @@ -215,8 +207,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, spin_lock(&tree->lock); node = tree_insert(&tree->tree, file_offset, &entry->rb_node); - if (node) - ordered_data_tree_panic(inode, -EEXIST, file_offset); + BUG_ON(node); spin_unlock(&tree->lock); spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); @@ -224,6 +215,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, &BTRFS_I(inode)->root->fs_info->ordered_extents); spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); + BUG_ON(node); return 0; } @@ -257,9 +249,9 @@ int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, * when an ordered extent is finished. If the list covers more than one * ordered extent, it is split across multiples. */ -void btrfs_add_ordered_sum(struct inode *inode, - struct btrfs_ordered_extent *entry, - struct btrfs_ordered_sum *sum) +int btrfs_add_ordered_sum(struct inode *inode, + struct btrfs_ordered_extent *entry, + struct btrfs_ordered_sum *sum) { struct btrfs_ordered_inode_tree *tree; @@ -267,6 +259,7 @@ void btrfs_add_ordered_sum(struct inode *inode, spin_lock(&tree->lock); list_add_tail(&sum->list, &entry->list); spin_unlock(&tree->lock); + return 0; } /* @@ -391,7 +384,7 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, * used to drop a reference on an ordered extent. This will free * the extent if the last reference is dropped */ -void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) +int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) { struct list_head *cur; struct btrfs_ordered_sum *sum; @@ -407,6 +400,7 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) } kfree(entry); } + return 0; } /* @@ -414,8 +408,8 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) * and you must wake_up entry->wait. You must hold the tree lock * while you call this function. 
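/*
 * [editor's sketch, not part of the patch] The ordered-data.c hunks
 * above rely on tree_insert() returning the clashing node when an
 * offset is already present; the caller then chooses between
 * BUG_ON(node) and the removed ordered_data_tree_panic() helper.  A
 * minimal list-based stand-in for that return convention (the real
 * code uses an rbtree):
 */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

struct node { unsigned long key; struct node *next; };

/* insert in key order; on a duplicate, return the existing node and
 * leave the list untouched, so the caller decides how loudly to fail */
static struct node *tree_insert(struct node **head, struct node *n)
{
    struct node **p = head;

    while (*p && (*p)->key < n->key)
        p = &(*p)->next;
    if (*p && (*p)->key == n->key)
        return *p;              /* the -EEXIST case */
    n->next = *p;
    *p = n;
    return NULL;
}

int main(void)
{
    struct node *head = NULL, a = { 10, NULL }, b = { 10, NULL };

    assert(tree_insert(&head, &a) == NULL); /* first insert is clean */
    /* second insert clashes; this is where BUG_ON(node) would fire */
    printf("duplicate detected: %d\n", tree_insert(&head, &b) != NULL);
    return 0;
}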
*/ -static void __btrfs_remove_ordered_extent(struct inode *inode, - struct btrfs_ordered_extent *entry) +static int __btrfs_remove_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry) { struct btrfs_ordered_inode_tree *tree; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -442,30 +436,35 @@ static void __btrfs_remove_ordered_extent(struct inode *inode, list_del_init(&BTRFS_I(inode)->ordered_operations); } spin_unlock(&root->fs_info->ordered_extent_lock); + + return 0; } /* * remove an ordered extent from the tree. No references are dropped * but any waiters are woken. */ -void btrfs_remove_ordered_extent(struct inode *inode, - struct btrfs_ordered_extent *entry) +int btrfs_remove_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry) { struct btrfs_ordered_inode_tree *tree; + int ret; tree = &BTRFS_I(inode)->ordered_tree; spin_lock(&tree->lock); - __btrfs_remove_ordered_extent(inode, entry); + ret = __btrfs_remove_ordered_extent(inode, entry); spin_unlock(&tree->lock); wake_up(&entry->wait); + + return ret; } /* * wait for all the ordered extents in a root. This is done when balancing * space between drives. */ -void btrfs_wait_ordered_extents(struct btrfs_root *root, - int nocow_only, int delay_iput) +int btrfs_wait_ordered_extents(struct btrfs_root *root, + int nocow_only, int delay_iput) { struct list_head splice; struct list_head *cur; @@ -513,6 +512,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, spin_lock(&root->fs_info->ordered_extent_lock); } spin_unlock(&root->fs_info->ordered_extent_lock); + return 0; } /* @@ -525,7 +525,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, * extra check to make sure the ordered operation list really is empty * before we return */ -void btrfs_run_ordered_operations(struct btrfs_root *root, int wait) +int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) { struct btrfs_inode *btrfs_inode; struct inode *inode; @@ -573,6 +573,8 @@ void btrfs_run_ordered_operations(struct btrfs_root *root, int wait) spin_unlock(&root->fs_info->ordered_extent_lock); mutex_unlock(&root->fs_info->ordered_operations_mutex); + + return 0; } /* @@ -607,7 +609,7 @@ void btrfs_start_ordered_extent(struct inode *inode, /* * Used to wait on ordered extents across a large range of bytes. */ -void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) +int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) { u64 end; u64 orig_end; @@ -662,6 +664,7 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) schedule_timeout(1); goto again; } + return 0; } /* @@ -945,8 +948,9 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, * If trans is not null, we'll do a friendly check for a transaction that * is already flushing things and force the IO down ourselves. */ -void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode) +int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode) { u64 last_mod; @@ -957,7 +961,7 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, * commit, we can safely return without doing anything */ if (last_mod < root->fs_info->last_trans_committed) - return; + return 0; /* * the transaction is already committing. 
Just start the IO and @@ -965,7 +969,7 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, */ if (trans && root->fs_info->running_transaction->blocked) { btrfs_wait_ordered_range(inode, 0, (u64)-1); - return; + return 0; } spin_lock(&root->fs_info->ordered_extent_lock); @@ -974,4 +978,6 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, &root->fs_info->ordered_operations); } spin_unlock(&root->fs_info->ordered_extent_lock); + + return 0; } diff --git a/trunk/fs/btrfs/ordered-data.h b/trunk/fs/btrfs/ordered-data.h index c355ad4dc1a6..ff1f69aa1883 100644 --- a/trunk/fs/btrfs/ordered-data.h +++ b/trunk/fs/btrfs/ordered-data.h @@ -138,8 +138,8 @@ btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t) t->last = NULL; } -void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry); -void btrfs_remove_ordered_extent(struct inode *inode, +int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry); +int btrfs_remove_ordered_extent(struct inode *inode, struct btrfs_ordered_extent *entry); int btrfs_dec_test_ordered_pending(struct inode *inode, struct btrfs_ordered_extent **cached, @@ -154,14 +154,14 @@ int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, u64 start, u64 len, u64 disk_len, int type, int compress_type); -void btrfs_add_ordered_sum(struct inode *inode, - struct btrfs_ordered_extent *entry, - struct btrfs_ordered_sum *sum); +int btrfs_add_ordered_sum(struct inode *inode, + struct btrfs_ordered_extent *entry, + struct btrfs_ordered_sum *sum); struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, u64 file_offset); void btrfs_start_ordered_extent(struct inode *inode, struct btrfs_ordered_extent *entry, int wait); -void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); +int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); struct btrfs_ordered_extent * btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, @@ -170,10 +170,10 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, struct btrfs_ordered_extent *ordered); int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); -void btrfs_run_ordered_operations(struct btrfs_root *root, int wait); -void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode); -void btrfs_wait_ordered_extents(struct btrfs_root *root, - int nocow_only, int delay_iput); +int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); +int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode); +int btrfs_wait_ordered_extents(struct btrfs_root *root, + int nocow_only, int delay_iput); #endif diff --git a/trunk/fs/btrfs/orphan.c b/trunk/fs/btrfs/orphan.c index 24cad1695af7..f8be250963a0 100644 --- a/trunk/fs/btrfs/orphan.c +++ b/trunk/fs/btrfs/orphan.c @@ -58,7 +58,7 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) goto out; - if (ret) { /* JDM: Really? 
*/ + if (ret) { ret = -ENOENT; goto out; } diff --git a/trunk/fs/btrfs/reada.c b/trunk/fs/btrfs/reada.c index dc5d33146fdb..22db04550f6a 100644 --- a/trunk/fs/btrfs/reada.c +++ b/trunk/fs/btrfs/reada.c @@ -54,6 +54,7 @@ * than the 2 started one after another. */ +#define MAX_MIRRORS 2 #define MAX_IN_FLIGHT 6 struct reada_extctl { @@ -70,7 +71,7 @@ struct reada_extent { struct list_head extctl; struct kref refcnt; spinlock_t lock; - struct reada_zone *zones[BTRFS_MAX_MIRRORS]; + struct reada_zone *zones[MAX_MIRRORS]; int nzones; struct btrfs_device *scheduled_for; }; @@ -83,8 +84,7 @@ struct reada_zone { spinlock_t lock; int locked; struct btrfs_device *device; - struct btrfs_device *devs[BTRFS_MAX_MIRRORS]; /* full list, incl - * self */ + struct btrfs_device *devs[MAX_MIRRORS]; /* full list, incl self */ int ndevs; struct kref refcnt; }; @@ -365,9 +365,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, if (ret || !bbio || length < blocksize) goto error; - if (bbio->num_stripes > BTRFS_MAX_MIRRORS) { + if (bbio->num_stripes > MAX_MIRRORS) { printk(KERN_ERR "btrfs readahead: more than %d copies not " - "supported", BTRFS_MAX_MIRRORS); + "supported", MAX_MIRRORS); goto error; } diff --git a/trunk/fs/btrfs/relocation.c b/trunk/fs/btrfs/relocation.c index 017281dbb2a7..8c1aae2c845d 100644 --- a/trunk/fs/btrfs/relocation.c +++ b/trunk/fs/btrfs/relocation.c @@ -326,19 +326,6 @@ static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) return NULL; } -void backref_tree_panic(struct rb_node *rb_node, int errno, - u64 bytenr) -{ - - struct btrfs_fs_info *fs_info = NULL; - struct backref_node *bnode = rb_entry(rb_node, struct backref_node, - rb_node); - if (bnode->root) - fs_info = bnode->root->fs_info; - btrfs_panic(fs_info, errno, "Inconsistency in backref cache " - "found at offset %llu\n", (unsigned long long)bytenr); -} - /* * walk up backref nodes until reach node presents tree root */ @@ -465,8 +452,7 @@ static void update_backref_node(struct backref_cache *cache, rb_erase(&node->rb_node, &cache->rb_root); node->bytenr = bytenr; rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); - if (rb_node) - backref_tree_panic(rb_node, -EEXIST, bytenr); + BUG_ON(rb_node); } /* @@ -1013,8 +999,7 @@ struct backref_node *build_backref_tree(struct reloc_control *rc, if (!cowonly) { rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); - if (rb_node) - backref_tree_panic(rb_node, -EEXIST, node->bytenr); + BUG_ON(rb_node); list_add_tail(&node->lower, &cache->leaves); } @@ -1049,9 +1034,7 @@ struct backref_node *build_backref_tree(struct reloc_control *rc, if (!cowonly) { rb_node = tree_insert(&cache->rb_root, upper->bytenr, &upper->rb_node); - if (rb_node) - backref_tree_panic(rb_node, -EEXIST, - upper->bytenr); + BUG_ON(rb_node); } list_add_tail(&edge->list[UPPER], &upper->lower); @@ -1197,8 +1180,7 @@ static int clone_backref_node(struct btrfs_trans_handle *trans, rb_node = tree_insert(&cache->rb_root, new_node->bytenr, &new_node->rb_node); - if (rb_node) - backref_tree_panic(rb_node, -EEXIST, new_node->bytenr); + BUG_ON(rb_node); if (!new_node->lowest) { list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) { @@ -1221,15 +1203,14 @@ static int clone_backref_node(struct btrfs_trans_handle *trans, /* * helper to add 'address of tree root -> reloc tree' mapping */ -static int __must_check __add_reloc_root(struct btrfs_root *root) +static int __add_reloc_root(struct btrfs_root *root) { struct rb_node *rb_node; struct mapping_node 
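/*
 * Illustrative aside: rc->reloc_root_tree maps a tree root's block
 * number to its root via nodes of the type declared here. The insert
 * path, condensed from this function (a duplicate bytenr is fatal):
 *
 *	node->bytenr = root->node->start;
 *	node->data   = root;
 *	spin_lock(&rc->reloc_root_tree.lock);
 *	rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
 *			      node->bytenr, &node->rb_node);
 *	spin_unlock(&rc->reloc_root_tree.lock);
 *	BUG_ON(rb_node);	// non-NULL: an entry already existed
 */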
*node; struct reloc_control *rc = root->fs_info->reloc_ctl; node = kmalloc(sizeof(*node), GFP_NOFS); - if (!node) - return -ENOMEM; + BUG_ON(!node); node->bytenr = root->node->start; node->data = root; @@ -1238,12 +1219,7 @@ static int __must_check __add_reloc_root(struct btrfs_root *root) rb_node = tree_insert(&rc->reloc_root_tree.rb_root, node->bytenr, &node->rb_node); spin_unlock(&rc->reloc_root_tree.lock); - if (rb_node) { - kfree(node); - btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found " - "for start=%llu while inserting into relocation " - "tree\n"); - } + BUG_ON(rb_node); list_add_tail(&root->root_list, &rc->reloc_roots); return 0; @@ -1276,8 +1252,7 @@ static int __update_reloc_root(struct btrfs_root *root, int del) rb_node = tree_insert(&rc->reloc_root_tree.rb_root, node->bytenr, &node->rb_node); spin_unlock(&rc->reloc_root_tree.lock); - if (rb_node) - backref_tree_panic(rb_node, -EEXIST, node->bytenr); + BUG_ON(rb_node); } else { list_del_init(&root->root_list); kfree(node); @@ -1359,7 +1334,6 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, struct btrfs_root *reloc_root; struct reloc_control *rc = root->fs_info->reloc_ctl; int clear_rsv = 0; - int ret; if (root->reloc_root) { reloc_root = root->reloc_root; @@ -1379,8 +1353,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, if (clear_rsv) trans->block_rsv = NULL; - ret = __add_reloc_root(reloc_root); - BUG_ON(ret < 0); + __add_reloc_root(reloc_root); root->reloc_root = reloc_root; return 0; } @@ -1604,14 +1577,15 @@ int replace_file_extents(struct btrfs_trans_handle *trans, WARN_ON(!IS_ALIGNED(end, root->sectorsize)); end--; ret = try_lock_extent(&BTRFS_I(inode)->io_tree, - key.offset, end); + key.offset, end, + GFP_NOFS); if (!ret) continue; btrfs_drop_extent_cache(inode, key.offset, end, 1); unlock_extent(&BTRFS_I(inode)->io_tree, - key.offset, end); + key.offset, end, GFP_NOFS); } } @@ -1982,9 +1956,9 @@ static int invalidate_extent_cache(struct btrfs_root *root, } /* the lock_extent waits for readpage to complete */ - lock_extent(&BTRFS_I(inode)->io_tree, start, end); + lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); btrfs_drop_extent_cache(inode, start, end, 1); - unlock_extent(&BTRFS_I(inode)->io_tree, start, end); + unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); } return 0; } @@ -2272,8 +2246,7 @@ int merge_reloc_roots(struct reloc_control *rc) } else { list_del_init(&reloc_root->root_list); } - ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1); - BUG_ON(ret < 0); + btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1); } if (found) { @@ -2889,12 +2862,12 @@ int prealloc_file_extent_cluster(struct inode *inode, else end = cluster->end - offset; - lock_extent(&BTRFS_I(inode)->io_tree, start, end); + lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); num_bytes = end + 1 - start; ret = btrfs_prealloc_file_range(inode, 0, start, num_bytes, num_bytes, end + 1, &alloc_hint); - unlock_extent(&BTRFS_I(inode)->io_tree, start, end); + unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); if (ret) break; nr++; @@ -2926,7 +2899,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end, em->bdev = root->fs_info->fs_devices->latest_bdev; set_bit(EXTENT_FLAG_PINNED, &em->flags); - lock_extent(&BTRFS_I(inode)->io_tree, start, end); + lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); while (1) { write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); @@ -2937,7 +2910,7 @@ int setup_extent_mapping(struct inode 
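/*
 * Aside: the extent-lock calls in these hunks use the older calling
 * convention that carries a gfp_t, as in this file:
 *
 *	lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
 *	// ... operate on [start, end] ...
 *	unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
 *
 * GFP_NOFS matters here: tracking the locked range can allocate, and
 * recursing into the filesystem from these paths could deadlock.
 */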
*inode, u64 start, u64 end, } btrfs_drop_extent_cache(inode, start, end, 0); } - unlock_extent(&BTRFS_I(inode)->io_tree, start, end); + unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); return ret; } @@ -3017,7 +2990,8 @@ static int relocate_file_extent_cluster(struct inode *inode, page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; - lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end); + lock_extent(&BTRFS_I(inode)->io_tree, + page_start, page_end, GFP_NOFS); set_page_extent_mapped(page); @@ -3033,7 +3007,7 @@ static int relocate_file_extent_cluster(struct inode *inode, set_page_dirty(page); unlock_extent(&BTRFS_I(inode)->io_tree, - page_start, page_end); + page_start, page_end, GFP_NOFS); unlock_page(page); page_cache_release(page); @@ -3180,8 +3154,7 @@ static int add_tree_block(struct reloc_control *rc, block->key_ready = 0; rb_node = tree_insert(blocks, block->bytenr, &block->rb_node); - if (rb_node) - backref_tree_panic(rb_node, -EEXIST, block->bytenr); + BUG_ON(rb_node); return 0; } @@ -3453,9 +3426,7 @@ static int find_data_references(struct reloc_control *rc, block->key_ready = 1; rb_node = tree_insert(blocks, block->bytenr, &block->rb_node); - if (rb_node) - backref_tree_panic(rb_node, -EEXIST, - block->bytenr); + BUG_ON(rb_node); } if (counted) added = 1; @@ -4102,11 +4073,10 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) { struct btrfs_trans_handle *trans; - int ret, err; + int ret; trans = btrfs_start_transaction(root->fs_info->tree_root, 0); - if (IS_ERR(trans)) - return PTR_ERR(trans); + BUG_ON(IS_ERR(trans)); memset(&root->root_item.drop_progress, 0, sizeof(root->root_item.drop_progress)); @@ -4114,11 +4084,11 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) btrfs_set_root_refs(&root->root_item, 0); ret = btrfs_update_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); + BUG_ON(ret); - err = btrfs_end_transaction(trans, root->fs_info->tree_root); - if (err) - return err; - return ret; + ret = btrfs_end_transaction(trans, root->fs_info->tree_root); + BUG_ON(ret); + return 0; } /* @@ -4186,11 +4156,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) err = ret; goto out; } - ret = mark_garbage_root(reloc_root); - if (ret < 0) { - err = ret; - goto out; - } + mark_garbage_root(reloc_root); } } @@ -4236,19 +4202,13 @@ int btrfs_recover_relocation(struct btrfs_root *root) fs_root = read_fs_root(root->fs_info, reloc_root->root_key.offset); - if (IS_ERR(fs_root)) { - err = PTR_ERR(fs_root); - goto out_free; - } + BUG_ON(IS_ERR(fs_root)); - err = __add_reloc_root(reloc_root); - BUG_ON(err < 0); /* -ENOMEM or logic error */ + __add_reloc_root(reloc_root); fs_root->reloc_root = reloc_root; } - err = btrfs_commit_transaction(trans, rc->extent_root); - if (err) - goto out_free; + btrfs_commit_transaction(trans, rc->extent_root); merge_reloc_roots(rc); @@ -4258,7 +4218,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) if (IS_ERR(trans)) err = PTR_ERR(trans); else - err = btrfs_commit_transaction(trans, rc->extent_root); + btrfs_commit_transaction(trans, rc->extent_root); out_free: kfree(rc); out: @@ -4307,8 +4267,6 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, disk_bytenr + len - 1, &list, 0); - if (ret) 
- goto out; while (!list_empty(&list)) { sums = list_entry(list.next, struct btrfs_ordered_sum, list); @@ -4326,7 +4284,6 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) btrfs_add_ordered_sum(inode, ordered, sums); } -out: btrfs_put_ordered_extent(ordered); return ret; } @@ -4423,7 +4380,7 @@ void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans, * called after snapshot is created. migrate block reservation * and create reloc root for the newly created snapshot */ -int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, +void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, struct btrfs_pending_snapshot *pending) { struct btrfs_root *root = pending->root; @@ -4433,7 +4390,7 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, int ret; if (!root->reloc_root) - return 0; + return; rc = root->fs_info->reloc_ctl; rc->merging_rsv_size += rc->nodes_relocated; @@ -4442,21 +4399,18 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, ret = btrfs_block_rsv_migrate(&pending->block_rsv, rc->block_rsv, rc->nodes_relocated); - if (ret) - return ret; + BUG_ON(ret); } new_root = pending->snap; reloc_root = create_reloc_root(trans, root->reloc_root, new_root->root_key.objectid); - if (IS_ERR(reloc_root)) - return PTR_ERR(reloc_root); - ret = __add_reloc_root(reloc_root); - BUG_ON(ret < 0); + __add_reloc_root(reloc_root); new_root->reloc_root = reloc_root; - if (rc->create_reloc_tree) + if (rc->create_reloc_tree) { ret = clone_backref_node(trans, rc, root, reloc_root); - return ret; + BUG_ON(ret); + } } diff --git a/trunk/fs/btrfs/root-tree.c b/trunk/fs/btrfs/root-tree.c index 24fb8ce4e071..f4099904565a 100644 --- a/trunk/fs/btrfs/root-tree.c +++ b/trunk/fs/btrfs/root-tree.c @@ -93,14 +93,10 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root unsigned long ptr; path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - + BUG_ON(!path); ret = btrfs_search_slot(trans, root, key, path, 0, 1); - if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); + if (ret < 0) goto out; - } if (ret != 0) { btrfs_print_leaf(root, path->nodes[0]); @@ -120,10 +116,13 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } -int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_key *key, struct btrfs_root_item *item) +int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_key *key, struct btrfs_root_item + *item) { - return btrfs_insert_item(trans, root, key, item, sizeof(*item)); + int ret; + ret = btrfs_insert_item(trans, root, key, item, sizeof(*item)); + return ret; } /* @@ -385,8 +384,6 @@ int btrfs_find_root_ref(struct btrfs_root *tree_root, * * For a back ref the root_id is the id of the subvol or snapshot and * ref_id is the id of the tree referencing it. 
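 * A sketch of the key layout behind the root_id/ref_id pairing
 * (hedged: reconstructed from the parameter names here, it is not
 * spelled out in this file):
 *
 *	key.objectid = root_id;
 *	key.type     = BTRFS_ROOT_BACKREF_KEY;	// or BTRFS_ROOT_REF_KEY
 *	key.offset   = ref_id;
 *
 * so the forward ref and the back ref share one item shape, with the
 * two ids and the key type swapped between the two directions.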
- * - * Will return 0, -ENOMEM, or anything from the CoW path */ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *tree_root, @@ -410,11 +407,7 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, again: ret = btrfs_insert_empty_item(trans, tree_root, path, &key, sizeof(*ref) + name_len); - if (ret) { - btrfs_abort_transaction(trans, tree_root, ret); - btrfs_free_path(path); - return ret; - } + BUG_ON(ret); leaf = path->nodes[0]; ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); diff --git a/trunk/fs/btrfs/scrub.c b/trunk/fs/btrfs/scrub.c index 90acc82046c3..390e7102b0ff 100644 --- a/trunk/fs/btrfs/scrub.c +++ b/trunk/fs/btrfs/scrub.c @@ -36,30 +36,37 @@ * Future enhancements: * - In case an unrepairable extent is encountered, track which files are * affected and report them + * - In case of a read error on files with nodatasum, map the file and read + * the extent to trigger a writeback of the good copy * - track and record media errors, throw out bad devices * - add a mode to also read unallocated space */ -struct scrub_block; +struct scrub_bio; +struct scrub_page; struct scrub_dev; +static void scrub_bio_end_io(struct bio *bio, int err); +static void scrub_checksum(struct btrfs_work *work); +static int scrub_checksum_data(struct scrub_dev *sdev, + struct scrub_page *spag, void *buffer); +static int scrub_checksum_tree_block(struct scrub_dev *sdev, + struct scrub_page *spag, u64 logical, + void *buffer); +static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer); +static int scrub_fixup_check(struct scrub_bio *sbio, int ix); +static void scrub_fixup_end_io(struct bio *bio, int err); +static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector, + struct page *page); +static void scrub_fixup(struct scrub_bio *sbio, int ix); #define SCRUB_PAGES_PER_BIO 16 /* 64k per bio */ #define SCRUB_BIOS_PER_DEV 16 /* 1 MB per device in flight */ -#define SCRUB_MAX_PAGES_PER_BLOCK 16 /* 64k per node/leaf/sector */ struct scrub_page { - struct scrub_block *sblock; - struct page *page; - struct block_device *bdev; u64 flags; /* extent flags */ u64 generation; - u64 logical; - u64 physical; - struct { - unsigned int mirror_num:8; - unsigned int have_csum:1; - unsigned int io_error:1; - }; + int mirror_num; + int have_csum; u8 csum[BTRFS_CSUM_SIZE]; }; @@ -70,25 +77,12 @@ struct scrub_bio { int err; u64 logical; u64 physical; - struct scrub_page *pagev[SCRUB_PAGES_PER_BIO]; - int page_count; + struct scrub_page spag[SCRUB_PAGES_PER_BIO]; + u64 count; int next_free; struct btrfs_work work; }; -struct scrub_block { - struct scrub_page pagev[SCRUB_MAX_PAGES_PER_BLOCK]; - int page_count; - atomic_t outstanding_pages; - atomic_t ref_count; /* free mem on transition to zero */ - struct scrub_dev *sdev; - struct { - unsigned int header_error:1; - unsigned int checksum_error:1; - unsigned int no_io_error_seen:1; - }; -}; - struct scrub_dev { struct scrub_bio *bios[SCRUB_BIOS_PER_DEV]; struct btrfs_device *dev; @@ -102,10 +96,6 @@ struct scrub_dev { struct list_head csum_list; atomic_t cancel_req; int readonly; - int pages_per_bio; /* <= SCRUB_PAGES_PER_BIO */ - u32 sectorsize; - u32 nodesize; - u32 leafsize; /* * statistics */ @@ -134,43 +124,6 @@ struct scrub_warning { int scratch_bufsize; }; - -static int scrub_handle_errored_block(struct scrub_block *sblock_to_check); -static int scrub_setup_recheck_block(struct scrub_dev *sdev, - struct btrfs_mapping_tree *map_tree, - u64 length, u64 logical, - struct scrub_block *sblock); -static 
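/*
 * Condensed model of the bio bookkeeping used from here on (a field
 * subset of the structs defined above):
 *
 *	struct scrub_bio {
 *		struct bio *bio;
 *		u64 logical, physical;	// start of a contiguous run
 *		struct scrub_page spag[SCRUB_PAGES_PER_BIO];
 *		u64 count;		// pages queued so far
 *	};
 *
 * Page i of the bio therefore describes logical + i * PAGE_SIZE; the
 * checksum, warning and fixup paths below all rely on that identity.
 */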
int scrub_recheck_block(struct btrfs_fs_info *fs_info, - struct scrub_block *sblock, int is_metadata, - int have_csum, u8 *csum, u64 generation, - u16 csum_size); -static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, - struct scrub_block *sblock, - int is_metadata, int have_csum, - const u8 *csum, u64 generation, - u16 csum_size); -static void scrub_complete_bio_end_io(struct bio *bio, int err); -static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, - struct scrub_block *sblock_good, - int force_write); -static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, - struct scrub_block *sblock_good, - int page_num, int force_write); -static int scrub_checksum_data(struct scrub_block *sblock); -static int scrub_checksum_tree_block(struct scrub_block *sblock); -static int scrub_checksum_super(struct scrub_block *sblock); -static void scrub_block_get(struct scrub_block *sblock); -static void scrub_block_put(struct scrub_block *sblock); -static int scrub_add_page_to_bio(struct scrub_dev *sdev, - struct scrub_page *spage); -static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len, - u64 physical, u64 flags, u64 gen, int mirror_num, - u8 *csum, int force); -static void scrub_bio_end_io(struct bio *bio, int err); -static void scrub_bio_end_io_worker(struct btrfs_work *work); -static void scrub_block_complete(struct scrub_block *sblock); - - static void scrub_free_csums(struct scrub_dev *sdev) { while (!list_empty(&sdev->csum_list)) { @@ -182,30 +135,37 @@ static void scrub_free_csums(struct scrub_dev *sdev) } } -static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev) +static void scrub_free_bio(struct bio *bio) { int i; + struct page *last_page = NULL; - if (!sdev) + if (!bio) return; - /* this can happen when scrub is cancelled */ - if (sdev->curr != -1) { - struct scrub_bio *sbio = sdev->bios[sdev->curr]; - - for (i = 0; i < sbio->page_count; i++) { - BUG_ON(!sbio->pagev[i]); - BUG_ON(!sbio->pagev[i]->page); - scrub_block_put(sbio->pagev[i]->sblock); - } - bio_put(sbio->bio); + for (i = 0; i < bio->bi_vcnt; ++i) { + if (bio->bi_io_vec[i].bv_page == last_page) + continue; + last_page = bio->bi_io_vec[i].bv_page; + __free_page(last_page); } + bio_put(bio); +} + +static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev) +{ + int i; + + if (!sdev) + return; for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { struct scrub_bio *sbio = sdev->bios[i]; if (!sbio) break; + + scrub_free_bio(sbio->bio); kfree(sbio); } @@ -219,16 +179,11 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev) struct scrub_dev *sdev; int i; struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; - int pages_per_bio; - pages_per_bio = min_t(int, SCRUB_PAGES_PER_BIO, - bio_get_nr_vecs(dev->bdev)); sdev = kzalloc(sizeof(*sdev), GFP_NOFS); if (!sdev) goto nomem; sdev->dev = dev; - sdev->pages_per_bio = pages_per_bio; - sdev->curr = -1; for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { struct scrub_bio *sbio; @@ -239,8 +194,8 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev) sbio->index = i; sbio->sdev = sdev; - sbio->page_count = 0; - sbio->work.func = scrub_bio_end_io_worker; + sbio->count = 0; + sbio->work.func = scrub_checksum; if (i != SCRUB_BIOS_PER_DEV-1) sdev->bios[i]->next_free = i + 1; @@ -248,9 +203,7 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev) sdev->bios[i]->next_free = -1; } sdev->first_free = 0; - sdev->nodesize = dev->dev_root->nodesize; - sdev->leafsize = dev->dev_root->leafsize; - sdev->sectorsize = 
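/*
 * Aside: sdev->bios[] is threaded into an index-based freelist by the
 * loop above. A minimal sketch of take and return (the return side
 * appears verbatim in scrub_checksum() below):
 *
 *	// take, under sdev->list_lock; -1 means empty
 *	i = sdev->first_free;
 *	sdev->first_free = sdev->bios[i]->next_free;
 *
 *	// return, under sdev->list_lock
 *	sbio->next_free = sdev->first_free;
 *	sdev->first_free = sbio->index;
 */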
dev->dev_root->sectorsize; + sdev->curr = -1; atomic_set(&sdev->in_flight, 0); atomic_set(&sdev->fixup_cnt, 0); atomic_set(&sdev->cancel_req, 0); @@ -341,9 +294,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx) return 0; } -static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) +static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio, + int ix) { - struct btrfs_device *dev = sblock->sdev->dev; + struct btrfs_device *dev = sbio->sdev->dev; struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; struct btrfs_path *path; struct btrfs_key found_key; @@ -362,9 +316,8 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS); swarn.msg_buf = kmalloc(bufsize, GFP_NOFS); - BUG_ON(sblock->page_count < 1); - swarn.sector = (sblock->pagev[0].physical) >> 9; - swarn.logical = sblock->pagev[0].logical; + swarn.sector = (sbio->physical + ix * PAGE_SIZE) >> 9; + swarn.logical = sbio->logical + ix * PAGE_SIZE; swarn.errstr = errstr; swarn.dev = dev; swarn.msg_bufsize = bufsize; @@ -389,8 +342,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) do { ret = tree_backref_for_extent(&ptr, eb, ei, item_size, &ref_root, &ref_level); - printk(KERN_WARNING - "btrfs: %s at logical %llu on dev %s, " + printk(KERN_WARNING "%s at logical %llu on dev %s, " "sector %llu: metadata %s (level %d) in tree " "%llu\n", errstr, swarn.logical, dev->name, (unsigned long long)swarn.sector, @@ -400,8 +352,8 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) } while (ret != 1); } else { swarn.path = path; - iterate_extent_inodes(fs_info, found_key.objectid, - extent_item_pos, 1, + iterate_extent_inodes(fs_info, path, found_key.objectid, + extent_item_pos, scrub_print_warning_inode, &swarn); } @@ -579,9 +531,9 @@ static void scrub_fixup_nodatasum(struct btrfs_work *work) spin_lock(&sdev->stat_lock); ++sdev->stat.uncorrectable_errors; spin_unlock(&sdev->stat_lock); - printk_ratelimited(KERN_ERR - "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n", - (unsigned long long)fixup->logical, sdev->dev->name); + printk_ratelimited(KERN_ERR "btrfs: unable to fixup " + "(nodatasum) error at logical %llu\n", + fixup->logical); } btrfs_free_path(path); @@ -598,168 +550,91 @@ static void scrub_fixup_nodatasum(struct btrfs_work *work) } /* - * scrub_handle_errored_block gets called when either verification of the - * pages failed or the bio failed to read, e.g. with EIO. In the latter - * case, this function handles all pages in the bio, even though only one - * may be bad. - * The goal of this function is to repair the errored block by using the - * contents of one of the mirrors. + * scrub_recheck_error gets called when either verification of the page + * failed or the bio failed to read, e.g. with EIO. 
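 * (Sketch of the per-page addressing used throughout: given a page
 * index ix within a scrub_bio, both lines below appear verbatim in
 * this file,
 *
 *	u64 logical = sbio->logical + ix * PAGE_SIZE;
 *	u64 sector  = (sbio->physical + ix * PAGE_SIZE) >> 9;
 *
 * i.e. byte offsets advance one page per index, and >> 9 converts a
 * physical byte offset to a 512-byte sector number.)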
In the latter case, + * recheck_error gets called for every page in the bio, even though only + * one may be bad */ -static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) +static int scrub_recheck_error(struct scrub_bio *sbio, int ix) { - struct scrub_dev *sdev = sblock_to_check->sdev; - struct btrfs_fs_info *fs_info; - u64 length; - u64 logical; - u64 generation; - unsigned int failed_mirror_index; - unsigned int is_metadata; - unsigned int have_csum; - u8 *csum; - struct scrub_block *sblocks_for_recheck; /* holds one for each mirror */ - struct scrub_block *sblock_bad; - int ret; - int mirror_index; - int page_num; - int success; + struct scrub_dev *sdev = sbio->sdev; + u64 sector = (sbio->physical + ix * PAGE_SIZE) >> 9; static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, - DEFAULT_RATELIMIT_BURST); - - BUG_ON(sblock_to_check->page_count < 1); - fs_info = sdev->dev->dev_root->fs_info; - length = sblock_to_check->page_count * PAGE_SIZE; - logical = sblock_to_check->pagev[0].logical; - generation = sblock_to_check->pagev[0].generation; - BUG_ON(sblock_to_check->pagev[0].mirror_num < 1); - failed_mirror_index = sblock_to_check->pagev[0].mirror_num - 1; - is_metadata = !(sblock_to_check->pagev[0].flags & - BTRFS_EXTENT_FLAG_DATA); - have_csum = sblock_to_check->pagev[0].have_csum; - csum = sblock_to_check->pagev[0].csum; - - /* - * read all mirrors one after the other. This includes to - * re-read the extent or metadata block that failed (that was - * the cause that this fixup code is called) another time, - * page by page this time in order to know which pages - * caused I/O errors and which ones are good (for all mirrors). - * It is the goal to handle the situation when more than one - * mirror contains I/O errors, but the errors do not - * overlap, i.e. the data can be repaired by selecting the - * pages from those mirrors without I/O error on the - * particular pages. One example (with blocks >= 2 * PAGE_SIZE) - * would be that mirror #1 has an I/O error on the first page, - * the second page is good, and mirror #2 has an I/O error on - * the second page, but the first page is good. - * Then the first page of the first mirror can be repaired by - * taking the first page of the second mirror, and the - * second page of the second mirror can be repaired by - * copying the contents of the 2nd page of the 1st mirror. - * One more note: if the pages of one mirror contain I/O - * errors, the checksum cannot be verified. In order to get - * the best data for repairing, the first attempt is to find - * a mirror without I/O errors and with a validated checksum. - * Only if this is not possible, the pages are picked from - * mirrors with I/O errors without considering the checksum. - * If the latter is the case, at the end, the checksum of the - * repaired area is verified in order to correctly maintain - * the statistics. 
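 * Reduced to a loop, the strategy described above is (names taken
 * from the code that follows; abbreviated, not the exact code):
 *
 *	for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
 *		if (!sblock_bad->pagev[page_num].io_error)
 *			continue;
 *		// for each mirror's sblock_other whose copy of this
 *		// page read without error:
 *		if (!scrub_repair_page_from_good_copy(sblock_bad,
 *				sblock_other, page_num, 0))
 *			break;	// this page is repaired
 *	}
 *
 * followed by one scrub_recheck_block() pass for the statistics.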
- */ - - sblocks_for_recheck = kzalloc(BTRFS_MAX_MIRRORS * - sizeof(*sblocks_for_recheck), - GFP_NOFS); - if (!sblocks_for_recheck) { - spin_lock(&sdev->stat_lock); - sdev->stat.malloc_errors++; - sdev->stat.read_errors++; - sdev->stat.uncorrectable_errors++; - spin_unlock(&sdev->stat_lock); - goto out; - } + DEFAULT_RATELIMIT_BURST); - /* setup the context, map the logical blocks and alloc the pages */ - ret = scrub_setup_recheck_block(sdev, &fs_info->mapping_tree, length, - logical, sblocks_for_recheck); - if (ret) { - spin_lock(&sdev->stat_lock); - sdev->stat.read_errors++; - sdev->stat.uncorrectable_errors++; - spin_unlock(&sdev->stat_lock); - goto out; + if (sbio->err) { + if (scrub_fixup_io(READ, sbio->sdev->dev->bdev, sector, + sbio->bio->bi_io_vec[ix].bv_page) == 0) { + if (scrub_fixup_check(sbio, ix) == 0) + return 0; + } + if (__ratelimit(&_rs)) + scrub_print_warning("i/o error", sbio, ix); + } else { + if (__ratelimit(&_rs)) + scrub_print_warning("checksum error", sbio, ix); } - BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS); - sblock_bad = sblocks_for_recheck + failed_mirror_index; - /* build and submit the bios for the failed mirror, check checksums */ - ret = scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum, - csum, generation, sdev->csum_size); - if (ret) { - spin_lock(&sdev->stat_lock); - sdev->stat.read_errors++; - sdev->stat.uncorrectable_errors++; - spin_unlock(&sdev->stat_lock); - goto out; - } + spin_lock(&sdev->stat_lock); + ++sdev->stat.read_errors; + spin_unlock(&sdev->stat_lock); - if (!sblock_bad->header_error && !sblock_bad->checksum_error && - sblock_bad->no_io_error_seen) { - /* - * the error disappeared after reading page by page, or - * the area was part of a huge bio and other parts of the - * bio caused I/O errors, or the block layer merged several - * read requests into one and the error is caused by a - * different bio (usually one of the two latter cases is - * the cause) - */ - spin_lock(&sdev->stat_lock); - sdev->stat.unverified_errors++; - spin_unlock(&sdev->stat_lock); + scrub_fixup(sbio, ix); + return 1; +} - goto out; - } +static int scrub_fixup_check(struct scrub_bio *sbio, int ix) +{ + int ret = 1; + struct page *page; + void *buffer; + u64 flags = sbio->spag[ix].flags; - if (!sblock_bad->no_io_error_seen) { - spin_lock(&sdev->stat_lock); - sdev->stat.read_errors++; - spin_unlock(&sdev->stat_lock); - if (__ratelimit(&_rs)) - scrub_print_warning("i/o error", sblock_to_check); - } else if (sblock_bad->checksum_error) { - spin_lock(&sdev->stat_lock); - sdev->stat.csum_errors++; - spin_unlock(&sdev->stat_lock); - if (__ratelimit(&_rs)) - scrub_print_warning("checksum error", sblock_to_check); - } else if (sblock_bad->header_error) { - spin_lock(&sdev->stat_lock); - sdev->stat.verify_errors++; - spin_unlock(&sdev->stat_lock); - if (__ratelimit(&_rs)) - scrub_print_warning("checksum/header error", - sblock_to_check); + page = sbio->bio->bi_io_vec[ix].bv_page; + buffer = kmap_atomic(page); + if (flags & BTRFS_EXTENT_FLAG_DATA) { + ret = scrub_checksum_data(sbio->sdev, + sbio->spag + ix, buffer); + } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { + ret = scrub_checksum_tree_block(sbio->sdev, + sbio->spag + ix, + sbio->logical + ix * PAGE_SIZE, + buffer); + } else { + WARN_ON(1); } + kunmap_atomic(buffer); - if (sdev->readonly) - goto did_not_correct_error; + return ret; +} - if (!is_metadata && !have_csum) { - struct scrub_fixup_nodatasum *fixup_nodatasum; +static void scrub_fixup_end_io(struct bio *bio, int err) +{ + complete((struct 
completion *)bio->bi_private); +} - /* - * !is_metadata and !have_csum, this means that the data - * might not be COW'ed, that it might be modified - * concurrently. The general strategy to work on the - * commit root does not help in the case when COW is not - * used. - */ - fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS); - if (!fixup_nodatasum) - goto did_not_correct_error; - fixup_nodatasum->sdev = sdev; - fixup_nodatasum->logical = logical; - fixup_nodatasum->root = fs_info->extent_root; - fixup_nodatasum->mirror_num = failed_mirror_index + 1; +static void scrub_fixup(struct scrub_bio *sbio, int ix) +{ + struct scrub_dev *sdev = sbio->sdev; + struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; + struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; + struct btrfs_bio *bbio = NULL; + struct scrub_fixup_nodatasum *fixup; + u64 logical = sbio->logical + ix * PAGE_SIZE; + u64 length; + int i; + int ret; + DECLARE_COMPLETION_ONSTACK(complete); + + if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) && + (sbio->spag[ix].have_csum == 0)) { + fixup = kzalloc(sizeof(*fixup), GFP_NOFS); + if (!fixup) + goto uncorrectable; + fixup->sdev = sdev; + fixup->logical = logical; + fixup->root = fs_info->extent_root; + fixup->mirror_num = sbio->spag[ix].mirror_num; /* * increment scrubs_running to prevent cancel requests from * completing as long as a fixup worker is running. we must also @@ -774,528 +649,235 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) atomic_inc(&fs_info->scrubs_paused); mutex_unlock(&fs_info->scrub_lock); atomic_inc(&sdev->fixup_cnt); - fixup_nodatasum->work.func = scrub_fixup_nodatasum; - btrfs_queue_worker(&fs_info->scrub_workers, - &fixup_nodatasum->work); - goto out; + fixup->work.func = scrub_fixup_nodatasum; + btrfs_queue_worker(&fs_info->scrub_workers, &fixup->work); + return; } - /* - * now build and submit the bios for the other mirrors, check - * checksums - */ - for (mirror_index = 0; - mirror_index < BTRFS_MAX_MIRRORS && - sblocks_for_recheck[mirror_index].page_count > 0; - mirror_index++) { - if (mirror_index == failed_mirror_index) - continue; - - /* build and submit the bios, check checksums */ - ret = scrub_recheck_block(fs_info, - sblocks_for_recheck + mirror_index, - is_metadata, have_csum, csum, - generation, sdev->csum_size); - if (ret) - goto did_not_correct_error; + length = PAGE_SIZE; + ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length, + &bbio, 0); + if (ret || !bbio || length < PAGE_SIZE) { + printk(KERN_ERR + "scrub_fixup: btrfs_map_block failed us for %llu\n", + (unsigned long long)logical); + WARN_ON(1); + kfree(bbio); + return; } - /* - * first try to pick the mirror which is completely without I/O - * errors and also does not have a checksum error. - * If one is found, and if a checksum is present, the full block - * that is known to contain an error is rewritten. Afterwards - * the block is known to be corrected. - * If a mirror is found which is completely correct, and no - * checksum is present, only those pages are rewritten that had - * an I/O error in the block to be repaired, since it cannot be - * determined, which copy of the other pages is better (and it - * could happen otherwise that a correct page would be - * overwritten by a bad one). 
- */ - for (mirror_index = 0; - mirror_index < BTRFS_MAX_MIRRORS && - sblocks_for_recheck[mirror_index].page_count > 0; - mirror_index++) { - struct scrub_block *sblock_other = sblocks_for_recheck + - mirror_index; - - if (!sblock_other->header_error && - !sblock_other->checksum_error && - sblock_other->no_io_error_seen) { - int force_write = is_metadata || have_csum; - - ret = scrub_repair_block_from_good_copy(sblock_bad, - sblock_other, - force_write); - if (0 == ret) - goto corrected_error; - } - } + if (bbio->num_stripes == 1) + /* there aren't any replicas */ + goto uncorrectable; /* - * in case of I/O errors in the area that is supposed to be - * repaired, continue by picking good copies of those pages. - * Select the good pages from mirrors to rewrite bad pages from - * the area to fix. Afterwards verify the checksum of the block - * that is supposed to be repaired. This verification step is - * only done for the purpose of statistic counting and for the - * final scrub report, whether errors remain. - * A perfect algorithm could make use of the checksum and try - * all possible combinations of pages from the different mirrors - * until the checksum verification succeeds. For example, when - * the 2nd page of mirror #1 faces I/O errors, and the 2nd page - * of mirror #2 is readable but the final checksum test fails, - * then the 2nd page of mirror #3 could be tried, whether now - * the final checksum succeeds. But this would be a rare - * exception and is therefore not implemented. At least it is - * avoided that the good copy is overwritten. - * A more useful improvement would be to pick the sectors - * without I/O error based on sector sizes (512 bytes on legacy - * disks) instead of on PAGE_SIZE. Then maybe 512 byte of one - * mirror could be repaired by taking 512 byte of a different - * mirror, even if other 512 byte sectors in the same PAGE_SIZE - * area are unreadable. + * first find a good copy */ - - /* can only fix I/O errors from here on */ - if (sblock_bad->no_io_error_seen) - goto did_not_correct_error; - - success = 1; - for (page_num = 0; page_num < sblock_bad->page_count; page_num++) { - struct scrub_page *page_bad = sblock_bad->pagev + page_num; - - if (!page_bad->io_error) + for (i = 0; i < bbio->num_stripes; ++i) { + if (i + 1 == sbio->spag[ix].mirror_num) continue; - for (mirror_index = 0; - mirror_index < BTRFS_MAX_MIRRORS && - sblocks_for_recheck[mirror_index].page_count > 0; - mirror_index++) { - struct scrub_block *sblock_other = sblocks_for_recheck + - mirror_index; - struct scrub_page *page_other = sblock_other->pagev + - page_num; - - if (!page_other->io_error) { - ret = scrub_repair_page_from_good_copy( - sblock_bad, sblock_other, page_num, 0); - if (0 == ret) { - page_bad->io_error = 0; - break; /* succeeded for this page */ - } - } - } - - if (page_bad->io_error) { - /* did not find a mirror to copy the page from */ - success = 0; - } - } - - if (success) { - if (is_metadata || have_csum) { - /* - * need to verify the checksum now that all - * sectors on disk are repaired (the write - * request for data to be repaired is on its way). - * Just be lazy and use scrub_recheck_block() - * which re-reads the data before the checksum - * is verified, but most likely the data comes out - * of the page cache.
- */ - ret = scrub_recheck_block(fs_info, sblock_bad, - is_metadata, have_csum, csum, - generation, sdev->csum_size); - if (!ret && !sblock_bad->header_error && - !sblock_bad->checksum_error && - sblock_bad->no_io_error_seen) - goto corrected_error; - else - goto did_not_correct_error; - } else { -corrected_error: - spin_lock(&sdev->stat_lock); - sdev->stat.corrected_errors++; - spin_unlock(&sdev->stat_lock); - printk_ratelimited(KERN_ERR - "btrfs: fixed up error at logical %llu on dev %s\n", - (unsigned long long)logical, sdev->dev->name); + if (scrub_fixup_io(READ, bbio->stripes[i].dev->bdev, + bbio->stripes[i].physical >> 9, + sbio->bio->bi_io_vec[ix].bv_page)) { + /* I/O-error, this is not a good copy */ + continue; } - } else { -did_not_correct_error: - spin_lock(&sdev->stat_lock); - sdev->stat.uncorrectable_errors++; - spin_unlock(&sdev->stat_lock); - printk_ratelimited(KERN_ERR - "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n", - (unsigned long long)logical, sdev->dev->name); - } -out: - if (sblocks_for_recheck) { - for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS; - mirror_index++) { - struct scrub_block *sblock = sblocks_for_recheck + - mirror_index; - int page_index; - - for (page_index = 0; page_index < SCRUB_PAGES_PER_BIO; - page_index++) - if (sblock->pagev[page_index].page) - __free_page( - sblock->pagev[page_index].page); - } - kfree(sblocks_for_recheck); + if (scrub_fixup_check(sbio, ix) == 0) + break; } + if (i == bbio->num_stripes) + goto uncorrectable; - return 0; -} - -static int scrub_setup_recheck_block(struct scrub_dev *sdev, - struct btrfs_mapping_tree *map_tree, - u64 length, u64 logical, - struct scrub_block *sblocks_for_recheck) -{ - int page_index; - int mirror_index; - int ret; - - /* - * note: the three members sdev, ref_count and outstanding_pages - * are not used (and not set) in the blocks that are used for - * the recheck procedure - */ - - page_index = 0; - while (length > 0) { - u64 sublen = min_t(u64, length, PAGE_SIZE); - u64 mapped_length = sublen; - struct btrfs_bio *bbio = NULL; - + if (!sdev->readonly) { /* - * with a length of PAGE_SIZE, each returned stripe - * represents one mirror + * bi_io_vec[ix].bv_page now contains good data, write it back */ - ret = btrfs_map_block(map_tree, WRITE, logical, &mapped_length, - &bbio, 0); - if (ret || !bbio || mapped_length < sublen) { - kfree(bbio); - return -EIO; - } - - BUG_ON(page_index >= SCRUB_PAGES_PER_BIO); - for (mirror_index = 0; mirror_index < (int)bbio->num_stripes; - mirror_index++) { - struct scrub_block *sblock; - struct scrub_page *page; - - if (mirror_index >= BTRFS_MAX_MIRRORS) - continue; - - sblock = sblocks_for_recheck + mirror_index; - page = sblock->pagev + page_index; - page->logical = logical; - page->physical = bbio->stripes[mirror_index].physical; - page->bdev = bbio->stripes[mirror_index].dev->bdev; - page->mirror_num = mirror_index + 1; - page->page = alloc_page(GFP_NOFS); - if (!page->page) { - spin_lock(&sdev->stat_lock); - sdev->stat.malloc_errors++; - spin_unlock(&sdev->stat_lock); - return -ENOMEM; - } - sblock->page_count++; + if (scrub_fixup_io(WRITE, sdev->dev->bdev, + (sbio->physical + ix * PAGE_SIZE) >> 9, + sbio->bio->bi_io_vec[ix].bv_page)) { + /* I/O-error, writeback failed, give up */ + goto uncorrectable; } - kfree(bbio); - length -= sublen; - logical += sublen; - page_index++; } - return 0; -} - -/* - * this function will check the on disk data for checksum errors, header - * errors and read I/O errors. 
If any I/O errors happen, the exact pages - * which are errored are marked as being bad. The goal is to enable scrub - * to take those pages that are not errored from all the mirrors so that - * the pages that are errored in the just handled mirror can be repaired. - */ -static int scrub_recheck_block(struct btrfs_fs_info *fs_info, - struct scrub_block *sblock, int is_metadata, - int have_csum, u8 *csum, u64 generation, - u16 csum_size) -{ - int page_num; - - sblock->no_io_error_seen = 1; - sblock->header_error = 0; - sblock->checksum_error = 0; - - for (page_num = 0; page_num < sblock->page_count; page_num++) { - struct bio *bio; - int ret; - struct scrub_page *page = sblock->pagev + page_num; - DECLARE_COMPLETION_ONSTACK(complete); - - BUG_ON(!page->page); - bio = bio_alloc(GFP_NOFS, 1); - bio->bi_bdev = page->bdev; - bio->bi_sector = page->physical >> 9; - bio->bi_end_io = scrub_complete_bio_end_io; - bio->bi_private = &complete; - - ret = bio_add_page(bio, page->page, PAGE_SIZE, 0); - if (PAGE_SIZE != ret) { - bio_put(bio); - return -EIO; - } - btrfsic_submit_bio(READ, bio); - - /* this will also unplug the queue */ - wait_for_completion(&complete); + kfree(bbio); + spin_lock(&sdev->stat_lock); + ++sdev->stat.corrected_errors; + spin_unlock(&sdev->stat_lock); - page->io_error = !test_bit(BIO_UPTODATE, &bio->bi_flags); - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) - sblock->no_io_error_seen = 0; - bio_put(bio); - } + printk_ratelimited(KERN_ERR "btrfs: fixed up error at logical %llu\n", + (unsigned long long)logical); + return; - if (sblock->no_io_error_seen) - scrub_recheck_block_checksum(fs_info, sblock, is_metadata, - have_csum, csum, generation, - csum_size); +uncorrectable: + kfree(bbio); + spin_lock(&sdev->stat_lock); + ++sdev->stat.uncorrectable_errors; + spin_unlock(&sdev->stat_lock); - return 0; + printk_ratelimited(KERN_ERR "btrfs: unable to fixup (regular) error at " + "logical %llu\n", (unsigned long long)logical); } -static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, - struct scrub_block *sblock, - int is_metadata, int have_csum, - const u8 *csum, u64 generation, - u16 csum_size) +static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector, + struct page *page) { - int page_num; - u8 calculated_csum[BTRFS_CSUM_SIZE]; - u32 crc = ~(u32)0; - struct btrfs_root *root = fs_info->extent_root; - void *mapped_buffer; - - BUG_ON(!sblock->pagev[0].page); - if (is_metadata) { - struct btrfs_header *h; - - mapped_buffer = kmap_atomic(sblock->pagev[0].page); - h = (struct btrfs_header *)mapped_buffer; - - if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr) || - generation != le64_to_cpu(h->generation) || - memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) || - memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, - BTRFS_UUID_SIZE)) - sblock->header_error = 1; - csum = h->csum; - } else { - if (!have_csum) - return; - - mapped_buffer = kmap_atomic(sblock->pagev[0].page); - } - - for (page_num = 0;;) { - if (page_num == 0 && is_metadata) - crc = btrfs_csum_data(root, - ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE, - crc, PAGE_SIZE - BTRFS_CSUM_SIZE); - else - crc = btrfs_csum_data(root, mapped_buffer, crc, - PAGE_SIZE); + struct bio *bio = NULL; + int ret; + DECLARE_COMPLETION_ONSTACK(complete); - kunmap_atomic(mapped_buffer); - page_num++; - if (page_num >= sblock->page_count) - break; - BUG_ON(!sblock->pagev[page_num].page); + bio = bio_alloc(GFP_NOFS, 1); + bio->bi_bdev = bdev; + bio->bi_sector = sector; + bio_add_page(bio, page, PAGE_SIZE, 0); + 
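/*
 * Aside: scrub_fixup_io() is the synchronous single-page I/O helper
 * used by both the recheck and the rewrite paths. Its pattern, in
 * miniature (all lines from this function):
 *
 *	DECLARE_COMPLETION_ONSTACK(complete);
 *	bio->bi_end_io  = scrub_fixup_end_io;	// just complete()s
 *	bio->bi_private = &complete;
 *	btrfsic_submit_bio(rw, bio);
 *	wait_for_completion(&complete);		// also unplugs the queue
 *	ret = !test_bit(BIO_UPTODATE, &bio->bi_flags);
 *
 * so a zero return means the page transferred, for READ or WRITE.
 */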
bio->bi_end_io = scrub_fixup_end_io; + bio->bi_private = &complete; + btrfsic_submit_bio(rw, bio); - mapped_buffer = kmap_atomic(sblock->pagev[page_num].page); - } + /* this will also unplug the queue */ + wait_for_completion(&complete); - btrfs_csum_final(crc, calculated_csum); - if (memcmp(calculated_csum, csum, csum_size)) - sblock->checksum_error = 1; -} - -static void scrub_complete_bio_end_io(struct bio *bio, int err) -{ - complete((struct completion *)bio->bi_private); + ret = !test_bit(BIO_UPTODATE, &bio->bi_flags); + bio_put(bio); + return ret; } -static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, - struct scrub_block *sblock_good, - int force_write) +static void scrub_bio_end_io(struct bio *bio, int err) { - int page_num; - int ret = 0; - - for (page_num = 0; page_num < sblock_bad->page_count; page_num++) { - int ret_sub; + struct scrub_bio *sbio = bio->bi_private; + struct scrub_dev *sdev = sbio->sdev; + struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; - ret_sub = scrub_repair_page_from_good_copy(sblock_bad, - sblock_good, - page_num, - force_write); - if (ret_sub) - ret = ret_sub; - } + sbio->err = err; + sbio->bio = bio; - return ret; + btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work); } -static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, - struct scrub_block *sblock_good, - int page_num, int force_write) +static void scrub_checksum(struct btrfs_work *work) { - struct scrub_page *page_bad = sblock_bad->pagev + page_num; - struct scrub_page *page_good = sblock_good->pagev + page_num; + struct scrub_bio *sbio = container_of(work, struct scrub_bio, work); + struct scrub_dev *sdev = sbio->sdev; + struct page *page; + void *buffer; + int i; + u64 flags; + u64 logical; + int ret; - BUG_ON(sblock_bad->pagev[page_num].page == NULL); - BUG_ON(sblock_good->pagev[page_num].page == NULL); - if (force_write || sblock_bad->header_error || - sblock_bad->checksum_error || page_bad->io_error) { - struct bio *bio; - int ret; - DECLARE_COMPLETION_ONSTACK(complete); - - bio = bio_alloc(GFP_NOFS, 1); - bio->bi_bdev = page_bad->bdev; - bio->bi_sector = page_bad->physical >> 9; - bio->bi_end_io = scrub_complete_bio_end_io; - bio->bi_private = &complete; - - ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0); - if (PAGE_SIZE != ret) { - bio_put(bio); - return -EIO; + if (sbio->err) { + ret = 0; + for (i = 0; i < sbio->count; ++i) + ret |= scrub_recheck_error(sbio, i); + if (!ret) { + spin_lock(&sdev->stat_lock); + ++sdev->stat.unverified_errors; + spin_unlock(&sdev->stat_lock); } - btrfsic_submit_bio(WRITE, bio); - /* this will also unplug the queue */ - wait_for_completion(&complete); - bio_put(bio); - } - - return 0; -} + sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1); + sbio->bio->bi_flags |= 1 << BIO_UPTODATE; + sbio->bio->bi_phys_segments = 0; + sbio->bio->bi_idx = 0; -static void scrub_checksum(struct scrub_block *sblock) -{ - u64 flags; - int ret; + for (i = 0; i < sbio->count; i++) { + struct bio_vec *bi; + bi = &sbio->bio->bi_io_vec[i]; + bi->bv_offset = 0; + bi->bv_len = PAGE_SIZE; + } + goto out; + } + for (i = 0; i < sbio->count; ++i) { + page = sbio->bio->bi_io_vec[i].bv_page; + buffer = kmap_atomic(page); + flags = sbio->spag[i].flags; + logical = sbio->logical + i * PAGE_SIZE; + ret = 0; + if (flags & BTRFS_EXTENT_FLAG_DATA) { + ret = scrub_checksum_data(sdev, sbio->spag + i, buffer); + } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { + ret = scrub_checksum_tree_block(sdev, sbio->spag + i, + logical, buffer); + 
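/*
 * (Dispatch summary for this loop: each page was queued with the
 * extent flags of its owner and is verified accordingly:
 *
 *	BTRFS_EXTENT_FLAG_DATA       -> scrub_checksum_data()
 *	BTRFS_EXTENT_FLAG_TREE_BLOCK -> scrub_checksum_tree_block()
 *	BTRFS_EXTENT_FLAG_SUPER      -> scrub_checksum_super(); a super
 *					block must sit at page 0 of its
 *					bio, hence the BUG_ON(i) below
 *
 * and any nonzero result is handed to scrub_recheck_error().)
 */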
} else if (flags & BTRFS_EXTENT_FLAG_SUPER) { + BUG_ON(i); + (void)scrub_checksum_super(sbio, buffer); + } else { + WARN_ON(1); + } + kunmap_atomic(buffer); + if (ret) { + ret = scrub_recheck_error(sbio, i); + if (!ret) { + spin_lock(&sdev->stat_lock); + ++sdev->stat.unverified_errors; + spin_unlock(&sdev->stat_lock); + } + } + } - BUG_ON(sblock->page_count < 1); - flags = sblock->pagev[0].flags; - ret = 0; - if (flags & BTRFS_EXTENT_FLAG_DATA) - ret = scrub_checksum_data(sblock); - else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) - ret = scrub_checksum_tree_block(sblock); - else if (flags & BTRFS_EXTENT_FLAG_SUPER) - (void)scrub_checksum_super(sblock); - else - WARN_ON(1); - if (ret) - scrub_handle_errored_block(sblock); +out: + scrub_free_bio(sbio->bio); + sbio->bio = NULL; + spin_lock(&sdev->list_lock); + sbio->next_free = sdev->first_free; + sdev->first_free = sbio->index; + spin_unlock(&sdev->list_lock); + atomic_dec(&sdev->in_flight); + wake_up(&sdev->list_wait); } -static int scrub_checksum_data(struct scrub_block *sblock) +static int scrub_checksum_data(struct scrub_dev *sdev, + struct scrub_page *spag, void *buffer) { - struct scrub_dev *sdev = sblock->sdev; u8 csum[BTRFS_CSUM_SIZE]; - u8 *on_disk_csum; - struct page *page; - void *buffer; u32 crc = ~(u32)0; int fail = 0; struct btrfs_root *root = sdev->dev->dev_root; - u64 len; - int index; - BUG_ON(sblock->page_count < 1); - if (!sblock->pagev[0].have_csum) + if (!spag->have_csum) return 0; - on_disk_csum = sblock->pagev[0].csum; - page = sblock->pagev[0].page; - buffer = kmap_atomic(page); - - len = sdev->sectorsize; - index = 0; - for (;;) { - u64 l = min_t(u64, len, PAGE_SIZE); - - crc = btrfs_csum_data(root, buffer, crc, l); - kunmap_atomic(buffer); - len -= l; - if (len == 0) - break; - index++; - BUG_ON(index >= sblock->page_count); - BUG_ON(!sblock->pagev[index].page); - page = sblock->pagev[index].page; - buffer = kmap_atomic(page); - } - + crc = btrfs_csum_data(root, buffer, crc, PAGE_SIZE); btrfs_csum_final(crc, csum); - if (memcmp(csum, on_disk_csum, sdev->csum_size)) + if (memcmp(csum, spag->csum, sdev->csum_size)) fail = 1; - if (fail) { - spin_lock(&sdev->stat_lock); + spin_lock(&sdev->stat_lock); + ++sdev->stat.data_extents_scrubbed; + sdev->stat.data_bytes_scrubbed += PAGE_SIZE; + if (fail) ++sdev->stat.csum_errors; - spin_unlock(&sdev->stat_lock); - } + spin_unlock(&sdev->stat_lock); return fail; } -static int scrub_checksum_tree_block(struct scrub_block *sblock) +static int scrub_checksum_tree_block(struct scrub_dev *sdev, + struct scrub_page *spag, u64 logical, + void *buffer) { - struct scrub_dev *sdev = sblock->sdev; struct btrfs_header *h; struct btrfs_root *root = sdev->dev->dev_root; struct btrfs_fs_info *fs_info = root->fs_info; - u8 calculated_csum[BTRFS_CSUM_SIZE]; - u8 on_disk_csum[BTRFS_CSUM_SIZE]; - struct page *page; - void *mapped_buffer; - u64 mapped_size; - void *p; + u8 csum[BTRFS_CSUM_SIZE]; u32 crc = ~(u32)0; int fail = 0; int crc_fail = 0; - u64 len; - int index; - - BUG_ON(sblock->page_count < 1); - page = sblock->pagev[0].page; - mapped_buffer = kmap_atomic(page); - h = (struct btrfs_header *)mapped_buffer; - memcpy(on_disk_csum, h->csum, sdev->csum_size); /* * we don't use the getter functions here, as we * a) don't have an extent buffer and * b) the page is already kmapped */ + h = (struct btrfs_header *)buffer; - if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr)) + if (logical != le64_to_cpu(h->bytenr)) ++fail; - if (sblock->pagev[0].generation != le64_to_cpu(h->generation)) + if 
(spag->generation != le64_to_cpu(h->generation)) ++fail; if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) @@ -1305,99 +887,51 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) BTRFS_UUID_SIZE)) ++fail; - BUG_ON(sdev->nodesize != sdev->leafsize); - len = sdev->nodesize - BTRFS_CSUM_SIZE; - mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; - p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE; - index = 0; - for (;;) { - u64 l = min_t(u64, len, mapped_size); - - crc = btrfs_csum_data(root, p, crc, l); - kunmap_atomic(mapped_buffer); - len -= l; - if (len == 0) - break; - index++; - BUG_ON(index >= sblock->page_count); - BUG_ON(!sblock->pagev[index].page); - page = sblock->pagev[index].page; - mapped_buffer = kmap_atomic(page); - mapped_size = PAGE_SIZE; - p = mapped_buffer; - } - - btrfs_csum_final(crc, calculated_csum); - if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size)) + crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc, + PAGE_SIZE - BTRFS_CSUM_SIZE); + btrfs_csum_final(crc, csum); + if (memcmp(csum, h->csum, sdev->csum_size)) ++crc_fail; - if (crc_fail || fail) { - spin_lock(&sdev->stat_lock); - if (crc_fail) - ++sdev->stat.csum_errors; - if (fail) - ++sdev->stat.verify_errors; - spin_unlock(&sdev->stat_lock); - } + spin_lock(&sdev->stat_lock); + ++sdev->stat.tree_extents_scrubbed; + sdev->stat.tree_bytes_scrubbed += PAGE_SIZE; + if (crc_fail) + ++sdev->stat.csum_errors; + if (fail) + ++sdev->stat.verify_errors; + spin_unlock(&sdev->stat_lock); return fail || crc_fail; } -static int scrub_checksum_super(struct scrub_block *sblock) +static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer) { struct btrfs_super_block *s; - struct scrub_dev *sdev = sblock->sdev; + u64 logical; + struct scrub_dev *sdev = sbio->sdev; struct btrfs_root *root = sdev->dev->dev_root; struct btrfs_fs_info *fs_info = root->fs_info; - u8 calculated_csum[BTRFS_CSUM_SIZE]; - u8 on_disk_csum[BTRFS_CSUM_SIZE]; - struct page *page; - void *mapped_buffer; - u64 mapped_size; - void *p; + u8 csum[BTRFS_CSUM_SIZE]; u32 crc = ~(u32)0; int fail = 0; - u64 len; - int index; - BUG_ON(sblock->page_count < 1); - page = sblock->pagev[0].page; - mapped_buffer = kmap_atomic(page); - s = (struct btrfs_super_block *)mapped_buffer; - memcpy(on_disk_csum, s->csum, sdev->csum_size); + s = (struct btrfs_super_block *)buffer; + logical = sbio->logical; - if (sblock->pagev[0].logical != le64_to_cpu(s->bytenr)) + if (logical != le64_to_cpu(s->bytenr)) ++fail; - if (sblock->pagev[0].generation != le64_to_cpu(s->generation)) + if (sbio->spag[0].generation != le64_to_cpu(s->generation)) ++fail; if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) ++fail; - len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE; - mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; - p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE; - index = 0; - for (;;) { - u64 l = min_t(u64, len, mapped_size); - - crc = btrfs_csum_data(root, p, crc, l); - kunmap_atomic(mapped_buffer); - len -= l; - if (len == 0) - break; - index++; - BUG_ON(index >= sblock->page_count); - BUG_ON(!sblock->pagev[index].page); - page = sblock->pagev[index].page; - mapped_buffer = kmap_atomic(page); - mapped_size = PAGE_SIZE; - p = mapped_buffer; - } - - btrfs_csum_final(crc, calculated_csum); - if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size)) + crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc, + PAGE_SIZE - BTRFS_CSUM_SIZE); + btrfs_csum_final(crc, csum); + if (memcmp(csum, s->csum, sbio->sdev->csum_size)) ++fail; if (fail) { @@ -1414,42 +948,29 @@ static int 
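/*
 * Aside: the three verifiers share one shape. Metadata blocks store
 * their checksum in the first BTRFS_CSUM_SIZE bytes, so the CRC runs
 * over the rest of the page and is compared with the stored copy
 * (stored_csum below stands in for h->csum / s->csum):
 *
 *	u32 crc = ~(u32)0;
 *	u8 csum[BTRFS_CSUM_SIZE];
 *
 *	crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
 *			      PAGE_SIZE - BTRFS_CSUM_SIZE);
 *	btrfs_csum_final(crc, csum);
 *	fail = memcmp(csum, stored_csum, sdev->csum_size) != 0;
 *
 * Data pages differ only in that the whole page is summed and the
 * expected csum comes from the csum tree (spag->csum).
 */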
scrub_checksum_super(struct scrub_block *sblock) return fail; } -static void scrub_block_get(struct scrub_block *sblock) -{ - atomic_inc(&sblock->ref_count); -} - -static void scrub_block_put(struct scrub_block *sblock) -{ - if (atomic_dec_and_test(&sblock->ref_count)) { - int i; - - for (i = 0; i < sblock->page_count; i++) - if (sblock->pagev[i].page) - __free_page(sblock->pagev[i].page); - kfree(sblock); - } -} - -static void scrub_submit(struct scrub_dev *sdev) +static int scrub_submit(struct scrub_dev *sdev) { struct scrub_bio *sbio; if (sdev->curr == -1) - return; + return 0; sbio = sdev->bios[sdev->curr]; + sbio->err = 0; sdev->curr = -1; atomic_inc(&sdev->in_flight); btrfsic_submit_bio(READ, sbio->bio); + + return 0; } -static int scrub_add_page_to_bio(struct scrub_dev *sdev, - struct scrub_page *spage) +static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len, + u64 physical, u64 flags, u64 gen, int mirror_num, + u8 *csum, int force) { - struct scrub_block *sblock = spage->sblock; struct scrub_bio *sbio; + struct page *page; int ret; again: @@ -1462,7 +983,7 @@ static int scrub_add_page_to_bio(struct scrub_dev *sdev, if (sdev->curr != -1) { sdev->first_free = sdev->bios[sdev->curr]->next_free; sdev->bios[sdev->curr]->next_free = -1; - sdev->bios[sdev->curr]->page_count = 0; + sdev->bios[sdev->curr]->count = 0; spin_unlock(&sdev->list_lock); } else { spin_unlock(&sdev->list_lock); @@ -1470,200 +991,62 @@ static int scrub_add_page_to_bio(struct scrub_dev *sdev, } } sbio = sdev->bios[sdev->curr]; - if (sbio->page_count == 0) { + if (sbio->count == 0) { struct bio *bio; - sbio->physical = spage->physical; - sbio->logical = spage->logical; - bio = sbio->bio; - if (!bio) { - bio = bio_alloc(GFP_NOFS, sdev->pages_per_bio); - if (!bio) - return -ENOMEM; - sbio->bio = bio; - } + sbio->physical = physical; + sbio->logical = logical; + bio = bio_alloc(GFP_NOFS, SCRUB_PAGES_PER_BIO); + if (!bio) + return -ENOMEM; bio->bi_private = sbio; bio->bi_end_io = scrub_bio_end_io; bio->bi_bdev = sdev->dev->bdev; - bio->bi_sector = spage->physical >> 9; + bio->bi_sector = sbio->physical >> 9; sbio->err = 0; - } else if (sbio->physical + sbio->page_count * PAGE_SIZE != - spage->physical || - sbio->logical + sbio->page_count * PAGE_SIZE != - spage->logical) { - scrub_submit(sdev); - goto again; - } - - sbio->pagev[sbio->page_count] = spage; - ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0); - if (ret != PAGE_SIZE) { - if (sbio->page_count < 1) { - bio_put(sbio->bio); - sbio->bio = NULL; - return -EIO; - } - scrub_submit(sdev); + sbio->bio = bio; + } else if (sbio->physical + sbio->count * PAGE_SIZE != physical || + sbio->logical + sbio->count * PAGE_SIZE != logical) { + ret = scrub_submit(sdev); + if (ret) + return ret; goto again; } + sbio->spag[sbio->count].flags = flags; + sbio->spag[sbio->count].generation = gen; + sbio->spag[sbio->count].have_csum = 0; + sbio->spag[sbio->count].mirror_num = mirror_num; - scrub_block_get(sblock); /* one for the added page */ - atomic_inc(&sblock->outstanding_pages); - sbio->page_count++; - if (sbio->page_count == sdev->pages_per_bio) - scrub_submit(sdev); - - return 0; -} - -static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len, - u64 physical, u64 flags, u64 gen, int mirror_num, - u8 *csum, int force) -{ - struct scrub_block *sblock; - int index; - - sblock = kzalloc(sizeof(*sblock), GFP_NOFS); - if (!sblock) { - spin_lock(&sdev->stat_lock); - sdev->stat.malloc_errors++; - spin_unlock(&sdev->stat_lock); + page = alloc_page(GFP_NOFS); + 
if (!page) return -ENOMEM; - } - - /* one ref inside this function, plus one for each page later on */ - atomic_set(&sblock->ref_count, 1); - sblock->sdev = sdev; - sblock->no_io_error_seen = 1; - for (index = 0; len > 0; index++) { - struct scrub_page *spage = sblock->pagev + index; - u64 l = min_t(u64, len, PAGE_SIZE); - - BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK); - spage->page = alloc_page(GFP_NOFS); - if (!spage->page) { - spin_lock(&sdev->stat_lock); - sdev->stat.malloc_errors++; - spin_unlock(&sdev->stat_lock); - while (index > 0) { - index--; - __free_page(sblock->pagev[index].page); - } - kfree(sblock); - return -ENOMEM; - } - spage->sblock = sblock; - spage->bdev = sdev->dev->bdev; - spage->flags = flags; - spage->generation = gen; - spage->logical = logical; - spage->physical = physical; - spage->mirror_num = mirror_num; - if (csum) { - spage->have_csum = 1; - memcpy(spage->csum, csum, sdev->csum_size); - } else { - spage->have_csum = 0; - } - sblock->page_count++; - len -= l; - logical += l; - physical += l; + ret = bio_add_page(sbio->bio, page, PAGE_SIZE, 0); + if (!ret) { + __free_page(page); + ret = scrub_submit(sdev); + if (ret) + return ret; + goto again; } - BUG_ON(sblock->page_count == 0); - for (index = 0; index < sblock->page_count; index++) { - struct scrub_page *spage = sblock->pagev + index; + if (csum) { + sbio->spag[sbio->count].have_csum = 1; + memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size); + } + ++sbio->count; + if (sbio->count == SCRUB_PAGES_PER_BIO || force) { int ret; - ret = scrub_add_page_to_bio(sdev, spage); - if (ret) { - scrub_block_put(sblock); + ret = scrub_submit(sdev); + if (ret) return ret; - } } - if (force) - scrub_submit(sdev); - - /* last one frees, either here or in bio completion for last page */ - scrub_block_put(sblock); return 0; } -static void scrub_bio_end_io(struct bio *bio, int err) -{ - struct scrub_bio *sbio = bio->bi_private; - struct scrub_dev *sdev = sbio->sdev; - struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; - - sbio->err = err; - sbio->bio = bio; - - btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work); -} - -static void scrub_bio_end_io_worker(struct btrfs_work *work) -{ - struct scrub_bio *sbio = container_of(work, struct scrub_bio, work); - struct scrub_dev *sdev = sbio->sdev; - int i; - - BUG_ON(sbio->page_count > SCRUB_PAGES_PER_BIO); - if (sbio->err) { - for (i = 0; i < sbio->page_count; i++) { - struct scrub_page *spage = sbio->pagev[i]; - - spage->io_error = 1; - spage->sblock->no_io_error_seen = 0; - } - } - - /* now complete the scrub_block items that have all pages completed */ - for (i = 0; i < sbio->page_count; i++) { - struct scrub_page *spage = sbio->pagev[i]; - struct scrub_block *sblock = spage->sblock; - - if (atomic_dec_and_test(&sblock->outstanding_pages)) - scrub_block_complete(sblock); - scrub_block_put(sblock); - } - - if (sbio->err) { - /* what is this good for??? 
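All three scrub_checksum_* helpers above follow one verification pattern: seed crc32c with all ones, fold in the payload (skipping the stored checksum area for tree blocks and supers), finalize, and memcmp the result against the on-disk value. A minimal sketch of that shared pattern, using the btrfs_csum_data()/btrfs_csum_final() helpers exactly as the hunks show; verify_csum() itself is a hypothetical name, not a function in either version:

static int verify_csum(struct btrfs_root *root, char *buf, size_t len,
		       const u8 *on_disk_csum, int csum_size)
{
	u8 csum[BTRFS_CSUM_SIZE];
	u32 crc = ~(u32)0;			/* crc32c seed */

	crc = btrfs_csum_data(root, buf, crc, len);
	btrfs_csum_final(crc, csum);		/* store in on-disk format */
	return memcmp(csum, on_disk_csum, csum_size);	/* 0 == match */
}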
*/ - sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1); - sbio->bio->bi_flags |= 1 << BIO_UPTODATE; - sbio->bio->bi_phys_segments = 0; - sbio->bio->bi_idx = 0; - - for (i = 0; i < sbio->page_count; i++) { - struct bio_vec *bi; - bi = &sbio->bio->bi_io_vec[i]; - bi->bv_offset = 0; - bi->bv_len = PAGE_SIZE; - } - } - - bio_put(sbio->bio); - sbio->bio = NULL; - spin_lock(&sdev->list_lock); - sbio->next_free = sdev->first_free; - sdev->first_free = sbio->index; - spin_unlock(&sdev->list_lock); - atomic_dec(&sdev->in_flight); - wake_up(&sdev->list_wait); -} - -static void scrub_block_complete(struct scrub_block *sblock) -{ - if (!sblock->no_io_error_seen) - scrub_handle_errored_block(sblock); - else - scrub_checksum(sblock); -} - static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len, u8 *csum) { @@ -1671,6 +1054,7 @@ static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len, int ret = 0; unsigned long i; unsigned long num_sectors; + u32 sectorsize = sdev->dev->dev_root->sectorsize; while (!list_empty(&sdev->csum_list)) { sum = list_first_entry(&sdev->csum_list, @@ -1688,7 +1072,7 @@ static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len, if (!sum) return 0; - num_sectors = sum->len / sdev->sectorsize; + num_sectors = sum->len / sectorsize; for (i = 0; i < num_sectors; ++i) { if (sum->sums[i].bytenr == logical) { memcpy(csum, &sum->sums[i].sum, sdev->csum_size); @@ -1709,28 +1093,9 @@ static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len, { int ret; u8 csum[BTRFS_CSUM_SIZE]; - u32 blocksize; - - if (flags & BTRFS_EXTENT_FLAG_DATA) { - blocksize = sdev->sectorsize; - spin_lock(&sdev->stat_lock); - sdev->stat.data_extents_scrubbed++; - sdev->stat.data_bytes_scrubbed += len; - spin_unlock(&sdev->stat_lock); - } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { - BUG_ON(sdev->nodesize != sdev->leafsize); - blocksize = sdev->nodesize; - spin_lock(&sdev->stat_lock); - sdev->stat.tree_extents_scrubbed++; - sdev->stat.tree_bytes_scrubbed += len; - spin_unlock(&sdev->stat_lock); - } else { - blocksize = sdev->sectorsize; - BUG_ON(1); - } while (len) { - u64 l = min_t(u64, len, blocksize); + u64 l = min_t(u64, len, PAGE_SIZE); int have_csum = 0; if (flags & BTRFS_EXTENT_FLAG_DATA) { @@ -1739,8 +1104,8 @@ static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len, if (have_csum == 0) ++sdev->stat.no_csum; } - ret = scrub_pages(sdev, logical, l, physical, flags, gen, - mirror_num, have_csum ? csum : NULL, 0); + ret = scrub_page(sdev, logical, l, physical, flags, gen, + mirror_num, have_csum ? csum : NULL, 0); if (ret) return ret; len -= l; @@ -1805,11 +1170,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, if (!path) return -ENOMEM; - /* - * work on commit root. The related disk blocks are static as - * long as COW is applied. 
This means, it is safe to rewrite - them to repair disk errors without any race conditions - */ path->search_commit_root = 1; path->skip_locking = 1; @@ -2156,18 +1516,15 @@ static noinline_for_stack int scrub_supers(struct scrub_dev *sdev) struct btrfs_device *device = sdev->dev; struct btrfs_root *root = device->dev_root; - if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) - return -EIO; - gen = root->fs_info->last_trans_committed; for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { bytenr = btrfs_sb_offset(i); - if (bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes) + if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes) break; - ret = scrub_pages(sdev, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr, - BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1); + ret = scrub_page(sdev, bytenr, PAGE_SIZE, bytenr, + BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1); if (ret) return ret; } @@ -2226,30 +1583,10 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, /* * check some assumptions */ - if (root->nodesize != root->leafsize) { - printk(KERN_ERR - "btrfs_scrub: size assumption nodesize == leafsize (%d == %d) fails\n", - root->nodesize, root->leafsize); - return -EINVAL; - } - - if (root->nodesize > BTRFS_STRIPE_LEN) { - /* - * in this case scrub is unable to calculate the checksum - * the way scrub is implemented. Do not handle this - * situation at all because it won't ever happen. - */ - printk(KERN_ERR - "btrfs_scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails\n", - root->nodesize, BTRFS_STRIPE_LEN); - return -EINVAL; - } - - if (root->sectorsize != PAGE_SIZE) { - /* not supported for data w/o checksums */ - printk(KERN_ERR - "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lld) fails\n", - root->sectorsize, (unsigned long long)PAGE_SIZE); + if (root->sectorsize != PAGE_SIZE || + root->sectorsize != root->leafsize || + root->sectorsize != root->nodesize) { + printk(KERN_ERR "btrfs_scrub: size assumptions fail\n"); return -EINVAL; } @@ -2319,7 +1656,7 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, return ret; } -void btrfs_scrub_pause(struct btrfs_root *root) +int btrfs_scrub_pause(struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; @@ -2334,28 +1671,34 @@ void btrfs_scrub_pause(struct btrfs_root *root) mutex_lock(&fs_info->scrub_lock); } mutex_unlock(&fs_info->scrub_lock); + + return 0; } -void btrfs_scrub_continue(struct btrfs_root *root) +int btrfs_scrub_continue(struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; atomic_dec(&fs_info->scrub_pause_req); wake_up(&fs_info->scrub_pause_wait); + return 0; } -void btrfs_scrub_pause_super(struct btrfs_root *root) +int btrfs_scrub_pause_super(struct btrfs_root *root) { down_write(&root->fs_info->scrub_super_lock); + return 0; } -void btrfs_scrub_continue_super(struct btrfs_root *root) +int btrfs_scrub_continue_super(struct btrfs_root *root) { up_write(&root->fs_info->scrub_super_lock); + return 0; } -int __btrfs_scrub_cancel(struct btrfs_fs_info *fs_info) +int btrfs_scrub_cancel(struct btrfs_root *root) { + struct btrfs_fs_info *fs_info = root->fs_info; mutex_lock(&fs_info->scrub_lock); if (!atomic_read(&fs_info->scrubs_running)) { @@ -2376,11 +1719,6 @@ int __btrfs_scrub_cancel(struct btrfs_fs_info *fs_info) return 0; } -int btrfs_scrub_cancel(struct btrfs_root *root) -{ - return __btrfs_scrub_cancel(root->fs_info); -} - int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev) { struct btrfs_fs_info *fs_info 
= root->fs_info; @@ -2403,7 +1741,6 @@ int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev) return 0; } - int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid) { struct btrfs_fs_info *fs_info = root->fs_info; diff --git a/trunk/fs/btrfs/struct-funcs.c b/trunk/fs/btrfs/struct-funcs.c index c6ffa5812419..bc1f6ad18442 100644 --- a/trunk/fs/btrfs/struct-funcs.c +++ b/trunk/fs/btrfs/struct-funcs.c @@ -44,9 +44,8 @@ #define BTRFS_SETGET_FUNCS(name, type, member, bits) \ u##bits btrfs_##name(struct extent_buffer *eb, type *s); \ void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); \ -void btrfs_set_token_##name(struct extent_buffer *eb, type *s, u##bits val, struct btrfs_map_token *token); \ -u##bits btrfs_token_##name(struct extent_buffer *eb, \ - type *s, struct btrfs_map_token *token) \ +u##bits btrfs_##name(struct extent_buffer *eb, \ + type *s) \ { \ unsigned long part_offset = (unsigned long)s; \ unsigned long offset = part_offset + offsetof(type, member); \ @@ -55,18 +54,9 @@ u##bits btrfs_token_##name(struct extent_buffer *eb, \ char *kaddr; \ unsigned long map_start; \ unsigned long map_len; \ - unsigned long mem_len = sizeof(((type *)0)->member); \ u##bits res; \ - if (token && token->kaddr && token->offset <= offset && \ - token->eb == eb && \ - (token->offset + PAGE_CACHE_SIZE >= offset + mem_len)) { \ - kaddr = token->kaddr; \ - p = (type *)(kaddr + part_offset - token->offset); \ - res = le##bits##_to_cpu(p->member); \ - return res; \ - } \ err = map_private_extent_buffer(eb, offset, \ - mem_len, \ + sizeof(((type *)0)->member), \ &kaddr, &map_start, &map_len); \ if (err) { \ __le##bits leres; \ @@ -75,15 +65,10 @@ u##bits btrfs_token_##name(struct extent_buffer *eb, \ } \ p = (type *)(kaddr + part_offset - map_start); \ res = le##bits##_to_cpu(p->member); \ - if (token) { \ - token->kaddr = kaddr; \ - token->offset = map_start; \ - token->eb = eb; \ - } \ return res; \ } \ -void btrfs_set_token_##name(struct extent_buffer *eb, \ - type *s, u##bits val, struct btrfs_map_token *token) \ +void btrfs_set_##name(struct extent_buffer *eb, \ + type *s, u##bits val) \ { \ unsigned long part_offset = (unsigned long)s; \ unsigned long offset = part_offset + offsetof(type, member); \ @@ -92,17 +77,8 @@ void btrfs_set_token_##name(struct extent_buffer *eb, \ char *kaddr; \ unsigned long map_start; \ unsigned long map_len; \ - unsigned long mem_len = sizeof(((type *)0)->member); \ - if (token && token->kaddr && token->offset <= offset && \ - token->eb == eb && \ - (token->offset + PAGE_CACHE_SIZE >= offset + mem_len)) { \ - kaddr = token->kaddr; \ - p = (type *)(kaddr + part_offset - token->offset); \ - p->member = cpu_to_le##bits(val); \ - return; \ - } \ err = map_private_extent_buffer(eb, offset, \ - mem_len, \ + sizeof(((type *)0)->member), \ &kaddr, &map_start, &map_len); \ if (err) { \ __le##bits val2; \ @@ -112,22 +88,7 @@ void btrfs_set_token_##name(struct extent_buffer *eb, \ } \ p = (type *)(kaddr + part_offset - map_start); \ p->member = cpu_to_le##bits(val); \ - if (token) { \ - token->kaddr = kaddr; \ - token->offset = map_start; \ - token->eb = eb; \ - } \ -} \ -void btrfs_set_##name(struct extent_buffer *eb, \ - type *s, u##bits val) \ -{ \ - btrfs_set_token_##name(eb, s, val, NULL); \ -} \ -u##bits btrfs_##name(struct extent_buffer *eb, \ - type *s) \ -{ \ - return btrfs_token_##name(eb, s, NULL); \ -} \ +} #include "ctree.h" diff --git a/trunk/fs/btrfs/super.c b/trunk/fs/btrfs/super.c index 8d5d380f7bdb..81df3fec6a6d 100644 
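The struct-funcs.c hunk above removes the btrfs_map_token fast path from the generated setget accessors. The idea being reverted: remember the last kmap'ed extent-buffer range in a token so consecutive accessors that hit the same page skip map_private_extent_buffer() entirely. A condensed sketch of that hit/miss logic, with a fixed u64 member standing in for the macro's ##bits expansion; token_read_u64() is a hypothetical flattening of the macro, not real code:

static u64 token_read_u64(struct extent_buffer *eb,
			  unsigned long part_offset, unsigned long offset,
			  unsigned long mem_len,
			  struct btrfs_map_token *token)
{
	__le64 *p;
	char *kaddr;
	unsigned long map_start;
	unsigned long map_len;

	/* hit: the cached mapping still covers [offset, offset + mem_len) */
	if (token && token->kaddr && token->eb == eb &&
	    token->offset <= offset &&
	    token->offset + PAGE_CACHE_SIZE >= offset + mem_len) {
		p = (__le64 *)(token->kaddr + part_offset - token->offset);
		return le64_to_cpu(*p);
	}

	/* miss: map the range, then cache it for the next accessor */
	if (map_private_extent_buffer(eb, offset, mem_len,
				      &kaddr, &map_start, &map_len))
		return 0;	/* page-straddling slow path elided */
	p = (__le64 *)(kaddr + part_offset - map_start);
	if (token) {
		token->kaddr = kaddr;
		token->offset = map_start;
		token->eb = eb;
	}
	return le64_to_cpu(*p);
}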
--- a/trunk/fs/btrfs/super.c +++ b/trunk/fs/btrfs/super.c @@ -76,9 +76,6 @@ static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, case -EROFS: errstr = "Readonly filesystem"; break; - case -EEXIST: - errstr = "Object already exists"; - break; default: if (nbuf) { if (snprintf(nbuf, 16, "error %d", -errno) >= 0) @@ -119,8 +116,6 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info) if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { sb->s_flags |= MS_RDONLY; printk(KERN_INFO "btrfs is forced readonly\n"); - __btrfs_scrub_cancel(fs_info); -// WARN_ON(1); } } @@ -129,132 +124,25 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info) * invokes the appropriate error response. */ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, - unsigned int line, int errno, const char *fmt, ...) + unsigned int line, int errno) { struct super_block *sb = fs_info->sb; char nbuf[16]; const char *errstr; - va_list args; - va_start(args, fmt); /* * Special case: if the error is EROFS, and we're already * under MS_RDONLY, then it is safe here. */ if (errno == -EROFS && (sb->s_flags & MS_RDONLY)) - return; - - errstr = btrfs_decode_error(fs_info, errno, nbuf); - if (fmt) { - struct va_format vaf = { - .fmt = fmt, - .va = &args, - }; - - printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s (%pV)\n", - sb->s_id, function, line, errstr, &vaf); - } else { - printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n", - sb->s_id, function, line, errstr); - } - - /* Don't go through full error handling during mount */ - if (sb->s_flags & MS_BORN) { - save_error_info(fs_info); - btrfs_handle_error(fs_info); - } - va_end(args); -} - -const char *logtypes[] = { - "emergency", - "alert", - "critical", - "error", - "warning", - "notice", - "info", - "debug", -}; - -void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...) -{ - struct super_block *sb = fs_info->sb; - char lvl[4]; - struct va_format vaf; - va_list args; - const char *type = logtypes[4]; - - va_start(args, fmt); - - if (fmt[0] == '<' && isdigit(fmt[1]) && fmt[2] == '>') { - strncpy(lvl, fmt, 3); - fmt += 3; - type = logtypes[fmt[1] - '0']; - } else - *lvl = '\0'; - - vaf.fmt = fmt; - vaf.va = &args; - printk("%sBTRFS %s (device %s): %pV", lvl, type, sb->s_id, &vaf); -} - -/* - * We only mark the transaction aborted and then set the file system read-only. - * This will prevent new transactions from starting or trying to join this - * one. - * - * This means that error recovery at the call site is limited to freeing - * any local memory allocations and passing the error code up without - * further cleanup. The transaction should complete as it normally would - * in the call path but will return -EIO. - * - * We'll complete the cleanup in btrfs_end_transaction and - * btrfs_commit_transaction. - */ -void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, - struct btrfs_root *root, const char *function, - unsigned int line, int errno) -{ - WARN_ONCE(1, KERN_DEBUG "btrfs: Transaction aborted"); - trans->aborted = errno; - /* Nothing used. The other threads that have joined this - * transaction may be able to continue. */ - if (!trans->blocks_used) { - btrfs_printk(root->fs_info, "Aborting unused transaction.\n"); return; - } - trans->transaction->aborted = errno; - __btrfs_std_error(root->fs_info, function, line, errno, NULL); -} -/* - * __btrfs_panic decodes unexpected, fatal errors from the caller, - * issues an alert, and either panics or BUGs, depending on mount options. 
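The removed helpers above (__btrfs_std_error with a format string, btrfs_printk, __btrfs_panic) all lean on the same kernel idiom: wrap the caller's format and va_list in a struct va_format and print it through a single %pV specifier, so the prefix and the forwarded message come out as one atomic printk. A minimal standalone sketch of the idiom; my_fs_err() is a made-up name:

void my_fs_err(struct super_block *sb, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;		/* printk dereferences this via %pV */
	printk(KERN_ERR "BTRFS error (device %s): %pV\n", sb->s_id, &vaf);
	va_end(args);
}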
- */ -void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function, - unsigned int line, int errno, const char *fmt, ...) -{ - char nbuf[16]; - char *s_id = ""; - const char *errstr; - struct va_format vaf = { .fmt = fmt }; - va_list args; - - if (fs_info) - s_id = fs_info->sb->s_id; - - va_start(args, fmt); - vaf.va = &args; errstr = btrfs_decode_error(fs_info, errno, nbuf); - if (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR) - panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (%s)\n", - s_id, function, line, &vaf, errstr); - - printk(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (%s)\n", - s_id, function, line, &vaf, errstr); - va_end(args); - /* Caller calls BUG() */ + printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n", + sb->s_id, function, line, errstr); + save_error_info(fs_info); + + btrfs_handle_error(fs_info); } static void btrfs_put_super(struct super_block *sb) @@ -278,7 +166,7 @@ enum { Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache, Opt_no_space_cache, Opt_recovery, Opt_skip_balance, Opt_check_integrity, Opt_check_integrity_including_extent_data, - Opt_check_integrity_print_mask, Opt_fatal_errors, + Opt_check_integrity_print_mask, Opt_err, }; @@ -318,14 +206,12 @@ static match_table_t tokens = { {Opt_check_integrity, "check_int"}, {Opt_check_integrity_including_extent_data, "check_int_data"}, {Opt_check_integrity_print_mask, "check_int_print_mask=%d"}, - {Opt_fatal_errors, "fatal_errors=%s"}, {Opt_err, NULL}, }; /* * Regular mount options parser. Everything that is needed only when * reading in a new superblock is parsed here. - * XXX JDM: This needs to be cleaned up for remount. */ int btrfs_parse_options(struct btrfs_root *root, char *options) { @@ -552,18 +438,6 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) ret = -EINVAL; goto out; #endif - case Opt_fatal_errors: - if (strcmp(args[0].from, "panic") == 0) - btrfs_set_opt(info->mount_opt, - PANIC_ON_FATAL_ERROR); - else if (strcmp(args[0].from, "bug") == 0) - btrfs_clear_opt(info->mount_opt, - PANIC_ON_FATAL_ERROR); - else { - ret = -EINVAL; - goto out; - } - break; case Opt_err: printk(KERN_INFO "btrfs: unrecognized mount option " "'%s'\n", p); @@ -888,8 +762,6 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) seq_puts(seq, ",inode_cache"); if (btrfs_test_opt(root, SKIP_BALANCE)) seq_puts(seq, ",skip_balance"); - if (btrfs_test_opt(root, PANIC_ON_FATAL_ERROR)) - seq_puts(seq, ",fatal_errors=panic"); return 0; } @@ -1123,20 +995,11 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) { struct btrfs_fs_info *fs_info = btrfs_sb(sb); struct btrfs_root *root = fs_info->tree_root; - unsigned old_flags = sb->s_flags; - unsigned long old_opts = fs_info->mount_opt; - unsigned long old_compress_type = fs_info->compress_type; - u64 old_max_inline = fs_info->max_inline; - u64 old_alloc_start = fs_info->alloc_start; - int old_thread_pool_size = fs_info->thread_pool_size; - unsigned int old_metadata_ratio = fs_info->metadata_ratio; int ret; ret = btrfs_parse_options(root, data); - if (ret) { - ret = -EINVAL; - goto restore; - } + if (ret) + return -EINVAL; if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) return 0; @@ -1144,44 +1007,26 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) if (*flags & MS_RDONLY) { sb->s_flags |= MS_RDONLY; - ret = btrfs_commit_super(root); - if (ret) - goto restore; + ret = btrfs_commit_super(root); + WARN_ON(ret); } else { if 
(fs_info->fs_devices->rw_devices == 0) - ret = -EACCES; - goto restore; + return -EACCES; if (btrfs_super_log_root(fs_info->super_copy) != 0) - ret = -EINVAL; - goto restore; + return -EINVAL; ret = btrfs_cleanup_fs_roots(fs_info); - if (ret) - goto restore; + WARN_ON(ret); /* recover relocation */ ret = btrfs_recover_relocation(root); - if (ret) - goto restore; + WARN_ON(ret); sb->s_flags &= ~MS_RDONLY; } return 0; - -restore: - /* We've hit an error - don't reset MS_RDONLY */ - if (sb->s_flags & MS_RDONLY) - old_flags |= MS_RDONLY; - sb->s_flags = old_flags; - fs_info->mount_opt = old_opts; - fs_info->compress_type = old_compress_type; - fs_info->max_inline = old_max_inline; - fs_info->alloc_start = old_alloc_start; - fs_info->thread_pool_size = old_thread_pool_size; - fs_info->metadata_ratio = old_metadata_ratio; - return ret; } /* Used to sort the devices by max_avail(descending sort) */ @@ -1511,7 +1356,9 @@ static int __init init_btrfs_fs(void) if (err) return err; - btrfs_init_compress(); + err = btrfs_init_compress(); + if (err) + goto free_sysfs; err = btrfs_init_cachep(); if (err) @@ -1537,8 +1384,6 @@ static int __init init_btrfs_fs(void) if (err) goto unregister_ioctl; - btrfs_init_lockdep(); - printk(KERN_INFO "%s loaded\n", BTRFS_BUILD_VERSION); return 0; @@ -1554,6 +1399,7 @@ static int __init init_btrfs_fs(void) btrfs_destroy_cachep(); free_compress: btrfs_exit_compress(); +free_sysfs: btrfs_exit_sysfs(); return err; } diff --git a/trunk/fs/btrfs/transaction.c b/trunk/fs/btrfs/transaction.c index 8da29e8e4de1..04b77e3ceb7a 100644 --- a/trunk/fs/btrfs/transaction.c +++ b/trunk/fs/btrfs/transaction.c @@ -31,7 +31,7 @@ #define BTRFS_ROOT_TRANS_TAG 0 -void put_transaction(struct btrfs_transaction *transaction) +static noinline void put_transaction(struct btrfs_transaction *transaction) { WARN_ON(atomic_read(&transaction->use_count) == 0); if (atomic_dec_and_test(&transaction->use_count)) { @@ -58,12 +58,6 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail) spin_lock(&root->fs_info->trans_lock); loop: - /* The file system has been taken offline. No new transactions. 
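The transaction.c hunks here and below dismantle the abort plumbing: the aborted fields on the handle and on the transaction, the -EROFS refusal to join once the fs is flagged, and the cleanup path in commit. Condensed into one illustrative helper (no function of this name exists in either tree):

static void abort_transaction_sketch(struct btrfs_trans_handle *trans,
				     struct btrfs_fs_info *fs_info,
				     int errno)
{
	trans->aborted = errno;			/* this handle fails fast */
	trans->transaction->aborted = errno;	/* so do later joiners */
	fs_info->fs_state |= BTRFS_SUPER_FLAG_ERROR;	/* no new trans */
	/* the commit path tests ->aborted and unwinds via
	 * cleanup_transaction() instead of writing the super */
}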
*/ - if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { - spin_unlock(&root->fs_info->trans_lock); - return -EROFS; - } - if (root->fs_info->trans_no_join) { if (!nofail) { spin_unlock(&root->fs_info->trans_lock); @@ -73,8 +67,6 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail) cur_trans = root->fs_info->running_transaction; if (cur_trans) { - if (cur_trans->aborted) - return cur_trans->aborted; atomic_inc(&cur_trans->use_count); atomic_inc(&cur_trans->num_writers); cur_trans->num_joined++; @@ -131,7 +123,6 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail) root->fs_info->generation++; cur_trans->transid = root->fs_info->generation; root->fs_info->running_transaction = cur_trans; - cur_trans->aborted = 0; spin_unlock(&root->fs_info->trans_lock); return 0; @@ -327,7 +318,6 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, h->use_count = 1; h->block_rsv = NULL; h->orig_rsv = NULL; - h->aborted = 0; smp_mb(); if (cur_trans->blocked && may_wait_transaction(root, type)) { @@ -337,7 +327,8 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, if (num_bytes) { trace_btrfs_space_reservation(root->fs_info, "transaction", - h->transid, num_bytes, 1); + (u64)(unsigned long)h, + num_bytes, 1); h->block_rsv = &root->fs_info->trans_block_rsv; h->bytes_reserved = num_bytes; } @@ -449,7 +440,6 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction *cur_trans = trans->transaction; struct btrfs_block_rsv *rsv = trans->block_rsv; int updates; - int err; smp_mb(); if (cur_trans->blocked || cur_trans->delayed_refs.flushing) @@ -463,11 +453,8 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, updates = trans->delayed_ref_updates; trans->delayed_ref_updates = 0; - if (updates) { - err = btrfs_run_delayed_refs(trans, root, updates); - if (err) /* Error code will also eval true */ - return err; - } + if (updates) + btrfs_run_delayed_refs(trans, root, updates); trans->block_rsv = rsv; @@ -538,11 +525,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, if (throttle) btrfs_run_delayed_iputs(root); - if (trans->aborted || - root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { - return -EIO; - } - return 0; } @@ -708,13 +690,11 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, ret = btrfs_update_root(trans, tree_root, &root->root_key, &root->root_item); - if (ret) - return ret; + BUG_ON(ret); old_root_used = btrfs_root_used(&root->root_item); ret = btrfs_write_dirty_block_groups(trans, root); - if (ret) - return ret; + BUG_ON(ret); } if (root != root->fs_info->extent_root) @@ -725,10 +705,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, /* * update all the cowonly tree roots on disk - * - * The error handling in this function may not be obvious. Any of the - * failures will cause the file system to go offline. We still need - * to clean up the delayed refs. 
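For orientation, the dirty-roots loop at the heart of commit_cowonly_roots(), shown in isolation with the pre-revert error propagation; the wrapper name is hypothetical:

static int commit_dirty_roots_sketch(struct btrfs_trans_handle *trans,
				     struct btrfs_fs_info *fs_info)
{
	int ret;

	while (!list_empty(&fs_info->dirty_cowonly_roots)) {
		struct list_head *next = fs_info->dirty_cowonly_roots.next;
		struct btrfs_root *root;

		list_del_init(next);
		root = list_entry(next, struct btrfs_root, dirty_list);
		ret = update_cowonly_root(trans, root);	/* write root item */
		if (ret)
			return ret;	/* the revert turns this into BUG_ON */
	}
	return 0;
}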
*/ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root) @@ -739,30 +715,22 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, int ret; ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); - if (ret) - return ret; + BUG_ON(ret); eb = btrfs_lock_root_node(fs_info->tree_root); - ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, - 0, &eb); + btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb); btrfs_tree_unlock(eb); free_extent_buffer(eb); - if (ret) - return ret; - ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); - if (ret) - return ret; + BUG_ON(ret); while (!list_empty(&fs_info->dirty_cowonly_roots)) { next = fs_info->dirty_cowonly_roots.next; list_del_init(next); root = list_entry(next, struct btrfs_root, dirty_list); - ret = update_cowonly_root(trans, root); - if (ret) - return ret; + update_cowonly_root(trans, root); } down_write(&fs_info->extent_commit_sem); @@ -906,7 +874,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); if (!new_root_item) { - ret = pending->error = -ENOMEM; + pending->error = -ENOMEM; goto fail; } @@ -943,24 +911,21 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, * insert the directory item */ ret = btrfs_set_inode_index(parent_inode, &index); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); ret = btrfs_insert_dir_item(trans, parent_root, dentry->d_name.name, dentry->d_name.len, parent_inode, &key, BTRFS_FT_DIR, index); - if (ret == -EEXIST) { + if (ret) { pending->error = -EEXIST; dput(parent); goto fail; - } else if (ret) { - goto abort_trans_dput; } btrfs_i_size_write(parent_inode, parent_inode->i_size + dentry->d_name.len * 2); ret = btrfs_update_inode(trans, parent_root, parent_inode); - if (ret) - goto abort_trans_dput; + BUG_ON(ret); /* * pull in the delayed directory update @@ -969,10 +934,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, * snapshot */ ret = btrfs_run_delayed_items(trans, root); - if (ret) { /* Transaction aborted */ - dput(parent); - goto fail; - } + BUG_ON(ret); record_root_in_trans(trans, root); btrfs_set_root_last_snapshot(&root->root_item, trans->transid); @@ -987,21 +949,12 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_set_root_flags(new_root_item, root_flags); old = btrfs_lock_root_node(root); - ret = btrfs_cow_block(trans, root, old, NULL, 0, &old); - if (ret) { - btrfs_tree_unlock(old); - free_extent_buffer(old); - goto abort_trans_dput; - } - + btrfs_cow_block(trans, root, old, NULL, 0, &old); btrfs_set_lock_blocking(old); - ret = btrfs_copy_root(trans, root, old, &tmp, objectid); - /* clean up in any case */ + btrfs_copy_root(trans, root, old, &tmp, objectid); btrfs_tree_unlock(old); free_extent_buffer(old); - if (ret) - goto abort_trans_dput; /* see comments in should_cow_block() */ root->force_cow = 1; @@ -1013,8 +966,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, ret = btrfs_insert_root(trans, tree_root, &key, new_root_item); btrfs_tree_unlock(tmp); free_extent_buffer(tmp); - if (ret) - goto abort_trans_dput; + BUG_ON(ret); /* * insert root back/forward references @@ -1023,32 +975,19 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, parent_root->root_key.objectid, btrfs_ino(parent_inode), index, dentry->d_name.name, dentry->d_name.len); + BUG_ON(ret); 
dput(parent); - if (ret) - goto fail; key.offset = (u64)-1; pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); - if (IS_ERR(pending->snap)) { - ret = PTR_ERR(pending->snap); - goto abort_trans; - } + BUG_ON(IS_ERR(pending->snap)); - ret = btrfs_reloc_post_snapshot(trans, pending); - if (ret) - goto abort_trans; - ret = 0; + btrfs_reloc_post_snapshot(trans, pending); fail: kfree(new_root_item); trans->block_rsv = rsv; btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1); - return ret; - -abort_trans_dput: - dput(parent); -abort_trans: - btrfs_abort_transaction(trans, root, ret); - goto fail; + return 0; } /* @@ -1185,33 +1124,6 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, return 0; } - -static void cleanup_transaction(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - struct btrfs_transaction *cur_trans = trans->transaction; - - WARN_ON(trans->use_count > 1); - - spin_lock(&root->fs_info->trans_lock); - list_del_init(&cur_trans->list); - spin_unlock(&root->fs_info->trans_lock); - - btrfs_cleanup_one_transaction(trans->transaction, root); - - put_transaction(cur_trans); - put_transaction(cur_trans); - - trace_btrfs_transaction_commit(root); - - btrfs_scrub_continue(root); - - if (current->journal_info == trans) - current->journal_info = NULL; - - kmem_cache_free(btrfs_trans_handle_cachep, trans); -} - /* * btrfs_transaction state sequence: * in_commit = 0, blocked = 0 (initial) @@ -1223,10 +1135,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { unsigned long joined = 0; - struct btrfs_transaction *cur_trans = trans->transaction; + struct btrfs_transaction *cur_trans; struct btrfs_transaction *prev_trans = NULL; DEFINE_WAIT(wait); - int ret = -EIO; + int ret; int should_grow = 0; unsigned long now = get_seconds(); int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT); @@ -1236,18 +1148,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_trans_release_metadata(trans, root); trans->block_rsv = NULL; - if (cur_trans->aborted) - goto cleanup_transaction; - /* make a pass through all the delayed refs we have so far * any runnings procs may add more while we are here */ ret = btrfs_run_delayed_refs(trans, root, 0); - if (ret) - goto cleanup_transaction; + BUG_ON(ret); cur_trans = trans->transaction; - /* * set the flushing flag so procs in this transaction have to * start sending their work down. 
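The hunk below also touches the in_commit short-circuit in btrfs_commit_transaction(). The pattern, pulled out for readability: if another task is already committing this transaction, pin it with a reference, drop our own handle, and just wait for that commit to finish; the wrapper name is hypothetical:

static int join_running_commit_sketch(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root)
{
	struct btrfs_transaction *cur_trans = trans->transaction;

	spin_lock(&cur_trans->commit_lock);
	if (cur_trans->in_commit) {
		spin_unlock(&cur_trans->commit_lock);
		atomic_inc(&cur_trans->use_count);  /* pin while we wait */
		btrfs_end_transaction(trans, root); /* give up our join */
		wait_for_commit(root, cur_trans);
		put_transaction(cur_trans);         /* drop the pin */
		return 1;	/* someone else's commit covered us */
	}
	spin_unlock(&cur_trans->commit_lock);
	return 0;		/* we have to commit ourselves */
}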
@@ -1255,20 +1162,19 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, cur_trans->delayed_refs.flushing = 1; ret = btrfs_run_delayed_refs(trans, root, 0); - if (ret) - goto cleanup_transaction; + BUG_ON(ret); spin_lock(&cur_trans->commit_lock); if (cur_trans->in_commit) { spin_unlock(&cur_trans->commit_lock); atomic_inc(&cur_trans->use_count); - ret = btrfs_end_transaction(trans, root); + btrfs_end_transaction(trans, root); wait_for_commit(root, cur_trans); put_transaction(cur_trans); - return ret; + return 0; } trans->transaction->in_commit = 1; @@ -1308,12 +1214,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, if (flush_on_commit || snap_pending) { btrfs_start_delalloc_inodes(root, 1); - btrfs_wait_ordered_extents(root, 0, 1); + ret = btrfs_wait_ordered_extents(root, 0, 1); + BUG_ON(ret); } ret = btrfs_run_delayed_items(trans, root); - if (ret) - goto cleanup_transaction; + BUG_ON(ret); /* * rename don't use btrfs_join_transaction, so, once we @@ -1355,22 +1261,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->reloc_mutex); ret = btrfs_run_delayed_items(trans, root); - if (ret) { - mutex_unlock(&root->fs_info->reloc_mutex); - goto cleanup_transaction; - } + BUG_ON(ret); ret = create_pending_snapshots(trans, root->fs_info); - if (ret) { - mutex_unlock(&root->fs_info->reloc_mutex); - goto cleanup_transaction; - } + BUG_ON(ret); ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); - if (ret) { - mutex_unlock(&root->fs_info->reloc_mutex); - goto cleanup_transaction; - } + BUG_ON(ret); /* * make sure none of the code above managed to slip in a @@ -1397,10 +1294,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->tree_log_mutex); ret = commit_fs_roots(trans, root); - if (ret) { - mutex_unlock(&root->fs_info->tree_log_mutex); - goto cleanup_transaction; - } + BUG_ON(ret); /* commit_fs_roots gets rid of all the tree log roots, it is now * safe to free the root of tree log roots @@ -1408,10 +1302,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_free_log_root_tree(trans, root->fs_info); ret = commit_cowonly_roots(trans, root); - if (ret) { - mutex_unlock(&root->fs_info->tree_log_mutex); - goto cleanup_transaction; - } + BUG_ON(ret); btrfs_prepare_extent_commit(trans, root); @@ -1445,18 +1336,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, wake_up(&root->fs_info->transaction_wait); ret = btrfs_write_and_wait_transaction(trans, root); - if (ret) { - btrfs_error(root->fs_info, ret, - "Error while writing out transaction."); - mutex_unlock(&root->fs_info->tree_log_mutex); - goto cleanup_transaction; - } - - ret = write_ctree_super(trans, root, 0); - if (ret) { - mutex_unlock(&root->fs_info->tree_log_mutex); - goto cleanup_transaction; - } + BUG_ON(ret); + write_ctree_super(trans, root, 0); /* * the super is written, we can safely allow the tree-loggers @@ -1492,15 +1373,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_run_delayed_iputs(root); return ret; - -cleanup_transaction: - btrfs_printk(root->fs_info, "Skipping commit of aborted transaction.\n"); -// WARN_ON(1); - if (current->journal_info == trans) - current->journal_info = NULL; - cleanup_transaction(trans, root); - - return ret; } /* @@ -1516,8 +1388,6 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) spin_unlock(&fs_info->trans_lock); while (!list_empty(&list)) { - int ret; - root = list_entry(list.next, struct btrfs_root, 
root_list); list_del(&root->root_list); @@ -1525,10 +1395,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) if (btrfs_header_backref_rev(root->node) < BTRFS_MIXED_BACKREF_REV) - ret = btrfs_drop_snapshot(root, NULL, 0, 0); + btrfs_drop_snapshot(root, NULL, 0, 0); else - ret =btrfs_drop_snapshot(root, NULL, 1, 0); - BUG_ON(ret < 0); + btrfs_drop_snapshot(root, NULL, 1, 0); } return 0; } diff --git a/trunk/fs/btrfs/transaction.h b/trunk/fs/btrfs/transaction.h index fe27379e368b..02564e6230ac 100644 --- a/trunk/fs/btrfs/transaction.h +++ b/trunk/fs/btrfs/transaction.h @@ -43,7 +43,6 @@ struct btrfs_transaction { wait_queue_head_t commit_wait; struct list_head pending_snapshots; struct btrfs_delayed_ref_root delayed_refs; - int aborted; }; struct btrfs_trans_handle { @@ -56,7 +55,6 @@ struct btrfs_trans_handle { struct btrfs_transaction *transaction; struct btrfs_block_rsv *block_rsv; struct btrfs_block_rsv *orig_rsv; - int aborted; }; struct btrfs_pending_snapshot { @@ -116,5 +114,4 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages, int mark); int btrfs_transaction_blocked(struct btrfs_fs_info *info); int btrfs_transaction_in_commit(struct btrfs_fs_info *info); -void put_transaction(struct btrfs_transaction *transaction); #endif diff --git a/trunk/fs/btrfs/tree-log.c b/trunk/fs/btrfs/tree-log.c index d017283ae6f5..966cc74f5d6c 100644 --- a/trunk/fs/btrfs/tree-log.c +++ b/trunk/fs/btrfs/tree-log.c @@ -212,13 +212,14 @@ int btrfs_pin_log_trans(struct btrfs_root *root) * indicate we're done making changes to the log tree * and wake up anyone waiting to do a sync */ -void btrfs_end_log_trans(struct btrfs_root *root) +int btrfs_end_log_trans(struct btrfs_root *root) { if (atomic_dec_and_test(&root->log_writers)) { smp_mb(); if (waitqueue_active(&root->log_writer_wait)) wake_up(&root->log_writer_wait); } + return 0; } @@ -377,11 +378,12 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, u32 found_size; found_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); - if (found_size > item_size) + if (found_size > item_size) { btrfs_truncate_item(trans, root, path, item_size, 1); - else if (found_size < item_size) - btrfs_extend_item(trans, root, path, - item_size - found_size); + } else if (found_size < item_size) { + ret = btrfs_extend_item(trans, root, path, + item_size - found_size); + } } else if (ret) { return ret; } @@ -1761,7 +1763,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, BTRFS_TREE_LOG_OBJECTID); ret = btrfs_free_and_pin_reserved_extent(root, bytenr, blocksize); - BUG_ON(ret); /* -ENOMEM or logic errors */ + BUG_ON(ret); } free_extent_buffer(next); continue; @@ -1869,26 +1871,20 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, wret = walk_down_log_tree(trans, log, path, &level, wc); if (wret > 0) break; - if (wret < 0) { + if (wret < 0) ret = wret; - goto out; - } wret = walk_up_log_tree(trans, log, path, &level, wc); if (wret > 0) break; - if (wret < 0) { + if (wret < 0) ret = wret; - goto out; - } } /* was the root node processed? 
if not, catch it here */ if (path->nodes[orig_level]) { - ret = wc->process_func(log, path->nodes[orig_level], wc, + wc->process_func(log, path->nodes[orig_level], wc, btrfs_header_generation(path->nodes[orig_level])); - if (ret) - goto out; if (wc->free) { struct extent_buffer *next; @@ -1904,11 +1900,10 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, BTRFS_TREE_LOG_OBJECTID); ret = btrfs_free_and_pin_reserved_extent(log, next->start, next->len); - BUG_ON(ret); /* -ENOMEM or logic errors */ + BUG_ON(ret); } } -out: for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { free_extent_buffer(path->nodes[i]); @@ -1968,8 +1963,8 @@ static int wait_log_commit(struct btrfs_trans_handle *trans, return 0; } -static void wait_for_writer(struct btrfs_trans_handle *trans, - struct btrfs_root *root) +static int wait_for_writer(struct btrfs_trans_handle *trans, + struct btrfs_root *root) { DEFINE_WAIT(wait); while (root->fs_info->last_trans_log_full_commit != @@ -1983,6 +1978,7 @@ static void wait_for_writer(struct btrfs_trans_handle *trans, mutex_lock(&root->log_mutex); finish_wait(&root->log_writer_wait, &wait); } + return 0; } /* @@ -2050,11 +2046,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, * wait for them until later. */ ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - mutex_unlock(&root->log_mutex); - goto out; - } + BUG_ON(ret); btrfs_set_root_node(&log->root_item, log->node); @@ -2085,11 +2077,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, } if (ret) { - if (ret != -ENOSPC) { - btrfs_abort_transaction(trans, root, ret); - mutex_unlock(&log_root_tree->log_mutex); - goto out; - } + BUG_ON(ret != -ENOSPC); root->fs_info->last_trans_log_full_commit = trans->transid; btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); mutex_unlock(&log_root_tree->log_mutex); @@ -2129,11 +2117,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, ret = btrfs_write_and_wait_marked_extents(log_root_tree, &log_root_tree->dirty_log_pages, EXTENT_DIRTY | EXTENT_NEW); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - mutex_unlock(&log_root_tree->log_mutex); - goto out_wake_log_root; - } + BUG_ON(ret); btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); btrfs_set_super_log_root(root->fs_info->super_for_commit, @@ -2342,9 +2326,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, if (ret == -ENOSPC) { root->fs_info->last_trans_log_full_commit = trans->transid; ret = 0; - } else if (ret < 0) - btrfs_abort_transaction(trans, root, ret); - + } btrfs_end_log_trans(root); return err; @@ -2375,8 +2357,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, if (ret == -ENOSPC) { root->fs_info->last_trans_log_full_commit = trans->transid; ret = 0; - } else if (ret < 0 && ret != -ENOENT) - btrfs_abort_transaction(trans, root, ret); + } btrfs_end_log_trans(root); return ret; @@ -3188,20 +3169,13 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) fs_info->log_root_recovering = 1; trans = btrfs_start_transaction(fs_info->tree_root, 0); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto error; - } + BUG_ON(IS_ERR(trans)); wc.trans = trans; wc.pin = 1; ret = walk_log_tree(trans, log_root_tree, &wc); - if (ret) { - btrfs_error(fs_info, ret, "Failed to pin buffers while " - "recovering log root tree."); - goto error; - } + BUG_ON(ret); again: key.objectid = BTRFS_TREE_LOG_OBJECTID; @@ -3210,12 +3184,8 @@ int btrfs_recover_log_trees(struct 
btrfs_root *log_root_tree) while (1) { ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0); - - if (ret < 0) { - btrfs_error(fs_info, ret, - "Couldn't find tree log root."); - goto error; - } + if (ret < 0) + break; if (ret > 0) { if (path->slots[0] == 0) break; @@ -3229,24 +3199,14 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) log = btrfs_read_fs_root_no_radix(log_root_tree, &found_key); - if (IS_ERR(log)) { - ret = PTR_ERR(log); - btrfs_error(fs_info, ret, - "Couldn't read tree log root."); - goto error; - } + BUG_ON(IS_ERR(log)); tmp_key.objectid = found_key.offset; tmp_key.type = BTRFS_ROOT_ITEM_KEY; tmp_key.offset = (u64)-1; wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); - if (IS_ERR(wc.replay_dest)) { - ret = PTR_ERR(wc.replay_dest); - btrfs_error(fs_info, ret, "Couldn't read target root " - "for tree log recovery."); - goto error; - } + BUG_ON(IS_ERR_OR_NULL(wc.replay_dest)); wc.replay_dest->log_root = log; btrfs_record_root_in_trans(trans, wc.replay_dest); @@ -3294,10 +3254,6 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) kfree(log_root_tree); return 0; - -error: - btrfs_free_path(path); - return ret; } /* diff --git a/trunk/fs/btrfs/tree-log.h b/trunk/fs/btrfs/tree-log.h index 862ac813f6b8..2270ac58d746 100644 --- a/trunk/fs/btrfs/tree-log.h +++ b/trunk/fs/btrfs/tree-log.h @@ -38,7 +38,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, struct inode *inode, u64 dirid); -void btrfs_end_log_trans(struct btrfs_root *root); +int btrfs_end_log_trans(struct btrfs_root *root); int btrfs_pin_log_trans(struct btrfs_root *root); int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, diff --git a/trunk/fs/btrfs/volumes.c b/trunk/fs/btrfs/volumes.c index a872b48be0ae..ef41f285a475 100644 --- a/trunk/fs/btrfs/volumes.c +++ b/trunk/fs/btrfs/volumes.c @@ -67,7 +67,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices) kfree(fs_devices); } -void btrfs_cleanup_fs_uuids(void) +int btrfs_cleanup_fs_uuids(void) { struct btrfs_fs_devices *fs_devices; @@ -77,6 +77,7 @@ void btrfs_cleanup_fs_uuids(void) list_del(&fs_devices->list); free_fs_devices(fs_devices); } + return 0; } static noinline struct btrfs_device *__find_device(struct list_head *head, @@ -129,7 +130,7 @@ static void requeue_list(struct btrfs_pending_bios *pending_bios, * the list if the block device is congested. This way, multiple devices * can make progress from a single worker thread. 
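The comment finishing above describes the per-device bio scheduler in volumes.c. Its core loop, heavily condensed; detach_pending() and requeue_rest() are stand-ins for the spinlock-protected list handling in the real run_scheduled_bios():

static void run_scheduled_bios_sketch(struct btrfs_device *device)
{
	struct backing_dev_info *bdi = blk_get_backing_dev_info(device->bdev);
	struct bio *pending = detach_pending(device);	/* hypothetical */
	struct bio *bio;

	while (pending) {
		bio = pending;
		pending = pending->bi_next;
		bio->bi_next = NULL;
		submit_bio(bio->bi_rw, bio);

		/* device congested: park the remainder and reschedule,
		 * so one worker keeps every device making progress */
		if (pending && bdi_write_congested(bdi)) {
			requeue_rest(device, pending);	/* hypothetical */
			btrfs_requeue_work(&device->work);
			return;
		}
	}
}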
*/ -static noinline void run_scheduled_bios(struct btrfs_device *device) +static noinline int run_scheduled_bios(struct btrfs_device *device) { struct bio *pending; struct backing_dev_info *bdi; @@ -315,6 +316,7 @@ static noinline void run_scheduled_bios(struct btrfs_device *device) done: blk_finish_plug(&plug); + return 0; } static void pending_bios_fn(struct btrfs_work *work) @@ -453,7 +455,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) return ERR_PTR(-ENOMEM); } -void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) +int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) { struct btrfs_device *device, *next; @@ -501,6 +503,7 @@ void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) fs_devices->latest_trans = latest_transid; mutex_unlock(&uuid_mutex); + return 0; } static void __free_device(struct work_struct *work) @@ -549,10 +552,10 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) fs_devices->num_can_discard--; new_device = kmalloc(sizeof(*new_device), GFP_NOFS); - BUG_ON(!new_device); /* -ENOMEM */ + BUG_ON(!new_device); memcpy(new_device, device, sizeof(*new_device)); new_device->name = kstrdup(device->name, GFP_NOFS); - BUG_ON(device->name && !new_device->name); /* -ENOMEM */ + BUG_ON(device->name && !new_device->name); new_device->bdev = NULL; new_device->writeable = 0; new_device->in_fs_metadata = 0; @@ -622,8 +625,6 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, printk(KERN_INFO "open %s failed\n", device->name); goto error; } - filemap_write_and_wait(bdev->bd_inode->i_mapping); - invalidate_bdev(bdev); set_blocksize(bdev, 4096); bh = btrfs_read_dev_super(bdev); @@ -1038,10 +1039,8 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent); - } else { - btrfs_error(root->fs_info, ret, "Slot search failed"); - goto out; } + BUG_ON(ret); if (device->bytes_used > 0) { u64 len = btrfs_dev_extent_length(leaf, extent); @@ -1051,10 +1050,7 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, spin_unlock(&root->fs_info->free_chunk_lock); } ret = btrfs_del_item(trans, root, path); - if (ret) { - btrfs_error(root->fs_info, ret, - "Failed to remove dev extent item"); - } + out: btrfs_free_path(path); return ret; @@ -1082,8 +1078,7 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, key.type = BTRFS_DEV_EXTENT_KEY; ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*extent)); - if (ret) - goto out; + BUG_ON(ret); leaf = path->nodes[0]; extent = btrfs_item_ptr(leaf, path->slots[0], @@ -1098,7 +1093,6 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, btrfs_set_dev_extent_length(leaf, extent, num_bytes); btrfs_mark_buffer_dirty(leaf); -out: btrfs_free_path(path); return ret; } @@ -1124,7 +1118,7 @@ static noinline int find_next_chunk(struct btrfs_root *root, if (ret < 0) goto error; - BUG_ON(ret == 0); /* Corruption */ + BUG_ON(ret == 0); ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY); if (ret) { @@ -1168,7 +1162,7 @@ static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid) if (ret < 0) goto error; - BUG_ON(ret == 0); /* Corruption */ + BUG_ON(ret == 0); ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID, BTRFS_DEV_ITEM_KEY); @@ -1356,7 +1350,6 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) } set_blocksize(bdev, 4096); - invalidate_bdev(bdev); bh = 
btrfs_read_dev_super(bdev); if (!bh) { ret = -EINVAL; @@ -1603,7 +1596,7 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans, (unsigned long)btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE); device = btrfs_find_device(root, devid, dev_uuid, fs_uuid); - BUG_ON(!device); /* Logic error */ + BUG_ON(!device); if (device->fs_devices->seeding) { btrfs_set_device_generation(leaf, dev_item, @@ -1713,7 +1706,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) if (seeding_dev) { sb->s_flags &= ~MS_RDONLY; ret = btrfs_prepare_sprout(root); - BUG_ON(ret); /* -ENOMEM */ + BUG_ON(ret); } device->fs_devices = root->fs_info->fs_devices; @@ -1751,15 +1744,11 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) if (seeding_dev) { ret = init_first_rw_device(trans, root, device); - if (ret) - goto error_trans; + BUG_ON(ret); ret = btrfs_finish_sprout(trans, root); - if (ret) - goto error_trans; + BUG_ON(ret); } else { ret = btrfs_add_device(trans, root, device); - if (ret) - goto error_trans; } /* @@ -1769,31 +1758,17 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) btrfs_clear_space_info_full(root->fs_info); unlock_chunks(root); - ret = btrfs_commit_transaction(trans, root); + btrfs_commit_transaction(trans, root); if (seeding_dev) { mutex_unlock(&uuid_mutex); up_write(&sb->s_umount); - if (ret) /* transaction commit */ - return ret; - ret = btrfs_relocate_sys_chunks(root); - if (ret < 0) - btrfs_error(root->fs_info, ret, - "Failed to relocate sys chunks after " - "device initialization. This can be fixed " - "using the \"btrfs balance\" command."); + BUG_ON(ret); } return ret; - -error_trans: - unlock_chunks(root); - btrfs_abort_transaction(trans, root, ret); - btrfs_end_transaction(trans, root); - kfree(device->name); - kfree(device); error: blkdev_put(bdev, FMODE_EXCL); if (seeding_dev) { @@ -1901,20 +1876,10 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, key.type = BTRFS_CHUNK_ITEM_KEY; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) - goto out; - else if (ret > 0) { /* Logic error or corruption */ - btrfs_error(root->fs_info, -ENOENT, - "Failed lookup while freeing chunk."); - ret = -ENOENT; - goto out; - } + BUG_ON(ret); ret = btrfs_del_item(trans, root, path); - if (ret < 0) - btrfs_error(root->fs_info, ret, - "Failed to delete chunk item."); -out: + btrfs_free_path(path); return ret; } @@ -2076,7 +2041,7 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root) ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); if (ret < 0) goto error; - BUG_ON(ret == 0); /* Corruption */ + BUG_ON(ret == 0); ret = btrfs_previous_item(chunk_root, path, key.objectid, key.type); @@ -2285,13 +2250,15 @@ static void unset_balance_control(struct btrfs_fs_info *fs_info) * Balance filters. Return 1 if chunk should be filtered out * (should not be balanced). 
*/ -static int chunk_profiles_filter(u64 chunk_type, +static int chunk_profiles_filter(u64 chunk_profile, struct btrfs_balance_args *bargs) { - chunk_type = chunk_to_extended(chunk_type) & - BTRFS_EXTENDED_PROFILE_MASK; + chunk_profile &= BTRFS_BLOCK_GROUP_PROFILE_MASK; + + if (chunk_profile == 0) + chunk_profile = BTRFS_AVAIL_ALLOC_BIT_SINGLE; - if (bargs->profiles & chunk_type) + if (bargs->profiles & chunk_profile) return 0; return 1; @@ -2398,16 +2365,18 @@ static int chunk_vrange_filter(struct extent_buffer *leaf, return 1; } -static int chunk_soft_convert_filter(u64 chunk_type, +static int chunk_soft_convert_filter(u64 chunk_profile, struct btrfs_balance_args *bargs) { if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT)) return 0; - chunk_type = chunk_to_extended(chunk_type) & - BTRFS_EXTENDED_PROFILE_MASK; + chunk_profile &= BTRFS_BLOCK_GROUP_PROFILE_MASK; - if (bargs->target == chunk_type) + if (chunk_profile == 0) + chunk_profile = BTRFS_AVAIL_ALLOC_BIT_SINGLE; + + if (bargs->target & chunk_profile) return 1; return 0; @@ -2633,30 +2602,6 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) return ret; } -/** - * alloc_profile_is_valid - see if a given profile is valid and reduced - * @flags: profile to validate - * @extended: if true @flags is treated as an extended profile - */ -static int alloc_profile_is_valid(u64 flags, int extended) -{ - u64 mask = (extended ? BTRFS_EXTENDED_PROFILE_MASK : - BTRFS_BLOCK_GROUP_PROFILE_MASK); - - flags &= ~BTRFS_BLOCK_GROUP_TYPE_MASK; - - /* 1) check that all other bits are zeroed */ - if (flags & ~mask) - return 0; - - /* 2) see if profile is reduced */ - if (flags == 0) - return !extended; /* "0" is valid for usual profiles */ - - /* true if exactly one bit set */ - return (flags & (flags - 1)) == 0; -} - static inline int balance_need_close(struct btrfs_fs_info *fs_info) { /* cancel requested || normal exit path */ @@ -2685,7 +2630,6 @@ int btrfs_balance(struct btrfs_balance_control *bctl, { struct btrfs_fs_info *fs_info = bctl->fs_info; u64 allowed; - int mixed = 0; int ret; if (btrfs_fs_closing(fs_info) || @@ -2695,16 +2639,13 @@ int btrfs_balance(struct btrfs_balance_control *bctl, goto out; } - allowed = btrfs_super_incompat_flags(fs_info->super_copy); - if (allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) - mixed = 1; - /* * In case of mixed groups both data and meta should be picked, * and identical options should be given for both of them. */ - allowed = BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA; - if (mixed && (bctl->flags & allowed)) { + allowed = btrfs_super_incompat_flags(fs_info->super_copy); + if ((allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && + (bctl->flags & (BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA))) { if (!(bctl->flags & BTRFS_BALANCE_DATA) || !(bctl->flags & BTRFS_BALANCE_METADATA) || memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) { @@ -2715,6 +2656,14 @@ int btrfs_balance(struct btrfs_balance_control *bctl, } } + /* + * Profile changing sanity checks. Skip them if a simple + * balance is requested. 
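Both filter hunks above normalize the chunk flags the same way before testing them against the user's mask. Pulled out, with the post-revert convention that an empty profile field means the implicit "single" profile:

static u64 normalize_profile(u64 chunk_flags)
{
	u64 profile = chunk_flags & BTRFS_BLOCK_GROUP_PROFILE_MASK;

	/* no RAID/DUP bit set means the implicit "single" profile */
	return profile ? profile : BTRFS_AVAIL_ALLOC_BIT_SINGLE;
}

/* return 1 to filter the chunk out (skip it), 0 to balance it */
static int profiles_filter_sketch(u64 chunk_flags,
				  struct btrfs_balance_args *bargs)
{
	return (bargs->profiles & normalize_profile(chunk_flags)) ? 0 : 1;
}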
@@ -2715,6 +2656,14 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
 		}
 	}
 
+	/*
+	 * Profile changing sanity checks.  Skip them if a simple
+	 * balance is requested.
+	 */
+	if (!((bctl->data.flags | bctl->sys.flags | bctl->meta.flags) &
+	      BTRFS_BALANCE_ARGS_CONVERT))
+		goto do_balance;
+
 	allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
 	if (fs_info->fs_devices->num_devices == 1)
 		allowed |= BTRFS_BLOCK_GROUP_DUP;
@@ -2724,27 +2673,24 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
 		allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
 			    BTRFS_BLOCK_GROUP_RAID10);
 
-	if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
-	    (!alloc_profile_is_valid(bctl->data.target, 1) ||
-	     (bctl->data.target & ~allowed))) {
+	if (!profile_is_valid(bctl->data.target, 1) ||
+	    bctl->data.target & ~allowed) {
 		printk(KERN_ERR "btrfs: unable to start balance with target "
 		       "data profile %llu\n",
 		       (unsigned long long)bctl->data.target);
 		ret = -EINVAL;
 		goto out;
 	}
-	if ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
-	    (!alloc_profile_is_valid(bctl->meta.target, 1) ||
-	     (bctl->meta.target & ~allowed))) {
+	if (!profile_is_valid(bctl->meta.target, 1) ||
+	    bctl->meta.target & ~allowed) {
 		printk(KERN_ERR "btrfs: unable to start balance with target "
 		       "metadata profile %llu\n",
 		       (unsigned long long)bctl->meta.target);
 		ret = -EINVAL;
 		goto out;
 	}
-	if ((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
-	    (!alloc_profile_is_valid(bctl->sys.target, 1) ||
-	     (bctl->sys.target & ~allowed))) {
+	if (!profile_is_valid(bctl->sys.target, 1) ||
+	    bctl->sys.target & ~allowed) {
 		printk(KERN_ERR "btrfs: unable to start balance with target "
 		       "system profile %llu\n",
 		       (unsigned long long)bctl->sys.target);
@@ -2752,9 +2698,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
 		goto out;
 	}
 
-	/* allow dup'ed data chunks only in mixed mode */
-	if (!mixed && (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
-	    (bctl->data.target & BTRFS_BLOCK_GROUP_DUP)) {
+	if (bctl->data.target & BTRFS_BLOCK_GROUP_DUP) {
 		printk(KERN_ERR "btrfs: dup for data is not allowed\n");
 		ret = -EINVAL;
 		goto out;
@@ -2780,6 +2724,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
 		}
 	}
 
+do_balance:
 	ret = insert_balance_item(fs_info->tree_root, bctl);
 	if (ret && ret != -EEXIST)
 		goto out;
@@ -3022,7 +2967,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
 	key.offset = (u64)-1;
 	key.type = BTRFS_DEV_EXTENT_KEY;
 
-	do {
+	while (1) {
 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 		if (ret < 0)
 			goto done;
@@ -3064,7 +3009,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
 			goto done;
 		if (ret == -ENOSPC)
 			failed++;
-	} while (key.offset-- > 0);
+		key.offset -= 1;
+	}
 
 	if (failed && !retried) {
 		failed = 0;
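The shrink-loop rewrite above is more subtle than it looks: with a `u64` key offset, `do { ... } while (key.offset-- > 0)` runs the body for the zero offset and then leaves `key.offset` wrapped to `UINT64_MAX`, whereas the `while (1)` form decrements unconditionally and relies on the tree search to terminate. A small self-contained demonstration of the post-decrement behavior:

    #include <stdint.h>
    #include <stdio.h>
    #include <inttypes.h>

    int main(void)
    {
    	uint64_t offset = 2;
    	int iterations = 0;

    	do {
    		iterations++;	/* body runs for offset == 2, 1 and 0 */
    	} while (offset-- > 0);

    	/* Three iterations, but the counter has wrapped around. */
    	printf("iterations=%d offset=%" PRIu64 "\n", iterations, offset);
    	/* prints: iterations=3 offset=18446744073709551615 */
    	return 0;
    }

The `do`/`while` form is the one that guarantees termination at offset zero; the `while (1)` form is only safe if the search itself runs out of dev extents first.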
@@ -3182,7 +3128,11 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 	int i;
 	int j;
 
-	BUG_ON(!alloc_profile_is_valid(type, 0));
+	if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
+	    (type & BTRFS_BLOCK_GROUP_DUP)) {
+		WARN_ON(1);
+		type &= ~BTRFS_BLOCK_GROUP_DUP;
+	}
 
 	if (list_empty(&fs_devices->alloc_list))
 		return -ENOSPC;
@@ -3378,15 +3328,13 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 	write_lock(&em_tree->lock);
 	ret = add_extent_mapping(em_tree, em);
 	write_unlock(&em_tree->lock);
+	BUG_ON(ret);
 	free_extent_map(em);
-	if (ret)
-		goto error;
 
 	ret = btrfs_make_block_group(trans, extent_root, 0, type,
 				     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
 				     start, num_bytes);
-	if (ret)
-		goto error;
+	BUG_ON(ret);
 
 	for (i = 0; i < map->num_stripes; ++i) {
 		struct btrfs_device *device;
@@ -3399,10 +3347,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 				info->chunk_root->root_key.objectid,
 				BTRFS_FIRST_CHUNK_TREE_OBJECTID,
 				start, dev_offset, stripe_size);
-		if (ret) {
-			btrfs_abort_transaction(trans, extent_root, ret);
-			goto error;
-		}
+		BUG_ON(ret);
 	}
 
 	kfree(devices_info);
@@ -3438,8 +3383,7 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
 		device = map->stripes[index].dev;
 		device->bytes_used += stripe_size;
 		ret = btrfs_update_device(trans, device);
-		if (ret)
-			goto out_free;
+		BUG_ON(ret);
 		index++;
 	}
 
@@ -3476,19 +3420,16 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
 	key.offset = chunk_offset;
 
 	ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size);
+	BUG_ON(ret);
 
-	if (ret == 0 && map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
-		/*
-		 * TODO: Cleanup of inserted chunk root in case of
-		 * failure.
-		 */
+	if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
 		ret = btrfs_add_system_chunk(chunk_root, &key, chunk,
 					     item_size);
+		BUG_ON(ret);
 	}
 
-out_free:
 	kfree(chunk);
-	return ret;
+	return 0;
 }
 
 /*
@@ -3520,8 +3461,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 
 	ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
 				   chunk_size, stripe_size);
-	if (ret)
-		return ret;
+	BUG_ON(ret);
 	return 0;
 }
 
@@ -3553,8 +3493,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
 
 	ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
 				  &stripe_size, chunk_offset, alloc_profile);
-	if (ret)
-		return ret;
+	BUG_ON(ret);
 
 	sys_chunk_offset = chunk_offset + chunk_size;
 
@@ -3565,12 +3504,10 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
 	ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map,
 				  &sys_chunk_size, &sys_stripe_size,
 				  sys_chunk_offset, alloc_profile);
-	if (ret)
-		goto abort;
+	BUG_ON(ret);
 
 	ret = btrfs_add_device(trans, fs_info->chunk_root, device);
-	if (ret)
-		goto abort;
+	BUG_ON(ret);
 
 	/*
 	 * Modifying chunk tree needs allocating new blocks from both
@@ -3580,20 +3517,13 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
 	 */
 	ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
 				   chunk_size, stripe_size);
-	if (ret)
-		goto abort;
+	BUG_ON(ret);
 
 	ret = __finish_chunk_alloc(trans, extent_root, sys_map,
 				   sys_chunk_offset,
 				   sys_chunk_size, sys_stripe_size);
-	if (ret)
-		goto abort;
-
+	BUG_ON(ret);
 	return 0;
-
-abort:
-	btrfs_abort_transaction(trans, root, ret);
-	return ret;
 }
 
 int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
@@ -3944,7 +3874,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
 	do_div(length, map->num_stripes);
 
 	buf = kzalloc(sizeof(u64) * map->num_stripes, GFP_NOFS);
-	BUG_ON(!buf); /* -ENOMEM */
+	BUG_ON(!buf);
 
 	for (i = 0; i < map->num_stripes; i++) {
 		if (devid && map->stripes[i].dev->devid != devid)
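One detail worth pausing on in `btrfs_rmap_block()` above: the 64-bit `length` is divided with `do_div()` rather than plain `/`, because 64-by-32 division is not directly available on all 32-bit targets. The kernel macro divides in place and hands back the remainder. A user-space model of that contract (the real `do_div` is an architecture-specific macro; this only mimics its observable behavior):

    #include <stdint.h>
    #include <stdio.h>
    #include <inttypes.h>

    /* Models the kernel's do_div(n, base): n becomes the quotient,
     * the remainder is returned. */
    static uint32_t model_do_div(uint64_t *n, uint32_t base)
    {
    	uint32_t rem = (uint32_t)(*n % base);
    	*n /= base;
    	return rem;
    }

    int main(void)
    {
    	uint64_t length = (1ULL << 33) + 5;	/* deliberately wider than 32 bits */
    	uint32_t num_stripes = 4;
    	uint32_t rem = model_do_div(&length, num_stripes);

    	printf("per-stripe=%" PRIu64 " rem=%" PRIu32 "\n", length, rem);
    	return 0;
    }

The in-place update is why the kernel code can keep using `length` directly as the per-stripe size after the call.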
@@ -4037,7 +3967,7 @@ struct async_sched {
  * This will add one bio to the pending list for a device and make sure
  * the work struct is scheduled.
  */
-static noinline void schedule_bio(struct btrfs_root *root,
+static noinline int schedule_bio(struct btrfs_root *root,
 				 struct btrfs_device *device,
 				 int rw, struct bio *bio)
 {
@@ -4049,7 +3979,7 @@ static noinline void schedule_bio(struct btrfs_root *root,
 		bio_get(bio);
 		btrfsic_submit_bio(rw, bio);
 		bio_put(bio);
-		return;
+		return 0;
 	}
 
 	/*
@@ -4083,6 +4013,7 @@ static noinline void schedule_bio(struct btrfs_root *root,
 	if (should_queue)
 		btrfs_queue_worker(&root->fs_info->submit_workers,
 				   &device->work);
+	return 0;
 }
 
 int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
@@ -4105,8 +4036,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
 
 	ret = btrfs_map_block(map_tree, rw, logical, &map_length, &bbio,
 			      mirror_num);
-	if (ret) /* -ENOMEM */
-		return ret;
+	BUG_ON(ret);
 
 	total_devs = bbio->num_stripes;
 	if (map_length < length) {
@@ -4125,7 +4055,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
 	while (dev_nr < total_devs) {
 		if (dev_nr < total_devs - 1) {
 			bio = bio_clone(first_bio, GFP_NOFS);
-			BUG_ON(!bio); /* -ENOMEM */
+			BUG_ON(!bio);
 		} else {
 			bio = first_bio;
 		}
@@ -4279,13 +4209,13 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
 	write_lock(&map_tree->map_tree.lock);
 	ret = add_extent_mapping(&map_tree->map_tree, em);
 	write_unlock(&map_tree->map_tree.lock);
-	BUG_ON(ret); /* Tree corruption */
+	BUG_ON(ret);
 	free_extent_map(em);
 
 	return 0;
 }
 
-static void fill_device_from_item(struct extent_buffer *leaf,
+static int fill_device_from_item(struct extent_buffer *leaf,
 				 struct btrfs_dev_item *dev_item,
 				 struct btrfs_device *device)
 {
@@ -4302,6 +4232,8 @@ static void fill_device_from_item(struct extent_buffer *leaf,
 
 	ptr = (unsigned long)btrfs_device_uuid(dev_item);
 	read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
+
+	return 0;
 }
 
 static int open_seed_devices(struct btrfs_root *root, u8 *fsid)
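The `btrfs_map_bio()` loop above fans one bio out to every stripe device: it clones the original for all but the last device and submits the original itself last, so exactly `total_devs - 1` clones are allocated and none are wasted. The shape of that loop, modeled in self-contained user-space C (the types and helpers are stand-ins, not the block-layer API):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct fake_bio { int id; };

    static struct fake_bio *clone_bio(const struct fake_bio *src)
    {
    	struct fake_bio *b = malloc(sizeof(*b));
    	if (b)
    		memcpy(b, src, sizeof(*b));
    	return b;
    }

    static void submit(struct fake_bio *b, int dev)
    {
    	printf("bio %p -> device %d\n", (void *)b, dev);
    }

    int main(void)
    {
    	struct fake_bio first = { .id = 1 };
    	int total_devs = 3;

    	for (int dev_nr = 0; dev_nr < total_devs; dev_nr++) {
    		struct fake_bio *bio;
    		if (dev_nr < total_devs - 1) {
    			bio = clone_bio(&first);  /* clone for all but the last */
    			if (!bio)
    				return 1;  /* the kernel code asserts here instead */
    		} else {
    			bio = &first;  /* the last device reuses the original */
    		}
    		submit(bio, dev_nr);
    		if (bio != &first)
    			free(bio);  /* in the kernel, completion handles this */
    	}
    	return 0;
    }

Reusing the original for the final submission is what makes the single-stripe case allocation-free.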
@@ -4452,7 +4384,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
 	 * to silence the warning eg. on PowerPC 64.
 	 */
 	if (PAGE_CACHE_SIZE > BTRFS_SUPER_INFO_SIZE)
-		SetPageUptodate(sb->pages[0]);
+		SetPageUptodate(sb->first_page);
 	write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
 	array_size = btrfs_super_sys_array_size(super_copy);
diff --git a/trunk/fs/btrfs/volumes.h b/trunk/fs/btrfs/volumes.h
index bb6b03f97aaa..19ac95048b88 100644
--- a/trunk/fs/btrfs/volumes.h
+++ b/trunk/fs/btrfs/volumes.h
@@ -260,12 +260,12 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
 			  struct btrfs_fs_devices **fs_devices_ret);
 int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
-void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices);
+int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices);
 int btrfs_add_device(struct btrfs_trans_handle *trans,
 		     struct btrfs_root *root,
 		     struct btrfs_device *device);
 int btrfs_rm_device(struct btrfs_root *root, char *device_path);
-void btrfs_cleanup_fs_uuids(void);
+int btrfs_cleanup_fs_uuids(void);
 int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len);
 int btrfs_grow_device(struct btrfs_trans_handle *trans,
 		      struct btrfs_device *device, u64 new_size);
diff --git a/trunk/include/trace/events/btrfs.h b/trunk/include/trace/events/btrfs.h
index 91b91e805673..84f3001a568d 100644
--- a/trunk/include/trace/events/btrfs.h
+++ b/trunk/include/trace/events/btrfs.h
@@ -6,7 +6,6 @@
 
 #include
 #include
-#include
 
 struct btrfs_root;
 struct btrfs_fs_info;
@@ -863,49 +862,6 @@ TRACE_EVENT(btrfs_setup_cluster,
 		  __entry->size, __entry->max_size, __entry->bitmap)
 );
 
-struct extent_state;
-TRACE_EVENT(alloc_extent_state,
-
-	TP_PROTO(struct extent_state *state, gfp_t mask, unsigned long IP),
-
-	TP_ARGS(state, mask, IP),
-
-	TP_STRUCT__entry(
-		__field(struct extent_state *, state)
-		__field(gfp_t, mask)
-		__field(unsigned long, ip)
-	),
-
-	TP_fast_assign(
-		__entry->state = state,
-		__entry->mask = mask,
-		__entry->ip = IP
-	),
-
-	TP_printk("state=%p; mask = %s; caller = %pF", __entry->state,
-		  show_gfp_flags(__entry->mask), (void *)__entry->ip)
-);
-
-TRACE_EVENT(free_extent_state,
-
-	TP_PROTO(struct extent_state *state, unsigned long IP),
-
-	TP_ARGS(state, IP),
-
-	TP_STRUCT__entry(
-		__field(struct extent_state *, state)
-		__field(unsigned long, ip)
-	),
-
-	TP_fast_assign(
-		__entry->state = state,
-		__entry->ip = IP
-	),
-
-	TP_printk(" state=%p; caller = %pF", __entry->state,
-		  (void *)__entry->ip)
-);
-
 #endif /* _TRACE_BTRFS_H */
 
 /* This part must be outside protection */
diff --git a/trunk/sound/isa/opti9xx/opti92x-ad1848.c b/trunk/sound/isa/opti9xx/opti92x-ad1848.c
index d7ccf28bd66a..babaedd242f7 100644
--- a/trunk/sound/isa/opti9xx/opti92x-ad1848.c
+++ b/trunk/sound/isa/opti9xx/opti92x-ad1848.c
@@ -65,7 +65,7 @@ static int index = SNDRV_DEFAULT_IDX1;	/* Index 0-MAX */
 static char *id = SNDRV_DEFAULT_STR1;		/* ID for this card */
 //static bool enable = SNDRV_DEFAULT_ENABLE1;	/* Enable this card */
 #ifdef CONFIG_PNP
-static bool isapnp = true;			/* Enable ISA PnP detection */
+static int isapnp = 1;				/* Enable ISA PnP detection */
 #endif
 static long port = SNDRV_DEFAULT_PORT1;	/* 0x530,0xe80,0xf40,0x604 */
 static long mpu_port = SNDRV_DEFAULT_PORT1;	/* 0x300,0x310,0x320,0x330 */
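The `bool isapnp = true` to `int isapnp = 1` change above (and the matching `digital` change in the next file) appears to track the kernel-wide module-parameter cleanup in which `module_param(..., bool, ...)` came to require a genuine `bool` backing variable; whichever direction the tree is moving, the point is that the parameter handler writes through a pointer, so the width of the backing variable has to match. A quick user-space illustration of why the width matters; the scenario is hypothetical and not the module-param code itself:

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
    	printf("sizeof(bool)=%zu sizeof(int)=%zu\n", sizeof(bool), sizeof(int));

    	/* Writing an int-sized value through a pointer that really refers
    	 * to a 1-byte bool clobbers whatever sits next to it in memory. */
    	struct { bool flag; char neighbour[7]; } s = { false, "ABCDEF" };
    	int value = 1;
    	memcpy(&s.flag, &value, sizeof(int));	/* wrong width on purpose */
    	printf("neighbour after bad write: \"%s\" (was \"ABCDEF\")\n",
    	       s.neighbour);
    	return 0;
    }

On a typical platform `bool` is one byte and `int` is four, so the oversized write silently corrupts the three adjacent bytes, which is exactly the class of bug the type-matching requirement prevents.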
diff --git a/trunk/sound/oss/msnd_pinnacle.c b/trunk/sound/oss/msnd_pinnacle.c
index 2c79d60a725f..eba734560f6f 100644
--- a/trunk/sound/oss/msnd_pinnacle.c
+++ b/trunk/sound/oss/msnd_pinnacle.c
@@ -1631,7 +1631,7 @@ static int ide_irq __initdata = 0;
 static int joystick_io __initdata = 0;
 
 /* If we have the digital daugherboard... */
-static bool digital __initdata = false;
+static int digital __initdata = 0;
 #endif
 
 static int fifosize __initdata = DEFFIFOSIZE;
diff --git a/trunk/sound/pci/asihpi/hpi_internal.h b/trunk/sound/pci/asihpi/hpi_internal.h
index 8c63200cf339..4cc315daeda0 100644
--- a/trunk/sound/pci/asihpi/hpi_internal.h
+++ b/trunk/sound/pci/asihpi/hpi_internal.h
@@ -42,7 +42,7 @@ On error *pLockedMemHandle marked invalid, non-zero returned.
 If this function succeeds, then HpiOs_LockedMem_GetVirtAddr() and
 HpiOs_LockedMem_GetPyhsAddr() will always succed on the returned handle.
 */
-int hpios_locked_mem_alloc(struct consistent_dma_area *p_locked_mem_handle,
+u16 hpios_locked_mem_alloc(struct consistent_dma_area *p_locked_mem_handle,
 	/**< memory handle */
 	u32 size, /**< Size in bytes to allocate */
 	struct pci_dev *p_os_reference
diff --git a/trunk/sound/pci/asihpi/hpios.c b/trunk/sound/pci/asihpi/hpios.c
index 87f4385fe8c7..2d7d1c2e1d0d 100644
--- a/trunk/sound/pci/asihpi/hpios.c
+++ b/trunk/sound/pci/asihpi/hpios.c
@@ -43,7 +43,7 @@ void hpios_delay_micro_seconds(u32 num_micro_sec)
 
 On error, return -ENOMEM, and *pMemArea.size = 0
 */
-int hpios_locked_mem_alloc(struct consistent_dma_area *p_mem_area, u32 size,
+u16 hpios_locked_mem_alloc(struct consistent_dma_area *p_mem_area, u32 size,
 	struct pci_dev *pdev)
 {
 	/*?? any benefit in using managed dmam_alloc_coherent? */
diff --git a/trunk/sound/pci/hda/patch_realtek.c b/trunk/sound/pci/hda/patch_realtek.c
index 9917e55d6f11..8ea2fd654327 100644
--- a/trunk/sound/pci/hda/patch_realtek.c
+++ b/trunk/sound/pci/hda/patch_realtek.c
@@ -2717,6 +2717,9 @@ static int alc_auto_fill_adc_caps(struct hda_codec *codec)
 	int max_nums = ARRAY_SIZE(spec->private_adc_nids);
 	int i, nums = 0;
 
+	if (spec->shared_mic_hp)
+		max_nums = 1; /* no multi streams with the shared HP/mic */
+
 	nid = codec->start_nid;
 	for (i = 0; i < codec->num_nodes; i++, nid++) {
 		hda_nid_t src;
@@ -4073,7 +4076,6 @@ static void alc_remove_invalid_adc_nids(struct hda_codec *codec)
 	if (spec->dyn_adc_switch)
 		return;
 
- again:
 	nums = 0;
 	for (n = 0; n < spec->num_adc_nids; n++) {
 		hda_nid_t cap = spec->private_capsrc_nids[n];
@@ -4094,11 +4096,6 @@ static void alc_remove_invalid_adc_nids(struct hda_codec *codec)
 	if (!nums) {
 		/* check whether ADC-switch is possible */
 		if (!alc_check_dyn_adc_switch(codec)) {
-			if (spec->shared_mic_hp) {
-				spec->shared_mic_hp = 0;
-				spec->private_imux[0].num_items = 1;
-				goto again;
-			}
 			printk(KERN_WARNING "hda_codec: %s: no valid ADC found;"
 			       " using fallback 0x%x\n",
 			       codec->chip_name, spec->private_adc_nids[0]);
@@ -4116,7 +4113,7 @@ static void alc_remove_invalid_adc_nids(struct hda_codec *codec)
 
 	if (spec->auto_mic)
 		alc_auto_mic_check_imux(codec); /* check auto-mic setups */
-	else if (spec->input_mux->num_items == 1 || spec->shared_mic_hp)
+	else if (spec->input_mux->num_items == 1)
 		spec->num_adc_nids = 1; /* reduce to a single ADC */
 }
diff --git a/trunk/sound/soc/codecs/wm8994.c b/trunk/sound/soc/codecs/wm8994.c
index 7c49642af052..fe7fbaeb7146 100644
--- a/trunk/sound/soc/codecs/wm8994.c
+++ b/trunk/sound/soc/codecs/wm8994.c
@@ -3629,7 +3629,7 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec)
 	case 2:
 	case 3:
 		wm8994->hubs.dcs_codes_l = -9;
-		wm8994->hubs.dcs_codes_r = -7;
+		wm8994->hubs.dcs_codes_r = -5;
 		break;
 	default:
 		break;
 	}
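Worth flagging from the asihpi hunks above: `hpios_locked_mem_alloc()` becomes `u16` while its own comment still promises `-ENOMEM` on error. A negative errno squeezed through an unsigned 16-bit return becomes a large positive number, so any `if (ret < 0)` check at a call site can never fire. A compact, self-contained demonstration:

    #include <stdint.h>
    #include <stdio.h>

    #define ENOMEM 12

    static uint16_t alloc_fails(void)
    {
    	return -ENOMEM;	/* truncated to 16 bits: 65524 */
    }

    int main(void)
    {
    	uint16_t ret = alloc_fails();

    	printf("ret=%u, (ret < 0) is %d\n", ret, ret < 0 ? 1 : 0);
    	/* prints: ret=65524, (ret < 0) is 0 -- the error is invisible */
    	return 0;
    }

Callers of a `u16`-returning allocator must therefore test `if (ret != 0)` rather than comparing against zero with a signed relation.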
diff --git a/trunk/sound/soc/soc-dmaengine-pcm.c b/trunk/sound/soc/soc-dmaengine-pcm.c
index 475695234b3d..4420b7030c83 100644
--- a/trunk/sound/soc/soc-dmaengine-pcm.c
+++ b/trunk/sound/soc/soc-dmaengine-pcm.c
@@ -143,7 +143,7 @@ static int dmaengine_pcm_prepare_and_submit(struct snd_pcm_substream *substream)
 	direction = snd_pcm_substream_to_dma_direction(substream);
 
 	prtd->pos = 0;
-	desc = dmaengine_prep_dma_cyclic(chan,
+	desc = chan->device->device_prep_dma_cyclic(chan,
 		substream->runtime->dma_addr,
 		snd_pcm_lib_buffer_bytes(substream),
 		snd_pcm_lib_period_bytes(substream), direction);
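This last hunk swaps the `dmaengine_prep_dma_cyclic()` convenience wrapper for a direct call through `chan->device->device_prep_dma_cyclic`. Both reach the same driver callback; the wrapper only hides the ops-table indirection. A generic user-space sketch of that wrapper-over-ops-table pattern (the types and names are stand-ins for the dmaengine ones, not its actual signatures):

    #include <stddef.h>
    #include <stdio.h>

    struct device_ops;	/* forward declaration */

    struct chan {
    	const struct device_ops *device;	/* per-driver ops table */
    };

    struct device_ops {
    	int (*prep_cyclic)(struct chan *c, unsigned long addr,
    			   size_t buf_len, size_t period_len);
    };

    /* Convenience wrapper, analogous to dmaengine_prep_dma_cyclic(). */
    static int prep_cyclic(struct chan *c, unsigned long addr,
    		       size_t buf_len, size_t period_len)
    {
    	return c->device->prep_cyclic(c, addr, buf_len, period_len);
    }

    /* A driver's implementation, installed via the ops table. */
    static int driver_prep_cyclic(struct chan *c, unsigned long addr,
    			      size_t buf_len, size_t period_len)
    {
    	printf("cyclic: buf=%zu period=%zu periods=%zu\n",
    	       buf_len, period_len, buf_len / period_len);
    	return 0;
    }

    static const struct device_ops ops = { .prep_cyclic = driver_prep_cyclic };

    int main(void)
    {
    	struct chan c = { .device = &ops };

    	/* Both calls are equivalent; the wrapper only adds readability
    	 * and a single place to extend the calling convention later. */
    	prep_cyclic(&c, 0x1000, 65536, 4096);
    	c.device->prep_cyclic(&c, 0x1000, 65536, 4096);
    	return 0;
    }

Keeping callers on the wrapper is the usual preference, since it lets the core add argument checking or new parameters without touching every driver call site.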