From 5e1d55329a1e76f29215b146a22e31dd45999a04 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Fri, 6 Feb 2009 11:45:46 +0000
Subject: [PATCH]

--- yaml ---
r: 131071
b: refs/heads/master
c: 0bf2f3aec5474da80a60e1baca629af87ecb67b6
h: refs/heads/master
i:
  131069: a386c241546e477cbe015f789baceecd088e44ad
  131067: b24afbab584f6556ae34e36a9bc7873ec8225ad0
  131063: 58f75ca64db26f9230e48c5f45d98059b716fa20
  131055: 5c79f1f60484b375f349a038218beda43e739f58
  131039: b0f0b3acca07421562b142a41b9f7af9e2f3f04d
  131007: 512be15cfd81ac616527bb2dac4a55d758802d37
  130943: 33c0295a3809ec816993c94fa594d35a630d5f68
  130815: 2fdfc7c6dcb209deb801c6cef1e057580051dfa4
  130559: 8813e85455b51189188fd60e81270afea0483bca
  130047: 1c6e33aa348a09cce2b27e193bf121abfa4520af
  129023: 3549f31bd3edba658bc99309140bbd4d123e9fcf
  126975: 5a5977599e8e4f8e2f284d41f6ae11c492c8faff
  122879: b3224e7a67c3a427e66fac19d63da08dfe2ae7e8
  114687: b5e6a135573c779a59c8ee52a2b1c8f9dfd54520
  98303: 8d3fd52306c01a77327e5a8547cc363fa60bb45f
  65535: 9f7407a1ae7917d67e0caeb8494b65a264d36c67
v: v3
---
 [refs]                          |    2 +-
 trunk/MAINTAINERS               |    8 -
 trunk/arch/x86/ia32/ia32entry.S |    8 +-
 trunk/fs/binfmt_elf.c           |   14 +-
 trunk/fs/btrfs/Kconfig          |   13 -
 trunk/fs/btrfs/async-thread.c   |   61 +----
 trunk/fs/btrfs/compression.c    |    1 +
 trunk/fs/btrfs/ctree.c          |  276 +++-----------------
 trunk/fs/btrfs/ctree.h          |   28 +-
 trunk/fs/btrfs/disk-io.c        |  120 +++------
 trunk/fs/btrfs/disk-io.h        |    2 -
 trunk/fs/btrfs/extent-tree.c    |  438 ++++++--------------------
 trunk/fs/btrfs/extent_io.c      |  132 ++--------
 trunk/fs/btrfs/extent_io.h      |   18 +-
 trunk/fs/btrfs/extent_map.c     |    1 +
 trunk/fs/btrfs/file.c           |    5 +-
 trunk/fs/btrfs/inode.c          |   84 +-----
 trunk/fs/btrfs/ioctl.c          |    1 +
 trunk/fs/btrfs/locking.c        |  208 ++-------------
 trunk/fs/btrfs/locking.h        |    6 -
 trunk/fs/btrfs/ordered-data.c   |    4 +-
 trunk/fs/btrfs/ref-cache.c      |    1 -
 trunk/fs/btrfs/ref-cache.h      |    1 +
 trunk/fs/btrfs/super.c          |    6 +-
 trunk/fs/btrfs/transaction.c    |    4 +-
 trunk/fs/btrfs/tree-defrag.c    |    1 -
 trunk/fs/btrfs/tree-log.c       |  354 +++++++++++++-------------
 trunk/fs/btrfs/volumes.c        |   49 ++--
 trunk/fs/btrfs/xattr.c          |   48 +---
 trunk/fs/btrfs/xattr.h          |    2 -
 trunk/fs/buffer.c               |    2 +-
 trunk/fs/compat.c               |    2 +-
 trunk/fs/ecryptfs/crypto.c      |    4 +-
 trunk/fs/exec.c                 |   28 +-
 trunk/fs/internal.h             |    2 +-
 35 files changed, 484 insertions(+), 1450 deletions(-)

diff --git a/[refs] b/[refs]
index b096b072228a..2dd249857957 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: ae1a25da8448271a99745da03100d5299575a269
+refs/heads/master: 0bf2f3aec5474da80a60e1baca629af87ecb67b6
diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS
index 0ea3a6d98714..421504b59c23 100644
--- a/trunk/MAINTAINERS
+++ b/trunk/MAINTAINERS
@@ -1021,14 +1021,6 @@ M:	mb@bu3sch.de
 W:	http://bu3sch.de/btgpio.php
 S:	Maintained
 
-BTRFS FILE SYSTEM
-P:	Chris Mason
-M:	chris.mason@oracle.com
-L:	linux-btrfs@vger.kernel.org
-W:	http://btrfs.wiki.kernel.org/
-T:	git kernel.org:/pub/scm/linux/kernel/git/mason/btrfs-unstable.git
-S:	Maintained
-
 BTTV VIDEO4LINUX DRIVER
 P:	Mauro Carvalho Chehab
 M:	mchehab@infradead.org
diff --git a/trunk/arch/x86/ia32/ia32entry.S b/trunk/arch/x86/ia32/ia32entry.S
index 5a0d76dc56a4..256b00b61892 100644
--- a/trunk/arch/x86/ia32/ia32entry.S
+++ b/trunk/arch/x86/ia32/ia32entry.S
@@ -418,9 +418,9 @@ ENTRY(ia32_syscall)
 	orl $TS_COMPAT,TI_status(%r10)
 	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
 	jnz ia32_tracesys
+ia32_do_syscall:
 	cmpl $(IA32_NR_syscalls-1),%eax
-	ja ia32_badsys
-ia32_do_call:
+	ja int_ret_from_sys_call	/* ia32_tracesys has set RAX(%rsp) */
IA32_ARG_FIXUP call *ia32_sys_call_table(,%rax,8) # xxx: rip relative ia32_sysret: @@ -435,9 +435,7 @@ ia32_tracesys: call syscall_trace_enter LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST - cmpl $(IA32_NR_syscalls-1),%eax - ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ - jmp ia32_do_call + jmp ia32_do_syscall END(ia32_syscall) ia32_badsys: diff --git a/trunk/fs/binfmt_elf.c b/trunk/fs/binfmt_elf.c index 33b7235f853b..e3ff2b9e602f 100644 --- a/trunk/fs/binfmt_elf.c +++ b/trunk/fs/binfmt_elf.c @@ -1208,11 +1208,9 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, * check for an ELF header. If we find one, dump the first page to * aid in determining what was mapped here. */ - if (FILTER(ELF_HEADERS) && - vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) { + if (FILTER(ELF_HEADERS) && vma->vm_file != NULL && vma->vm_pgoff == 0) { u32 __user *header = (u32 __user *) vma->vm_start; u32 word; - mm_segment_t fs = get_fs(); /* * Doing it this way gets the constant folded by GCC. */ @@ -1225,15 +1223,7 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, magic.elfmag[EI_MAG1] = ELFMAG1; magic.elfmag[EI_MAG2] = ELFMAG2; magic.elfmag[EI_MAG3] = ELFMAG3; - /* - * Switch to the user "segment" for get_user(), - * then put back what elf_core_dump() had in place. - */ - set_fs(USER_DS); - if (unlikely(get_user(word, header))) - word = 0; - set_fs(fs); - if (word == magic.cmp) + if (get_user(word, header) == 0 && word == magic.cmp) return PAGE_SIZE; } diff --git a/trunk/fs/btrfs/Kconfig b/trunk/fs/btrfs/Kconfig index 7bb3c020e570..f8fcf999ea1b 100644 --- a/trunk/fs/btrfs/Kconfig +++ b/trunk/fs/btrfs/Kconfig @@ -16,16 +16,3 @@ config BTRFS_FS module will be called btrfs. If unsure, say N. - -config BTRFS_FS_POSIX_ACL - bool "Btrfs POSIX Access Control Lists" - depends on BTRFS_FS - select FS_POSIX_ACL - help - POSIX Access Control Lists (ACLs) support permissions for users and - groups beyond the owner/group/world scheme. - - To learn more about Access Control Lists, visit the POSIX ACLs for - Linux website . - - If you don't know what Access Control Lists are, say N diff --git a/trunk/fs/btrfs/async-thread.c b/trunk/fs/btrfs/async-thread.c index c84ca1f5259a..8e2fec05dbe0 100644 --- a/trunk/fs/btrfs/async-thread.c +++ b/trunk/fs/btrfs/async-thread.c @@ -16,11 +16,11 @@ * Boston, MA 021110-1307, USA. */ +#include #include #include #include -#include -#include +# include #include "async-thread.h" #define WORK_QUEUED_BIT 0 @@ -143,7 +143,6 @@ static int worker_loop(void *arg) struct btrfs_work *work; do { spin_lock_irq(&worker->lock); -again_locked: while (!list_empty(&worker->pending)) { cur = worker->pending.next; work = list_entry(cur, struct btrfs_work, list); @@ -166,50 +165,14 @@ static int worker_loop(void *arg) check_idle_worker(worker); } + worker->working = 0; if (freezing(current)) { - worker->working = 0; - spin_unlock_irq(&worker->lock); refrigerator(); } else { + set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irq(&worker->lock); - if (!kthread_should_stop()) { - cpu_relax(); - /* - * we've dropped the lock, did someone else - * jump_in? 
- */ - smp_mb(); - if (!list_empty(&worker->pending)) - continue; - - /* - * this short schedule allows more work to - * come in without the queue functions - * needing to go through wake_up_process() - * - * worker->working is still 1, so nobody - * is going to try and wake us up - */ - schedule_timeout(1); - smp_mb(); - if (!list_empty(&worker->pending)) - continue; - - /* still no more work?, sleep for real */ - spin_lock_irq(&worker->lock); - set_current_state(TASK_INTERRUPTIBLE); - if (!list_empty(&worker->pending)) - goto again_locked; - - /* - * this makes sure we get a wakeup when someone - * adds something new to the queue - */ - worker->working = 0; - spin_unlock_irq(&worker->lock); - + if (!kthread_should_stop()) schedule(); - } __set_current_state(TASK_RUNNING); } } while (!kthread_should_stop()); @@ -387,14 +350,13 @@ int btrfs_requeue_work(struct btrfs_work *work) { struct btrfs_worker_thread *worker = work->worker; unsigned long flags; - int wake = 0; if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags)) goto out; spin_lock_irqsave(&worker->lock, flags); - list_add_tail(&work->list, &worker->pending); atomic_inc(&worker->num_pending); + list_add_tail(&work->list, &worker->pending); /* by definition we're busy, take ourselves off the idle * list @@ -406,16 +368,10 @@ int btrfs_requeue_work(struct btrfs_work *work) &worker->workers->worker_list); spin_unlock_irqrestore(&worker->workers->lock, flags); } - if (!worker->working) { - wake = 1; - worker->working = 1; - } spin_unlock_irqrestore(&worker->lock, flags); - if (wake) - wake_up_process(worker->task); -out: +out: return 0; } @@ -442,10 +398,9 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) } spin_lock_irqsave(&worker->lock, flags); - - list_add_tail(&work->list, &worker->pending); atomic_inc(&worker->num_pending); check_busy_worker(worker); + list_add_tail(&work->list, &worker->pending); /* * avoid calling into wake_up_process if this thread has already diff --git a/trunk/fs/btrfs/compression.c b/trunk/fs/btrfs/compression.c index ab07627084f1..ee848d8585d9 100644 --- a/trunk/fs/btrfs/compression.c +++ b/trunk/fs/btrfs/compression.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include "compat.h" #include "ctree.h" diff --git a/trunk/fs/btrfs/ctree.c b/trunk/fs/btrfs/ctree.c index 551177c0011a..9e46c0776816 100644 --- a/trunk/fs/btrfs/ctree.c +++ b/trunk/fs/btrfs/ctree.c @@ -54,31 +54,6 @@ struct btrfs_path *btrfs_alloc_path(void) return path; } -/* - * set all locked nodes in the path to blocking locks. This should - * be done before scheduling - */ -noinline void btrfs_set_path_blocking(struct btrfs_path *p) -{ - int i; - for (i = 0; i < BTRFS_MAX_LEVEL; i++) { - if (p->nodes[i] && p->locks[i]) - btrfs_set_lock_blocking(p->nodes[i]); - } -} - -/* - * reset all the locked nodes in the patch to spinning locks. 
- */ -noinline void btrfs_clear_path_blocking(struct btrfs_path *p) -{ - int i; - for (i = 0; i < BTRFS_MAX_LEVEL; i++) { - if (p->nodes[i] && p->locks[i]) - btrfs_clear_lock_blocking(p->nodes[i]); - } -} - /* this also releases the path */ void btrfs_free_path(struct btrfs_path *p) { @@ -297,8 +272,6 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, if (IS_ERR(cow)) return PTR_ERR(cow); - /* cow is set to blocking by btrfs_init_new_buffer */ - copy_extent_buffer(cow, buf, 0, 0, cow->len); btrfs_set_header_bytenr(cow, cow->start); btrfs_set_header_generation(cow, trans->transid); @@ -415,20 +388,17 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, WARN_ON(1); } + spin_lock(&root->fs_info->hash_lock); if (btrfs_header_generation(buf) == trans->transid && btrfs_header_owner(buf) == root->root_key.objectid && !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { *cow_ret = buf; + spin_unlock(&root->fs_info->hash_lock); WARN_ON(prealloc_dest); return 0; } - + spin_unlock(&root->fs_info->hash_lock); search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); - - if (parent) - btrfs_set_lock_blocking(parent); - btrfs_set_lock_blocking(buf); - ret = __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0, prealloc_dest); @@ -534,8 +504,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, if (parent_nritems == 1) return 0; - btrfs_set_lock_blocking(parent); - for (i = start_slot; i < end_slot; i++) { int close = 1; @@ -596,7 +564,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, search_start = last_block; btrfs_tree_lock(cur); - btrfs_set_lock_blocking(cur); err = __btrfs_cow_block(trans, root, cur, parent, i, &cur, search_start, min(16 * blocksize, @@ -895,7 +862,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, return 0; mid = path->nodes[level]; - WARN_ON(!path->locks[level]); WARN_ON(btrfs_header_generation(mid) != trans->transid); @@ -918,7 +884,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, /* promote the child to a root */ child = read_node_slot(root, mid, 0); btrfs_tree_lock(child); - btrfs_set_lock_blocking(child); BUG_ON(!child); ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0); BUG_ON(ret); @@ -935,7 +900,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, add_root_to_dirty_list(root); btrfs_tree_unlock(child); - path->locks[level] = 0; path->nodes[level] = NULL; clean_tree_block(trans, root, mid); @@ -960,7 +924,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, left = read_node_slot(root, parent, pslot - 1); if (left) { btrfs_tree_lock(left); - btrfs_set_lock_blocking(left); wret = btrfs_cow_block(trans, root, left, parent, pslot - 1, &left, 0); if (wret) { @@ -971,7 +934,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, right = read_node_slot(root, parent, pslot + 1); if (right) { btrfs_tree_lock(right); - btrfs_set_lock_blocking(right); wret = btrfs_cow_block(trans, root, right, parent, pslot + 1, &right, 0); if (wret) { @@ -1147,8 +1109,6 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, u32 left_nr; btrfs_tree_lock(left); - btrfs_set_lock_blocking(left); - left_nr = btrfs_header_nritems(left); if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { wret = 1; @@ -1195,10 +1155,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, */ if (right) { u32 right_nr; - btrfs_tree_lock(right); - 
btrfs_set_lock_blocking(right); - right_nr = btrfs_header_nritems(right); if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { wret = 1; @@ -1253,7 +1210,8 @@ static noinline void reada_for_search(struct btrfs_root *root, struct btrfs_disk_key disk_key; u32 nritems; u64 search; - u64 target; + u64 lowest_read; + u64 highest_read; u64 nread = 0; int direction = path->reada; struct extent_buffer *eb; @@ -1277,7 +1235,8 @@ static noinline void reada_for_search(struct btrfs_root *root, return; } - target = search; + highest_read = search; + lowest_read = search; nritems = btrfs_header_nritems(node); nr = slot; @@ -1297,80 +1256,27 @@ static noinline void reada_for_search(struct btrfs_root *root, break; } search = btrfs_node_blockptr(node, nr); - if ((search <= target && target - search <= 65536) || - (search > target && search - target <= 65536)) { + if ((search >= lowest_read && search <= highest_read) || + (search < lowest_read && lowest_read - search <= 16384) || + (search > highest_read && search - highest_read <= 16384)) { readahead_tree_block(root, search, blocksize, btrfs_node_ptr_generation(node, nr)); nread += blocksize; } nscan++; - if ((nread > 65536 || nscan > 32)) + if (path->reada < 2 && (nread > (64 * 1024) || nscan > 32)) break; - } -} - -/* - * returns -EAGAIN if it had to drop the path, or zero if everything was in - * cache - */ -static noinline int reada_for_balance(struct btrfs_root *root, - struct btrfs_path *path, int level) -{ - int slot; - int nritems; - struct extent_buffer *parent; - struct extent_buffer *eb; - u64 gen; - u64 block1 = 0; - u64 block2 = 0; - int ret = 0; - int blocksize; - parent = path->nodes[level - 1]; - if (!parent) - return 0; - - nritems = btrfs_header_nritems(parent); - slot = path->slots[level]; - blocksize = btrfs_level_size(root, level); + if (nread > (256 * 1024) || nscan > 128) + break; - if (slot > 0) { - block1 = btrfs_node_blockptr(parent, slot - 1); - gen = btrfs_node_ptr_generation(parent, slot - 1); - eb = btrfs_find_tree_block(root, block1, blocksize); - if (eb && btrfs_buffer_uptodate(eb, gen)) - block1 = 0; - free_extent_buffer(eb); - } - if (slot < nritems) { - block2 = btrfs_node_blockptr(parent, slot + 1); - gen = btrfs_node_ptr_generation(parent, slot + 1); - eb = btrfs_find_tree_block(root, block2, blocksize); - if (eb && btrfs_buffer_uptodate(eb, gen)) - block2 = 0; - free_extent_buffer(eb); - } - if (block1 || block2) { - ret = -EAGAIN; - btrfs_release_path(root, path); - if (block1) - readahead_tree_block(root, block1, blocksize, 0); - if (block2) - readahead_tree_block(root, block2, blocksize, 0); - - if (block1) { - eb = read_tree_block(root, block1, blocksize, 0); - free_extent_buffer(eb); - } - if (block1) { - eb = read_tree_block(root, block2, blocksize, 0); - free_extent_buffer(eb); - } + if (search < lowest_read) + lowest_read = search; + if (search > highest_read) + highest_read = search; } - return ret; } - /* * when we walk down the tree, it is usually safe to unlock the higher layers * in the tree. The exceptions are when our path goes through slot 0, because @@ -1421,32 +1327,6 @@ static noinline void unlock_up(struct btrfs_path *path, int level, } } -/* - * This releases any locks held in the path starting at level and - * going all the way up to the root. - * - * btrfs_search_slot will keep the lock held on higher nodes in a few - * corner cases, such as COW of the block at slot zero in the node. 
This - * ignores those rules, and it should only be called when there are no - * more updates to be done higher up in the tree. - */ -noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level) -{ - int i; - - if (path->keep_locks || path->lowest_level) - return; - - for (i = level; i < BTRFS_MAX_LEVEL; i++) { - if (!path->nodes[i]) - continue; - if (!path->locks[i]) - continue; - btrfs_tree_unlock(path->nodes[i]); - path->locks[i] = 0; - } -} - /* * look for key in the tree. path is filled in with nodes along the way * if key is found, we return zero and you can find the item in the leaf @@ -1507,30 +1387,31 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root int wret; /* is a cow on this block not required */ + spin_lock(&root->fs_info->hash_lock); if (btrfs_header_generation(b) == trans->transid && btrfs_header_owner(b) == root->root_key.objectid && !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) { + spin_unlock(&root->fs_info->hash_lock); goto cow_done; } + spin_unlock(&root->fs_info->hash_lock); /* ok, we have to cow, is our old prealloc the right * size? */ if (prealloc_block.objectid && prealloc_block.offset != b->len) { - btrfs_release_path(root, p); btrfs_free_reserved_extent(root, prealloc_block.objectid, prealloc_block.offset); prealloc_block.objectid = 0; - goto again; } /* * for higher level blocks, try not to allocate blocks * with the block and the parent locks held. */ - if (level > 0 && !prealloc_block.objectid && + if (level > 1 && !prealloc_block.objectid && btrfs_path_lock_waiting(p, level)) { u32 size = b->len; u64 hint = b->start; @@ -1544,8 +1425,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root goto again; } - btrfs_set_path_blocking(p); - wret = btrfs_cow_block(trans, root, b, p->nodes[level + 1], p->slots[level + 1], @@ -1567,22 +1446,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root if (!p->skip_locking) p->locks[level] = 1; - btrfs_clear_path_blocking(p); - - /* - * we have a lock on b and as long as we aren't changing - * the tree, there is no way to for the items in b to change. - * It is safe to drop the lock on our parent before we - * go through the expensive btree search on b. - * - * If cow is true, then we might be changing slot zero, - * which may require changing the parent. So, we can't - * drop the lock until after we know which slot we're - * operating on. 
- */ - if (!cow) - btrfs_unlock_up_safe(p, level + 1); - ret = check_block(root, p, level); if (ret) { ret = -1; @@ -1590,7 +1453,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root } ret = bin_search(b, key, level, &slot); - if (level != 0) { if (ret && slot > 0) slot -= 1; @@ -1598,16 +1460,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >= BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { - int sret; - - sret = reada_for_balance(root, p, level); - if (sret) - goto again; - - btrfs_set_path_blocking(p); - sret = split_node(trans, root, p, level); - btrfs_clear_path_blocking(p); - + int sret = split_node(trans, root, p, level); BUG_ON(sret > 0); if (sret) { ret = sret; @@ -1615,19 +1468,9 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root } b = p->nodes[level]; slot = p->slots[level]; - } else if (ins_len < 0 && - btrfs_header_nritems(b) < - BTRFS_NODEPTRS_PER_BLOCK(root) / 4) { - int sret; - - sret = reada_for_balance(root, p, level); - if (sret) - goto again; - - btrfs_set_path_blocking(p); - sret = balance_level(trans, root, p, level); - btrfs_clear_path_blocking(p); - + } else if (ins_len < 0) { + int sret = balance_level(trans, root, p, + level); if (sret) { ret = sret; goto done; @@ -1661,7 +1504,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root * of the btree by dropping locks before * we read. */ - if (level > 0) { + if (level > 1) { btrfs_release_path(NULL, p); if (tmp) free_extent_buffer(tmp); @@ -1676,7 +1519,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root free_extent_buffer(tmp); goto again; } else { - btrfs_set_path_blocking(p); if (tmp) free_extent_buffer(tmp); if (should_reada) @@ -1686,29 +1528,14 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root b = read_node_slot(root, b, slot); } } - if (!p->skip_locking) { - int lret; - - btrfs_clear_path_blocking(p); - lret = btrfs_try_spin_lock(b); - - if (!lret) { - btrfs_set_path_blocking(p); - btrfs_tree_lock(b); - btrfs_clear_path_blocking(p); - } - } + if (!p->skip_locking) + btrfs_tree_lock(b); } else { p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, b) < ins_len) { - int sret; - - btrfs_set_path_blocking(p); - sret = split_leaf(trans, root, key, + int sret = split_leaf(trans, root, key, p, ins_len, ret == 0); - btrfs_clear_path_blocking(p); - BUG_ON(sret > 0); if (sret) { ret = sret; @@ -1722,16 +1549,12 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root } ret = 1; done: - /* - * we don't really know what they plan on doing with the path - * from here on, so for now just mark it as blocking - */ - btrfs_set_path_blocking(p); if (prealloc_block.objectid) { btrfs_free_reserved_extent(root, prealloc_block.objectid, prealloc_block.offset); } + return ret; } @@ -1755,8 +1578,6 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans, ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0); BUG_ON(ret); - btrfs_set_lock_blocking(eb); - parent = eb; while (1) { level = btrfs_header_level(parent); @@ -1781,7 +1602,6 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans, eb = read_tree_block(root, bytenr, blocksize, generation); btrfs_tree_lock(eb); - btrfs_set_lock_blocking(eb); } /* @@ -1806,7 +1626,6 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans, eb = read_tree_block(root, bytenr, blocksize, generation); btrfs_tree_lock(eb); - 
btrfs_set_lock_blocking(eb); } ret = btrfs_cow_block(trans, root, eb, parent, slot, @@ -2353,8 +2172,6 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root right = read_node_slot(root, upper, slot + 1); btrfs_tree_lock(right); - btrfs_set_lock_blocking(right); - free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size) goto out_unlock; @@ -2550,8 +2367,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root left = read_node_slot(root, path->nodes[1], slot - 1); btrfs_tree_lock(left); - btrfs_set_lock_blocking(left); - free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size) { ret = 1; @@ -3010,12 +2825,6 @@ int btrfs_split_item(struct btrfs_trans_handle *trans, path->keep_locks = 0; BUG_ON(ret); - /* - * make sure any changes to the path from split_leaf leave it - * in a blocking state - */ - btrfs_set_path_blocking(path); - leaf = path->nodes[0]; BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); @@ -3545,7 +3354,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, BUG(); } out: - btrfs_unlock_up_safe(path, 1); return ret; } @@ -3633,22 +3441,15 @@ noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, { int ret; u64 root_gen = btrfs_header_generation(path->nodes[1]); - u64 parent_start = path->nodes[1]->start; - u64 parent_owner = btrfs_header_owner(path->nodes[1]); ret = del_ptr(trans, root, path, 1, path->slots[1]); if (ret) return ret; - /* - * btrfs_free_extent is expensive, we want to make sure we - * aren't holding any locks when we call it - */ - btrfs_unlock_up_safe(path, 0); - ret = btrfs_free_extent(trans, root, bytenr, btrfs_level_size(root, 0), - parent_start, parent_owner, + path->nodes[1]->start, + btrfs_header_owner(path->nodes[1]), root_gen, 0, 1); return ret; } @@ -3920,14 +3721,12 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, */ if (slot >= nritems) { path->slots[level] = slot; - btrfs_set_path_blocking(path); sret = btrfs_find_next_key(root, path, min_key, level, cache_only, min_trans); if (sret == 0) { btrfs_release_path(root, path); goto again; } else { - btrfs_clear_path_blocking(path); goto out; } } @@ -3939,20 +3738,16 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, unlock_up(path, level, 1); goto out; } - btrfs_set_path_blocking(path); cur = read_node_slot(root, cur, slot); btrfs_tree_lock(cur); - path->locks[level - 1] = 1; path->nodes[level - 1] = cur; unlock_up(path, level, 1); - btrfs_clear_path_blocking(path); } out: if (ret == 0) memcpy(min_key, &found_key, sizeof(found_key)); - btrfs_set_path_blocking(path); return ret; } @@ -4048,7 +3843,6 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) if (ret < 0) return ret; - btrfs_set_path_blocking(path); nritems = btrfs_header_nritems(path->nodes[0]); /* * by releasing the path above we dropped all our locks. 
A balance @@ -4079,7 +3873,6 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) free_extent_buffer(next); } - /* the path was set to blocking above */ if (level == 1 && (path->locks[1] || path->skip_locking) && path->reada) reada_for_search(root, path, level, slot, 0); @@ -4088,7 +3881,6 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) if (!path->skip_locking) { WARN_ON(!btrfs_tree_locked(c)); btrfs_tree_lock(next); - btrfs_set_lock_blocking(next); } break; } @@ -4105,15 +3897,12 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) path->locks[level] = 1; if (!level) break; - - btrfs_set_path_blocking(path); if (level == 1 && path->locks[1] && path->reada) reada_for_search(root, path, level, slot, 0); next = read_node_slot(root, next, 0); if (!path->skip_locking) { WARN_ON(!btrfs_tree_locked(path->nodes[level])); btrfs_tree_lock(next); - btrfs_set_lock_blocking(next); } } done: @@ -4138,7 +3927,6 @@ int btrfs_previous_item(struct btrfs_root *root, while (1) { if (path->slots[0] == 0) { - btrfs_set_path_blocking(path); ret = btrfs_prev_leaf(root, path); if (ret != 0) return ret; diff --git a/trunk/fs/btrfs/ctree.h b/trunk/fs/btrfs/ctree.h index 531db112c8bd..eee060f88113 100644 --- a/trunk/fs/btrfs/ctree.h +++ b/trunk/fs/btrfs/ctree.h @@ -454,11 +454,17 @@ struct btrfs_timespec { __le32 nsec; } __attribute__ ((__packed__)); -enum btrfs_compression_type { +typedef enum { BTRFS_COMPRESS_NONE = 0, BTRFS_COMPRESS_ZLIB = 1, BTRFS_COMPRESS_LAST = 2, -}; +} btrfs_compression_type; + +/* we don't understand any encryption methods right now */ +typedef enum { + BTRFS_ENCRYPTION_NONE = 0, + BTRFS_ENCRYPTION_LAST = 1, +} btrfs_encryption_type; struct btrfs_inode_item { /* nfs style generation number */ @@ -695,7 +701,9 @@ struct btrfs_fs_info { struct btrfs_transaction *running_transaction; wait_queue_head_t transaction_throttle; wait_queue_head_t transaction_wait; + wait_queue_head_t async_submit_wait; + wait_queue_head_t tree_log_wait; struct btrfs_super_block super_copy; struct btrfs_super_block super_for_commit; @@ -703,6 +711,7 @@ struct btrfs_fs_info { struct super_block *sb; struct inode *btree_inode; struct backing_dev_info bdi; + spinlock_t hash_lock; struct mutex trans_mutex; struct mutex tree_log_mutex; struct mutex transaction_kthread_mutex; @@ -721,6 +730,10 @@ struct btrfs_fs_info { atomic_t async_submit_draining; atomic_t nr_async_bios; atomic_t async_delalloc_pages; + atomic_t tree_log_writers; + atomic_t tree_log_commit; + unsigned long tree_log_batch; + u64 tree_log_transid; /* * this is used by the balancing code to wait for all the pending @@ -820,14 +833,7 @@ struct btrfs_root { struct kobject root_kobj; struct completion kobj_unregister; struct mutex objectid_mutex; - struct mutex log_mutex; - wait_queue_head_t log_writer_wait; - wait_queue_head_t log_commit_wait[2]; - atomic_t log_writers; - atomic_t log_commit[2]; - unsigned long log_transid; - unsigned long log_batch; u64 objectid; u64 last_trans; @@ -1835,10 +1841,6 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); void btrfs_free_path(struct btrfs_path *p); void btrfs_init_path(struct btrfs_path *p); -void btrfs_set_path_blocking(struct btrfs_path *p); -void btrfs_clear_path_blocking(struct btrfs_path *p); -void btrfs_unlock_up_safe(struct btrfs_path *p, int level); - int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int slot, int nr); int 
btrfs_del_leaf(struct btrfs_trans_handle *trans, diff --git a/trunk/fs/btrfs/disk-io.c b/trunk/fs/btrfs/disk-io.c index 5aebddd71193..81a313874ae5 100644 --- a/trunk/fs/btrfs/disk-io.c +++ b/trunk/fs/btrfs/disk-io.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. */ +#include #include #include #include @@ -799,7 +800,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); if (ret == 0) - set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); + buf->flags |= EXTENT_UPTODATE; else WARN_ON(1); return buf; @@ -813,10 +814,6 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (btrfs_header_generation(buf) == root->fs_info->running_transaction->transid) { WARN_ON(!btrfs_tree_locked(buf)); - - /* ugh, clear_extent_buffer_dirty can be expensive */ - btrfs_set_lock_blocking(buf); - clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); } @@ -853,14 +850,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, spin_lock_init(&root->list_lock); mutex_init(&root->objectid_mutex); mutex_init(&root->log_mutex); - init_waitqueue_head(&root->log_writer_wait); - init_waitqueue_head(&root->log_commit_wait[0]); - init_waitqueue_head(&root->log_commit_wait[1]); - atomic_set(&root->log_commit[0], 0); - atomic_set(&root->log_commit[1], 0); - atomic_set(&root->log_writers, 0); - root->log_batch = 0; - root->log_transid = 0; extent_io_tree_init(&root->dirty_log_pages, fs_info->btree_inode->i_mapping, GFP_NOFS); @@ -945,16 +934,15 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, return 0; } -static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) +int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) { struct btrfs_root *root; struct btrfs_root *tree_root = fs_info->tree_root; - struct extent_buffer *leaf; root = kzalloc(sizeof(*root), GFP_NOFS); if (!root) - return ERR_PTR(-ENOMEM); + return -ENOMEM; __setup_root(tree_root->nodesize, tree_root->leafsize, tree_root->sectorsize, tree_root->stripesize, @@ -963,23 +951,12 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; root->root_key.type = BTRFS_ROOT_ITEM_KEY; root->root_key.offset = BTRFS_TREE_LOG_OBJECTID; - /* - * log trees do not get reference counted because they go away - * before a real commit is actually done. They do store pointers - * to file data extents, and those reference counts still get - * updated (along with back refs to the log tree). 
- */ root->ref_cows = 0; - leaf = btrfs_alloc_free_block(trans, root, root->leafsize, - 0, BTRFS_TREE_LOG_OBJECTID, - trans->transid, 0, 0, 0); - if (IS_ERR(leaf)) { - kfree(root); - return ERR_CAST(leaf); - } + root->node = btrfs_alloc_free_block(trans, root, root->leafsize, + 0, BTRFS_TREE_LOG_OBJECTID, + trans->transid, 0, 0, 0); - root->node = leaf; btrfs_set_header_nritems(root->node, 0); btrfs_set_header_level(root->node, 0); btrfs_set_header_bytenr(root->node, root->node->start); @@ -991,48 +968,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, BTRFS_FSID_SIZE); btrfs_mark_buffer_dirty(root->node); btrfs_tree_unlock(root->node); - return root; -} - -int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) -{ - struct btrfs_root *log_root; - - log_root = alloc_log_tree(trans, fs_info); - if (IS_ERR(log_root)) - return PTR_ERR(log_root); - WARN_ON(fs_info->log_root_tree); - fs_info->log_root_tree = log_root; - return 0; -} - -int btrfs_add_log_tree(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - struct btrfs_root *log_root; - struct btrfs_inode_item *inode_item; - - log_root = alloc_log_tree(trans, root->fs_info); - if (IS_ERR(log_root)) - return PTR_ERR(log_root); - - log_root->last_trans = trans->transid; - log_root->root_key.offset = root->root_key.objectid; - - inode_item = &log_root->root_item.inode; - inode_item->generation = cpu_to_le64(1); - inode_item->size = cpu_to_le64(3); - inode_item->nlink = cpu_to_le32(1); - inode_item->nbytes = cpu_to_le64(root->leafsize); - inode_item->mode = cpu_to_le32(S_IFDIR | 0755); - - btrfs_set_root_bytenr(&log_root->root_item, log_root->node->start); - btrfs_set_root_generation(&log_root->root_item, trans->transid); - - WARN_ON(root->log_root); - root->log_root = log_root; - root->log_transid = 0; + fs_info->log_root_tree = root; return 0; } @@ -1200,6 +1136,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) { struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data; int ret = 0; + struct list_head *cur; struct btrfs_device *device; struct backing_dev_info *bdi; #if 0 @@ -1207,7 +1144,8 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) btrfs_congested_async(info, 0)) return 1; #endif - list_for_each_entry(device, &info->fs_devices->devices, dev_list) { + list_for_each(cur, &info->fs_devices->devices) { + device = list_entry(cur, struct btrfs_device, dev_list); if (!device->bdev) continue; bdi = blk_get_backing_dev_info(device->bdev); @@ -1225,11 +1163,13 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) */ static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page) { + struct list_head *cur; struct btrfs_device *device; struct btrfs_fs_info *info; info = (struct btrfs_fs_info *)bdi->unplug_io_data; - list_for_each_entry(device, &info->fs_devices->devices, dev_list) { + list_for_each(cur, &info->fs_devices->devices) { + device = list_entry(cur, struct btrfs_device, dev_list); if (!device->bdev) continue; @@ -1507,6 +1447,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->hashers); INIT_LIST_HEAD(&fs_info->delalloc_inodes); + spin_lock_init(&fs_info->hash_lock); spin_lock_init(&fs_info->delalloc_lock); spin_lock_init(&fs_info->new_trans_lock); spin_lock_init(&fs_info->ref_cache_lock); @@ -1594,6 +1535,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, 
init_waitqueue_head(&fs_info->transaction_throttle); init_waitqueue_head(&fs_info->transaction_wait); init_waitqueue_head(&fs_info->async_submit_wait); + init_waitqueue_head(&fs_info->tree_log_wait); + atomic_set(&fs_info->tree_log_commit, 0); + atomic_set(&fs_info->tree_log_writers, 0); + fs_info->tree_log_transid = 0; __setup_root(4096, 4096, 4096, 4096, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); @@ -1682,8 +1627,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, * low idle thresh */ fs_info->endio_workers.idle_thresh = 4; - fs_info->endio_meta_workers.idle_thresh = 4; - fs_info->endio_write_workers.idle_thresh = 64; fs_info->endio_meta_write_workers.idle_thresh = 64; @@ -1797,13 +1740,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, "btrfs-cleaner"); - if (IS_ERR(fs_info->cleaner_kthread)) + if (!fs_info->cleaner_kthread) goto fail_csum_root; fs_info->transaction_kthread = kthread_run(transaction_kthread, tree_root, "btrfs-transaction"); - if (IS_ERR(fs_info->transaction_kthread)) + if (!fs_info->transaction_kthread) goto fail_cleaner; if (btrfs_super_log_root(disk_super) != 0) { @@ -1885,14 +1828,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, fail_iput: invalidate_inode_pages2(fs_info->btree_inode->i_mapping); iput(fs_info->btree_inode); - +fail: btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); - bdi_destroy(&fs_info->bdi); -fail: kfree(extent_root); kfree(tree_root); + bdi_destroy(&fs_info->bdi); kfree(fs_info); kfree(chunk_root); kfree(dev_root); @@ -2053,6 +1995,7 @@ static int write_dev_supers(struct btrfs_device *device, int write_all_supers(struct btrfs_root *root, int max_mirrors) { + struct list_head *cur; struct list_head *head = &root->fs_info->fs_devices->devices; struct btrfs_device *dev; struct btrfs_super_block *sb; @@ -2068,7 +2011,8 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) sb = &root->fs_info->super_for_commit; dev_item = &sb->dev_item; - list_for_each_entry(dev, head, dev_list) { + list_for_each(cur, head) { + dev = list_entry(cur, struct btrfs_device, dev_list); if (!dev->bdev) { total_errors++; continue; @@ -2101,7 +2045,8 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) } total_errors = 0; - list_for_each_entry(dev, head, dev_list) { + list_for_each(cur, head) { + dev = list_entry(cur, struct btrfs_device, dev_list); if (!dev->bdev) continue; if (!dev->in_fs_metadata || !dev->writeable) @@ -2315,8 +2260,6 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) u64 transid = btrfs_header_generation(buf); struct inode *btree_inode = root->fs_info->btree_inode; - btrfs_set_lock_blocking(buf); - WARN_ON(!btrfs_tree_locked(buf)); if (transid != root->fs_info->generation) { printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " @@ -2359,13 +2302,14 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) int ret; ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); if (ret == 0) - set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); + buf->flags |= EXTENT_UPTODATE; return ret; } int btree_lock_page_hook(struct page *page) { struct inode *inode = page->mapping->host; + struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_buffer *eb; unsigned long len; @@ -2380,7 +2324,9 @@ int btree_lock_page_hook(struct page *page) goto out; 
btrfs_tree_lock(eb); + spin_lock(&root->fs_info->hash_lock); btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); + spin_unlock(&root->fs_info->hash_lock); btrfs_tree_unlock(eb); free_extent_buffer(eb); out: diff --git a/trunk/fs/btrfs/disk-io.h b/trunk/fs/btrfs/disk-io.h index 494a56eb2986..c0ff404c31b7 100644 --- a/trunk/fs/btrfs/disk-io.h +++ b/trunk/fs/btrfs/disk-io.h @@ -98,7 +98,5 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); -int btrfs_add_log_tree(struct btrfs_trans_handle *trans, - struct btrfs_root *root); int btree_lock_page_hook(struct page *page); #endif diff --git a/trunk/fs/btrfs/extent-tree.c b/trunk/fs/btrfs/extent-tree.c index 7527523c2d2d..293da650873f 100644 --- a/trunk/fs/btrfs/extent-tree.c +++ b/trunk/fs/btrfs/extent-tree.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include "compat.h" #include "hash.h" #include "crc32c.h" @@ -30,6 +30,7 @@ #include "volumes.h" #include "locking.h" #include "ref-cache.h" +#include "compat.h" #define PENDING_EXTENT_INSERT 0 #define PENDING_EXTENT_DELETE 1 @@ -325,8 +326,10 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, u64 flags) { struct list_head *head = &info->space_info; + struct list_head *cur; struct btrfs_space_info *found; - list_for_each_entry(found, head, list) { + list_for_each(cur, head) { + found = list_entry(cur, struct btrfs_space_info, list); if (found->flags == flags) return found; } @@ -1522,55 +1525,15 @@ int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, return ret; } -/* when a block goes through cow, we update the reference counts of - * everything that block points to. The internal pointers of the block - * can be in just about any order, and it is likely to have clusters of - * things that are close together and clusters of things that are not. - * - * To help reduce the seeks that come with updating all of these reference - * counts, sort them by byte number before actual updates are done. - * - * struct refsort is used to match byte number to slot in the btree block. - * we sort based on the byte number and then use the slot to actually - * find the item. - * - * struct refsort is smaller than strcut btrfs_item and smaller than - * struct btrfs_key_ptr. Since we're currently limited to the page size - * for a btree block, there's no way for a kmalloc of refsorts for a - * single node to be bigger than a page. 
- */ -struct refsort { - u64 bytenr; - u32 slot; -}; - -/* - * for passing into sort() - */ -static int refsort_cmp(const void *a_void, const void *b_void) -{ - const struct refsort *a = a_void; - const struct refsort *b = b_void; - - if (a->bytenr < b->bytenr) - return -1; - if (a->bytenr > b->bytenr) - return 1; - return 0; -} - - -noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct extent_buffer *orig_buf, - struct extent_buffer *buf, u32 *nr_extents) +int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct extent_buffer *orig_buf, struct extent_buffer *buf, + u32 *nr_extents) { u64 bytenr; u64 ref_root; u64 orig_root; u64 ref_generation; u64 orig_generation; - struct refsort *sorted; u32 nritems; u32 nr_file_extents = 0; struct btrfs_key key; @@ -1579,8 +1542,6 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, int level; int ret = 0; int faili = 0; - int refi = 0; - int slot; int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, u64, u64, u64, u64, u64, u64, u64, u64); @@ -1592,9 +1553,6 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, nritems = btrfs_header_nritems(buf); level = btrfs_header_level(buf); - sorted = kmalloc(sizeof(struct refsort) * nritems, GFP_NOFS); - BUG_ON(!sorted); - if (root->ref_cows) { process_func = __btrfs_inc_extent_ref; } else { @@ -1607,11 +1565,6 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, process_func = __btrfs_update_extent_ref; } - /* - * we make two passes through the items. In the first pass we - * only record the byte number and slot. Then we sort based on - * byte number and do the actual work based on the sorted results - */ for (i = 0; i < nritems; i++) { cond_resched(); if (level == 0) { @@ -1628,32 +1581,6 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, continue; nr_file_extents++; - sorted[refi].bytenr = bytenr; - sorted[refi].slot = i; - refi++; - } else { - bytenr = btrfs_node_blockptr(buf, i); - sorted[refi].bytenr = bytenr; - sorted[refi].slot = i; - refi++; - } - } - /* - * if refi == 0, we didn't actually put anything into the sorted - * array and we're done - */ - if (refi == 0) - goto out; - - sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); - - for (i = 0; i < refi; i++) { - cond_resched(); - slot = sorted[i].slot; - bytenr = sorted[i].bytenr; - - if (level == 0) { - btrfs_item_key_to_cpu(buf, &key, slot); ret = process_func(trans, root, bytenr, orig_buf->start, buf->start, @@ -1662,25 +1589,25 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, key.objectid); if (ret) { - faili = slot; + faili = i; WARN_ON(1); goto fail; } } else { + bytenr = btrfs_node_blockptr(buf, i); ret = process_func(trans, root, bytenr, orig_buf->start, buf->start, orig_root, ref_root, orig_generation, ref_generation, level - 1); if (ret) { - faili = slot; + faili = i; WARN_ON(1); goto fail; } } } out: - kfree(sorted); if (nr_extents) { if (level == 0) *nr_extents = nr_file_extents; @@ -1689,7 +1616,6 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, } return 0; fail: - kfree(sorted); WARN_ON(1); return ret; } @@ -2233,8 +2159,7 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, ret = find_first_extent_bit(&info->extent_ins, search, &start, &end, EXTENT_WRITEBACK); if (ret) { - if (skipped && all && !num_inserts && - list_empty(&update_list)) { + if (skipped && all && !num_inserts) { skipped = 0; search = 0; continue; @@ -2622,7 +2547,6 @@ static int 
del_pending_extents(struct btrfs_trans_handle *trans, if (ret) { if (all && skipped && !nr) { search = 0; - skipped = 0; continue; } mutex_unlock(&info->extent_ins_mutex); @@ -2776,9 +2700,13 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, /* if metadata always pin */ if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { - mutex_lock(&root->fs_info->pinned_mutex); - btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); - mutex_unlock(&root->fs_info->pinned_mutex); + struct btrfs_block_group_cache *cache; + + /* btrfs_free_reserved_extent */ + cache = btrfs_lookup_block_group(root->fs_info, bytenr); + BUG_ON(!cache); + btrfs_add_free_space(cache, bytenr, num_bytes); + put_block_group(cache); update_reserved_extents(root, bytenr, num_bytes, 0); return 0; } @@ -3086,6 +3014,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, static void dump_space_info(struct btrfs_space_info *info, u64 bytes) { struct btrfs_block_group_cache *cache; + struct list_head *l; printk(KERN_INFO "space_info has %llu free, is %sfull\n", (unsigned long long)(info->total_bytes - info->bytes_used - @@ -3093,7 +3022,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) (info->full) ? "" : "not "); down_read(&info->groups_sem); - list_for_each_entry(cache, &info->block_groups, list) { + list_for_each(l, &info->block_groups) { + cache = list_entry(l, struct btrfs_block_group_cache, list); spin_lock(&cache->lock); printk(KERN_INFO "block group %llu has %llu bytes, %llu used " "%llu pinned %llu reserved\n", @@ -3412,10 +3342,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, btrfs_set_header_generation(buf, trans->transid); btrfs_tree_lock(buf); clean_tree_block(trans, root, buf); - - btrfs_set_lock_blocking(buf); btrfs_set_buffer_uptodate(buf); - if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { set_extent_dirty(&root->dirty_log_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); @@ -3424,7 +3351,6 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, buf->start + buf->len - 1, GFP_NOFS); } trans->blocks_used++; - /* this returns a buffer locked for blocking */ return buf; } @@ -3462,73 +3388,36 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, { u64 leaf_owner; u64 leaf_generation; - struct refsort *sorted; struct btrfs_key key; struct btrfs_file_extent_item *fi; int i; int nritems; int ret; - int refi = 0; - int slot; BUG_ON(!btrfs_is_leaf(leaf)); nritems = btrfs_header_nritems(leaf); leaf_owner = btrfs_header_owner(leaf); leaf_generation = btrfs_header_generation(leaf); - sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); - /* we do this loop twice. The first time we build a list - * of the extents we have a reference on, then we sort the list - * by bytenr. The second time around we actually do the - * extent freeing. 
- */ for (i = 0; i < nritems; i++) { u64 disk_bytenr; cond_resched(); btrfs_item_key_to_cpu(leaf, &key, i); - - /* only extents have references, skip everything else */ if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; - fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); - - /* inline extents live in the btree, they don't have refs */ if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) continue; - + /* + * FIXME make sure to insert a trans record that + * repeats the snapshot del on crash + */ disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); - - /* holes don't have refs */ if (disk_bytenr == 0) continue; - sorted[refi].bytenr = disk_bytenr; - sorted[refi].slot = i; - refi++; - } - - if (refi == 0) - goto out; - - sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); - - for (i = 0; i < refi; i++) { - u64 disk_bytenr; - - disk_bytenr = sorted[i].bytenr; - slot = sorted[i].slot; - - cond_resched(); - - btrfs_item_key_to_cpu(leaf, &key, slot); - if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) - continue; - - fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); - ret = __btrfs_free_extent(trans, root, disk_bytenr, btrfs_file_extent_disk_num_bytes(leaf, fi), leaf->start, leaf_owner, leaf_generation, @@ -3539,8 +3428,6 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, wake_up(&root->fs_info->transaction_throttle); cond_resched(); } -out: - kfree(sorted); return 0; } @@ -3550,25 +3437,9 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, { int i; int ret; - struct btrfs_extent_info *info; - struct refsort *sorted; - - if (ref->nritems == 0) - return 0; + struct btrfs_extent_info *info = ref->extents; - sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS); for (i = 0; i < ref->nritems; i++) { - sorted[i].bytenr = ref->extents[i].bytenr; - sorted[i].slot = i; - } - sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL); - - /* - * the items in the ref were sorted when the ref was inserted - * into the ref cache, so this is already in order - */ - for (i = 0; i < ref->nritems; i++) { - info = ref->extents + sorted[i].slot; ret = __btrfs_free_extent(trans, root, info->bytenr, info->num_bytes, ref->bytenr, ref->owner, ref->generation, @@ -3582,7 +3453,6 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, info++; } - kfree(sorted); return 0; } @@ -3626,152 +3496,6 @@ static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, return ret; } -/* - * this is used while deleting old snapshots, and it drops the refs - * on a whole subtree starting from a level 1 node. - * - * The idea is to sort all the leaf pointers, and then drop the - * ref on all the leaves in order. Most of the time the leaves - * will have ref cache entries, so no leaf IOs will be required to - * find the extents they have references on. - * - * For each leaf, any references it has are also dropped in order - * - * This ends up dropping the references in something close to optimal - * order for reading and modifying the extent allocation tree. 
- */ -static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path) -{ - u64 bytenr; - u64 root_owner; - u64 root_gen; - struct extent_buffer *eb = path->nodes[1]; - struct extent_buffer *leaf; - struct btrfs_leaf_ref *ref; - struct refsort *sorted = NULL; - int nritems = btrfs_header_nritems(eb); - int ret; - int i; - int refi = 0; - int slot = path->slots[1]; - u32 blocksize = btrfs_level_size(root, 0); - u32 refs; - - if (nritems == 0) - goto out; - - root_owner = btrfs_header_owner(eb); - root_gen = btrfs_header_generation(eb); - sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); - - /* - * step one, sort all the leaf pointers so we don't scribble - * randomly into the extent allocation tree - */ - for (i = slot; i < nritems; i++) { - sorted[refi].bytenr = btrfs_node_blockptr(eb, i); - sorted[refi].slot = i; - refi++; - } - - /* - * nritems won't be zero, but if we're picking up drop_snapshot - * after a crash, slot might be > 0, so double check things - * just in case. - */ - if (refi == 0) - goto out; - - sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); - - /* - * the first loop frees everything the leaves point to - */ - for (i = 0; i < refi; i++) { - u64 ptr_gen; - - bytenr = sorted[i].bytenr; - - /* - * check the reference count on this leaf. If it is > 1 - * we just decrement it below and don't update any - * of the refs the leaf points to. - */ - ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); - BUG_ON(ret); - if (refs != 1) - continue; - - ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot); - - /* - * the leaf only had one reference, which means the - * only thing pointing to this leaf is the snapshot - * we're deleting. It isn't possible for the reference - * count to increase again later - * - * The reference cache is checked for the leaf, - * and if found we'll be able to drop any refs held by - * the leaf without needing to read it in. - */ - ref = btrfs_lookup_leaf_ref(root, bytenr); - if (ref && ref->generation != ptr_gen) { - btrfs_free_leaf_ref(root, ref); - ref = NULL; - } - if (ref) { - ret = cache_drop_leaf_ref(trans, root, ref); - BUG_ON(ret); - btrfs_remove_leaf_ref(root, ref); - btrfs_free_leaf_ref(root, ref); - } else { - /* - * the leaf wasn't in the reference cache, so - * we have to read it. - */ - leaf = read_tree_block(root, bytenr, blocksize, - ptr_gen); - ret = btrfs_drop_leaf_ref(trans, root, leaf); - BUG_ON(ret); - free_extent_buffer(leaf); - } - atomic_inc(&root->fs_info->throttle_gen); - wake_up(&root->fs_info->transaction_throttle); - cond_resched(); - } - - /* - * run through the loop again to free the refs on the leaves. - * This is faster than doing it in the loop above because - * the leaves are likely to be clustered together. We end up - * working in nice chunks on the extent allocation tree. - */ - for (i = 0; i < refi; i++) { - bytenr = sorted[i].bytenr; - ret = __btrfs_free_extent(trans, root, bytenr, - blocksize, eb->start, - root_owner, root_gen, 0, 1); - BUG_ON(ret); - - atomic_inc(&root->fs_info->throttle_gen); - wake_up(&root->fs_info->transaction_throttle); - cond_resched(); - } -out: - kfree(sorted); - - /* - * update the path to show we've processed the entire level 1 - * node. This will get saved into the root's drop_snapshot_progress - * field so these drops are not repeated again if this transaction - * commits. 
- */ - path->slots[1] = nritems; - return 0; -} - /* * helper function for drop_snapshot, this walks down the tree dropping ref * counts as it goes. @@ -3787,6 +3511,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, struct extent_buffer *next; struct extent_buffer *cur; struct extent_buffer *parent; + struct btrfs_leaf_ref *ref; u32 blocksize; int ret; u32 refs; @@ -3813,46 +3538,17 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, if (path->slots[*level] >= btrfs_header_nritems(cur)) break; - - /* the new code goes down to level 1 and does all the - * leaves pointed to that node in bulk. So, this check - * for level 0 will always be false. - * - * But, the disk format allows the drop_snapshot_progress - * field in the root to leave things in a state where - * a leaf will need cleaning up here. If someone crashes - * with the old code and then boots with the new code, - * we might find a leaf here. - */ if (*level == 0) { ret = btrfs_drop_leaf_ref(trans, root, cur); BUG_ON(ret); break; } - - /* - * once we get to level one, process the whole node - * at once, including everything below it. - */ - if (*level == 1) { - ret = drop_level_one_refs(trans, root, path); - BUG_ON(ret); - break; - } - bytenr = btrfs_node_blockptr(cur, path->slots[*level]); ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); blocksize = btrfs_level_size(root, *level - 1); ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); BUG_ON(ret); - - /* - * if there is more than one reference, we don't need - * to read that node to drop any references it has. We - * just drop the ref we hold on that node and move on to the - * next slot in this level. - */ if (refs != 1) { parent = path->nodes[*level]; root_owner = btrfs_header_owner(parent); @@ -3871,12 +3567,46 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, continue; } - /* - * we need to keep freeing things in the next level down. - * read the block and loop around to process it + * at this point, we have a single ref, and since the + * only place referencing this extent is a dead root + * the reference count should never go higher. 
+ * So, we don't need to check it again */ - next = read_tree_block(root, bytenr, blocksize, ptr_gen); + if (*level == 1) { + ref = btrfs_lookup_leaf_ref(root, bytenr); + if (ref && ref->generation != ptr_gen) { + btrfs_free_leaf_ref(root, ref); + ref = NULL; + } + if (ref) { + ret = cache_drop_leaf_ref(trans, root, ref); + BUG_ON(ret); + btrfs_remove_leaf_ref(root, ref); + btrfs_free_leaf_ref(root, ref); + *level = 0; + break; + } + } + next = btrfs_find_tree_block(root, bytenr, blocksize); + if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { + free_extent_buffer(next); + + next = read_tree_block(root, bytenr, blocksize, + ptr_gen); + cond_resched(); +#if 0 + /* + * this is a debugging check and can go away + * the ref should never go all the way down to 1 + * at this point + */ + ret = lookup_extent_ref(NULL, root, bytenr, blocksize, + &refs); + BUG_ON(ret); + WARN_ON(refs != 1); +#endif + } WARN_ON(*level <= 0); if (path->nodes[*level-1]) free_extent_buffer(path->nodes[*level-1]); @@ -3901,16 +3631,11 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, root_owner = btrfs_header_owner(parent); root_gen = btrfs_header_generation(parent); - /* - * cleanup and free the reference on the last node - * we processed - */ ret = __btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, root_owner, root_gen, *level, 1); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; - *level += 1; BUG_ON(ret); @@ -3962,7 +3687,6 @@ static noinline int walk_down_subtree(struct btrfs_trans_handle *trans, next = read_tree_block(root, bytenr, blocksize, ptr_gen); btrfs_tree_lock(next); - btrfs_set_lock_blocking(next); ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize, &refs); @@ -4030,13 +3754,6 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { struct extent_buffer *node; struct btrfs_disk_key disk_key; - - /* - * there is more work to do in this level. 
- * Update the drop_progress marker to reflect - * the work we've done so far, and then bump - * the slot number - */ node = path->nodes[i]; path->slots[i]++; *level = i; @@ -4048,11 +3765,6 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, return 0; } else { struct extent_buffer *parent; - - /* - * this whole node is done, free our reference - * on it and go up one level - */ if (path->nodes[*level] == root->node) parent = path->nodes[*level]; else @@ -4732,7 +4444,7 @@ static noinline int replace_one_extent(struct btrfs_trans_handle *trans, u64 lock_end = 0; u64 num_bytes; u64 ext_offset; - u64 search_end = (u64)-1; + u64 first_pos; u32 nritems; int nr_scaned = 0; int extent_locked = 0; @@ -4740,6 +4452,7 @@ static noinline int replace_one_extent(struct btrfs_trans_handle *trans, int ret; memcpy(&key, leaf_key, sizeof(key)); + first_pos = INT_LIMIT(loff_t) - extent_key->offset; if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { if (key.objectid < ref_path->owner_objectid || (key.objectid == ref_path->owner_objectid && @@ -4788,7 +4501,7 @@ static noinline int replace_one_extent(struct btrfs_trans_handle *trans, if ((key.objectid > ref_path->owner_objectid) || (key.objectid == ref_path->owner_objectid && key.type > BTRFS_EXTENT_DATA_KEY) || - key.offset >= search_end) + (key.offset >= first_pos + extent_key->offset)) break; } @@ -4821,10 +4534,8 @@ static noinline int replace_one_extent(struct btrfs_trans_handle *trans, num_bytes = btrfs_file_extent_num_bytes(leaf, fi); ext_offset = btrfs_file_extent_offset(leaf, fi); - if (search_end == (u64)-1) { - search_end = key.offset - ext_offset + - btrfs_file_extent_ram_bytes(leaf, fi); - } + if (first_pos > key.offset - ext_offset) + first_pos = key.offset - ext_offset; if (!extent_locked) { lock_start = key.offset; @@ -5013,7 +4724,7 @@ static noinline int replace_one_extent(struct btrfs_trans_handle *trans, } skip: if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS && - key.offset >= search_end) + key.offset >= first_pos + extent_key->offset) break; cond_resched(); @@ -5067,7 +4778,6 @@ int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, ref->bytenr = buf->start; ref->owner = btrfs_header_owner(buf); ref->generation = btrfs_header_generation(buf); - ret = btrfs_add_leaf_ref(root, ref, 0); WARN_ON(ret); btrfs_free_leaf_ref(root, ref); @@ -6247,11 +5957,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); - spin_lock(&root->fs_info->block_group_cache_lock); + btrfs_remove_free_space_cache(block_group); rb_erase(&block_group->cache_node, &root->fs_info->block_group_cache_tree); - spin_unlock(&root->fs_info->block_group_cache_lock); - btrfs_remove_free_space_cache(block_group); down_write(&block_group->space_info->groups_sem); list_del(&block_group->list); up_write(&block_group->space_info->groups_sem); diff --git a/trunk/fs/btrfs/extent_io.c b/trunk/fs/btrfs/extent_io.c index 37d43b516b79..e086d407f1fa 100644 --- a/trunk/fs/btrfs/extent_io.c +++ b/trunk/fs/btrfs/extent_io.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include "extent_io.h" @@ -30,7 +31,7 @@ static LIST_HEAD(buffers); static LIST_HEAD(states); #define LEAK_DEBUG 0 -#if LEAK_DEBUG +#ifdef LEAK_DEBUG static DEFINE_SPINLOCK(leak_lock); #endif @@ -119,7 +120,7 @@ void extent_io_tree_init(struct extent_io_tree *tree, static struct extent_state *alloc_extent_state(gfp_t mask) { struct extent_state *state; -#if LEAK_DEBUG +#ifdef LEAK_DEBUG unsigned long 
flags; #endif @@ -129,7 +130,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask) state->state = 0; state->private = 0; state->tree = NULL; -#if LEAK_DEBUG +#ifdef LEAK_DEBUG spin_lock_irqsave(&leak_lock, flags); list_add(&state->leak_list, &states); spin_unlock_irqrestore(&leak_lock, flags); @@ -144,11 +145,11 @@ static void free_extent_state(struct extent_state *state) if (!state) return; if (atomic_dec_and_test(&state->refs)) { -#if LEAK_DEBUG +#ifdef LEAK_DEBUG unsigned long flags; #endif WARN_ON(state->tree); -#if LEAK_DEBUG +#ifdef LEAK_DEBUG spin_lock_irqsave(&leak_lock, flags); list_del(&state->leak_list); spin_unlock_irqrestore(&leak_lock, flags); @@ -2377,6 +2378,11 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, int scanned = 0; int range_whole = 0; + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + return 0; + } + pagevec_init(&pvec, 0); if (wbc->range_cyclic) { index = mapping->writeback_index; /* Start from prev offset */ @@ -2849,98 +2855,6 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, return sector; } -int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - __u64 start, __u64 len, get_extent_t *get_extent) -{ - int ret; - u64 off = start; - u64 max = start + len; - u32 flags = 0; - u64 disko = 0; - struct extent_map *em = NULL; - int end = 0; - u64 em_start = 0, em_len = 0; - unsigned long emflags; - ret = 0; - - if (len == 0) - return -EINVAL; - - lock_extent(&BTRFS_I(inode)->io_tree, start, start + len, - GFP_NOFS); - em = get_extent(inode, NULL, 0, off, max - off, 0); - if (!em) - goto out; - if (IS_ERR(em)) { - ret = PTR_ERR(em); - goto out; - } - while (!end) { - off = em->start + em->len; - if (off >= max) - end = 1; - - em_start = em->start; - em_len = em->len; - - disko = 0; - flags = 0; - - switch (em->block_start) { - case EXTENT_MAP_LAST_BYTE: - end = 1; - flags |= FIEMAP_EXTENT_LAST; - break; - case EXTENT_MAP_HOLE: - flags |= FIEMAP_EXTENT_UNWRITTEN; - break; - case EXTENT_MAP_INLINE: - flags |= (FIEMAP_EXTENT_DATA_INLINE | - FIEMAP_EXTENT_NOT_ALIGNED); - break; - case EXTENT_MAP_DELALLOC: - flags |= (FIEMAP_EXTENT_DELALLOC | - FIEMAP_EXTENT_UNKNOWN); - break; - default: - disko = em->block_start; - break; - } - if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) - flags |= FIEMAP_EXTENT_ENCODED; - - emflags = em->flags; - free_extent_map(em); - em = NULL; - - if (!end) { - em = get_extent(inode, NULL, 0, off, max - off, 0); - if (!em) - goto out; - if (IS_ERR(em)) { - ret = PTR_ERR(em); - goto out; - } - emflags = em->flags; - } - if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) { - flags |= FIEMAP_EXTENT_LAST; - end = 1; - } - - ret = fiemap_fill_next_extent(fieinfo, em_start, disko, - em_len, flags); - if (ret) - goto out_free; - } -out_free: - free_extent_map(em); -out: - unlock_extent(&BTRFS_I(inode)->io_tree, start, start + len, - GFP_NOFS); - return ret; -} - static inline struct page *extent_buffer_page(struct extent_buffer *eb, unsigned long i) { @@ -2978,17 +2892,15 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, gfp_t mask) { struct extent_buffer *eb = NULL; -#if LEAK_DEBUG +#ifdef LEAK_DEBUG unsigned long flags; #endif eb = kmem_cache_zalloc(extent_buffer_cache, mask); eb->start = start; eb->len = len; - spin_lock_init(&eb->lock); - init_waitqueue_head(&eb->lock_wq); - -#if LEAK_DEBUG + mutex_init(&eb->mutex); +#ifdef LEAK_DEBUG spin_lock_irqsave(&leak_lock, flags); list_add(&eb->leak_list, &buffers); 
spin_unlock_irqrestore(&leak_lock, flags); @@ -3000,7 +2912,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, static void __free_extent_buffer(struct extent_buffer *eb) { -#if LEAK_DEBUG +#ifdef LEAK_DEBUG unsigned long flags; spin_lock_irqsave(&leak_lock, flags); list_del(&eb->leak_list); @@ -3068,7 +2980,8 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, unlock_page(p); } if (uptodate) - set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); + eb->flags |= EXTENT_UPTODATE; + eb->flags |= EXTENT_BUFFER_FILLED; spin_lock(&tree->buffer_lock); exists = buffer_tree_insert(tree, start, &eb->rb_node); @@ -3222,7 +3135,7 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, unsigned long num_pages; num_pages = num_extent_pages(eb->start, eb->len); - clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); + eb->flags &= ~EXTENT_UPTODATE; clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); @@ -3293,7 +3206,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, struct page *page; int pg_uptodate = 1; - if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) + if (eb->flags & EXTENT_UPTODATE) return 1; ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, @@ -3329,7 +3242,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, struct bio *bio = NULL; unsigned long bio_flags = 0; - if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) + if (eb->flags & EXTENT_UPTODATE) return 0; if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, @@ -3360,7 +3273,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, } if (all_uptodate) { if (start_i == 0) - set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); + eb->flags |= EXTENT_UPTODATE; goto unlock_exit; } @@ -3396,7 +3309,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, } if (!ret) - set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); + eb->flags |= EXTENT_UPTODATE; return ret; unlock_exit: @@ -3493,6 +3406,7 @@ int map_extent_buffer(struct extent_buffer *eb, unsigned long start, unmap_extent_buffer(eb, eb->map_token, km); eb->map_token = NULL; save = 1; + WARN_ON(!mutex_is_locked(&eb->mutex)); } err = map_private_extent_buffer(eb, start, min_len, token, map, map_start, map_len, km); diff --git a/trunk/fs/btrfs/extent_io.h b/trunk/fs/btrfs/extent_io.h index 1f9df88afbf6..c5b483a79137 100644 --- a/trunk/fs/btrfs/extent_io.h +++ b/trunk/fs/btrfs/extent_io.h @@ -22,10 +22,6 @@ /* flags for bio submission */ #define EXTENT_BIO_COMPRESSED 1 -/* these are bit numbers for test/set bit */ -#define EXTENT_BUFFER_UPTODATE 0 -#define EXTENT_BUFFER_BLOCKING 1 - /* * page->private values. Every page that is controlled by the extent * map has page->private set to one. 
@@ -99,19 +95,11 @@ struct extent_buffer { unsigned long map_start; unsigned long map_len; struct page *first_page; - unsigned long bflags; atomic_t refs; + int flags; struct list_head leak_list; struct rb_node rb_node; - - /* the spinlock is used to protect most operations */ - spinlock_t lock; - - /* - * when we keep the lock held while blocking, waiters go onto - * the wq - */ - wait_queue_head_t lock_wq; + struct mutex mutex; }; struct extent_map_tree; @@ -205,8 +193,6 @@ int extent_commit_write(struct extent_io_tree *tree, unsigned from, unsigned to); sector_t extent_bmap(struct address_space *mapping, sector_t iblock, get_extent_t *get_extent); -int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - __u64 start, __u64 len, get_extent_t *get_extent); int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end); int set_state_private(struct extent_io_tree *tree, u64 start, u64 private); int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); diff --git a/trunk/fs/btrfs/extent_map.c b/trunk/fs/btrfs/extent_map.c index 50da69da20ce..4a83e33ada32 100644 --- a/trunk/fs/btrfs/extent_map.c +++ b/trunk/fs/btrfs/extent_map.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include "extent_map.h" diff --git a/trunk/fs/btrfs/file.c b/trunk/fs/btrfs/file.c index 3e8023efaff7..90268334145e 100644 --- a/trunk/fs/btrfs/file.c +++ b/trunk/fs/btrfs/file.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -1214,10 +1215,10 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) } mutex_unlock(&root->fs_info->trans_mutex); - root->log_batch++; + root->fs_info->tree_log_batch++; filemap_fdatawrite(inode->i_mapping); btrfs_wait_ordered_range(inode, 0, (u64)-1); - root->log_batch++; + root->fs_info->tree_log_batch++; /* * ok we haven't committed the transaction yet, lets do a commit diff --git a/trunk/fs/btrfs/inode.c b/trunk/fs/btrfs/inode.c index 8f0706210a47..8adfe059ab41 100644 --- a/trunk/fs/btrfs/inode.c +++ b/trunk/fs/btrfs/inode.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -50,7 +51,6 @@ #include "tree-log.h" #include "ref-cache.h" #include "compression.h" -#include "locking.h" struct btrfs_iget_args { u64 ino; @@ -91,16 +91,6 @@ static noinline int cow_file_range(struct inode *inode, u64 start, u64 end, int *page_started, unsigned long *nr_written, int unlock); -static int btrfs_init_inode_security(struct inode *inode, struct inode *dir) -{ - int err; - - err = btrfs_init_acl(inode, dir); - if (!err) - err = btrfs_xattr_security_init(inode, dir); - return err; -} - /* * a very lame attempt at stopping writes when the FS is 85% full. There * are countless ways this is incorrect, but it is better than nothing. @@ -360,19 +350,6 @@ static noinline int compress_file_range(struct inode *inode, nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1; nr_pages = min(nr_pages, (128 * 1024UL) / PAGE_CACHE_SIZE); - /* - * we don't want to send crud past the end of i_size through - * compression, that's just a waste of CPU time. So, if the - * end of the file is before the start of our current - * requested range of bytes, we bail out to the uncompressed - * cleanup code that can deal with all of this. - * - * It isn't really the fastest way to fix things, but this is a - * very uncommon corner. 
- */ - if (actual_end <= start) - goto cleanup_and_bail_uncompressed; - total_compressed = actual_end - start; /* we want to make sure that amount of ram required to uncompress @@ -517,7 +494,6 @@ static noinline int compress_file_range(struct inode *inode, goto again; } } else { -cleanup_and_bail_uncompressed: /* * No compression, but we still need to write the pages in * the file we've been given so far. redirty the locked @@ -1348,11 +1324,12 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, struct inode *inode, u64 file_offset, struct list_head *list) { + struct list_head *cur; struct btrfs_ordered_sum *sum; btrfs_set_trans_block_group(trans, inode); - - list_for_each_entry(sum, list, list) { + list_for_each(cur, list) { + sum = list_entry(cur, struct btrfs_ordered_sum, list); btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root->fs_info->csum_root, sum); } @@ -2036,7 +2013,6 @@ void btrfs_read_locked_inode(struct inode *inode) BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); alloc_group_block = btrfs_inode_block_group(leaf, inode_item); - BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, alloc_group_block, 0); btrfs_free_path(path); @@ -2063,7 +2039,6 @@ void btrfs_read_locked_inode(struct inode *inode) inode->i_mapping->backing_dev_info = &root->fs_info->bdi; break; default: - inode->i_op = &btrfs_special_inode_operations; init_special_inode(inode, inode->i_mode, rdev); break; } @@ -2133,7 +2108,6 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, goto failed; } - btrfs_unlock_up_safe(path, 1); leaf = path->nodes[0]; inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); @@ -2455,8 +2429,6 @@ static noinline int drop_csum_leaves(struct btrfs_trans_handle *trans, ref->generation = leaf_gen; ref->nritems = 0; - btrfs_sort_leaf_ref(ref); - ret = btrfs_add_leaf_ref(root, ref, 0); WARN_ON(ret); btrfs_free_leaf_ref(root, ref); @@ -2504,7 +2476,7 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct btrfs_key key; struct btrfs_key found_key; - u32 found_type = (u8)-1; + u32 found_type; struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; u64 extent_start = 0; @@ -2691,8 +2663,6 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, if (pending_del_nr) goto del_pending; btrfs_release_path(root, path); - if (found_type == BTRFS_INODE_ITEM_KEY) - break; goto search_again; } @@ -2709,8 +2679,6 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, BUG_ON(ret); pending_del_nr = 0; btrfs_release_path(root, path); - if (found_type == BTRFS_INODE_ITEM_KEY) - break; goto search_again; } } @@ -3297,7 +3265,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, /* Reached end of directory/root. Bump pos past the last item. 
*/ if (key_type == BTRFS_DIR_INDEX_KEY) - filp->f_pos = INT_LIMIT(off_t); + filp->f_pos = INT_LIMIT(typeof(filp->f_pos)); else filp->f_pos++; nopos: @@ -3490,14 +3458,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, root->highest_inode = objectid; inode->i_uid = current_fsuid(); - - if (dir && (dir->i_mode & S_ISGID)) { - inode->i_gid = dir->i_gid; - if (S_ISDIR(mode)) - mode |= S_ISGID; - } else - inode->i_gid = current_fsgid(); - + inode->i_gid = current_fsgid(); inode->i_mode = mode; inode->i_ino = objectid; inode_set_bytes(inode, 0); @@ -3625,7 +3586,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; - err = btrfs_init_inode_security(inode, dir); + err = btrfs_init_acl(inode, dir); if (err) { drop_inode = 1; goto out_unlock; @@ -3688,7 +3649,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; - err = btrfs_init_inode_security(inode, dir); + err = btrfs_init_acl(inode, dir); if (err) { drop_inode = 1; goto out_unlock; @@ -3811,7 +3772,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) drop_on_err = 1; - err = btrfs_init_inode_security(inode, dir); + err = btrfs_init_acl(inode, dir); if (err) goto out_fail; @@ -4197,10 +4158,9 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, return -EINVAL; } -static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - __u64 start, __u64 len) +static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock) { - return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent); + return extent_bmap(mapping, iblock, btrfs_get_extent); } int btrfs_readpage(struct file *file, struct page *page) @@ -4773,7 +4733,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; - err = btrfs_init_inode_security(inode, dir); + err = btrfs_init_acl(inode, dir); if (err) { drop_inode = 1; goto out_unlock; @@ -5027,24 +4987,13 @@ static struct extent_io_ops btrfs_extent_io_ops = { .clear_bit_hook = btrfs_clear_bit_hook, }; -/* - * btrfs doesn't support the bmap operation because swapfiles - * use bmap to make a mapping of extents in the file. They assume - * these extents won't change over the life of the file and they - * use the bmap result to do IO directly to the drive. - * - * the btrfs bmap call would return logical addresses that aren't - * suitable for IO and they also will change frequently as COW - * operations happen. So, swapfile + btrfs == corruption. - * - * For now we're avoiding this by dropping bmap. 
- */ static struct address_space_operations btrfs_aops = { .readpage = btrfs_readpage, .writepage = btrfs_writepage, .writepages = btrfs_writepages, .readpages = btrfs_readpages, .sync_page = block_sync_page, + .bmap = btrfs_bmap, .direct_IO = btrfs_direct_IO, .invalidatepage = btrfs_invalidatepage, .releasepage = btrfs_releasepage, @@ -5068,7 +5017,6 @@ static struct inode_operations btrfs_file_inode_operations = { .removexattr = btrfs_removexattr, .permission = btrfs_permission, .fallocate = btrfs_fallocate, - .fiemap = btrfs_fiemap, }; static struct inode_operations btrfs_special_inode_operations = { .getattr = btrfs_getattr, @@ -5084,8 +5032,4 @@ static struct inode_operations btrfs_symlink_inode_operations = { .follow_link = page_follow_link_light, .put_link = page_put_link, .permission = btrfs_permission, - .setxattr = btrfs_setxattr, - .getxattr = btrfs_getxattr, - .listxattr = btrfs_listxattr, - .removexattr = btrfs_removexattr, }; diff --git a/trunk/fs/btrfs/ioctl.c b/trunk/fs/btrfs/ioctl.c index 988fdc8b49eb..c2aa33e3feb5 100644 --- a/trunk/fs/btrfs/ioctl.c +++ b/trunk/fs/btrfs/ioctl.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include "compat.h" diff --git a/trunk/fs/btrfs/locking.c b/trunk/fs/btrfs/locking.c index 68fd9ccf1805..39bae7761db6 100644 --- a/trunk/fs/btrfs/locking.c +++ b/trunk/fs/btrfs/locking.c @@ -26,215 +26,45 @@ #include "locking.h" /* - * btrfs_header_level() isn't free, so don't call it when lockdep isn't - * on - */ -#ifdef CONFIG_DEBUG_LOCK_ALLOC -static inline void spin_nested(struct extent_buffer *eb) -{ - spin_lock_nested(&eb->lock, BTRFS_MAX_LEVEL - btrfs_header_level(eb)); -} -#else -static inline void spin_nested(struct extent_buffer *eb) -{ - spin_lock(&eb->lock); -} -#endif - -/* - * Setting a lock to blocking will drop the spinlock and set the - * flag that forces other procs who want the lock to wait. After - * this you can safely schedule with the lock held. - */ -void btrfs_set_lock_blocking(struct extent_buffer *eb) -{ - if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { - set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); - spin_unlock(&eb->lock); - } - /* exit with the spin lock released and the bit set */ -} - -/* - * clearing the blocking flag will take the spinlock again. - * After this you can't safely schedule - */ -void btrfs_clear_lock_blocking(struct extent_buffer *eb) -{ - if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { - spin_nested(eb); - clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); - smp_mb__after_clear_bit(); - } - /* exit with the spin lock held */ -} - -/* - * unfortunately, many of the places that currently set a lock to blocking - * don't end up blocking for every long, and often they don't block - * at all. For a dbench 50 run, if we don't spin one the blocking bit - * at all, the context switch rate can jump up to 400,000/sec or more. - * - * So, we're still stuck with this crummy spin on the blocking bit, - * at least until the most common causes of the short blocks - * can be dealt with. - */ -static int btrfs_spin_on_block(struct extent_buffer *eb) -{ - int i; - for (i = 0; i < 512; i++) { - cpu_relax(); - if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) - return 1; - if (need_resched()) - break; - } - return 0; -} - -/* - * This is somewhat different from trylock. It will take the - * spinlock but if it finds the lock is set to blocking, it will - * return without the lock held. + * locks the per buffer mutex in an extent buffer. 
This uses adaptive locks + * and the spin is not tuned very extensively. The spinning does make a big + * difference in almost every workload, but spinning for the right amount of + * time needs some help. * - * returns 1 if it was able to take the lock and zero otherwise - * - * After this call, scheduling is not safe without first calling - * btrfs_set_lock_blocking() + * In general, we want to spin as long as the lock holder is doing btree + * searches, and we should give up if they are in more expensive code. */ -int btrfs_try_spin_lock(struct extent_buffer *eb) -{ - int i; - - spin_nested(eb); - if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) - return 1; - spin_unlock(&eb->lock); - /* spin for a bit on the BLOCKING flag */ - for (i = 0; i < 2; i++) { - if (!btrfs_spin_on_block(eb)) - break; - - spin_nested(eb); - if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) - return 1; - spin_unlock(&eb->lock); - } - return 0; -} - -/* - * the autoremove wake function will return 0 if it tried to wake up - * a process that was already awake, which means that process won't - * count as an exclusive wakeup. The waitq code will continue waking - * procs until it finds one that was actually sleeping. - * - * For btrfs, this isn't quite what we want. We want a single proc - * to be notified that the lock is ready for taking. If that proc - * already happen to be awake, great, it will loop around and try for - * the lock. - * - * So, btrfs_wake_function always returns 1, even when the proc that we - * tried to wake up was already awake. - */ -static int btrfs_wake_function(wait_queue_t *wait, unsigned mode, - int sync, void *key) -{ - autoremove_wake_function(wait, mode, sync, key); - return 1; -} - -/* - * returns with the extent buffer spinlocked. - * - * This will spin and/or wait as required to take the lock, and then - * return with the spinlock held. - * - * After this call, scheduling is not safe without first calling - * btrfs_set_lock_blocking() - */ int btrfs_tree_lock(struct extent_buffer *eb) { - DEFINE_WAIT(wait); - wait.func = btrfs_wake_function; - - while(1) { - spin_nested(eb); + int i; - /* nobody is blocking, exit with the spinlock held */ - if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) + if (mutex_trylock(&eb->mutex)) + return 0; + for (i = 0; i < 512; i++) { + cpu_relax(); + if (mutex_trylock(&eb->mutex)) return 0; - - /* - * we have the spinlock, but the real owner is blocking. - * wait for them - */ - spin_unlock(&eb->lock); - - /* - * spin for a bit, and if the blocking flag goes away, - * loop around - */ - if (btrfs_spin_on_block(eb)) - continue; - - prepare_to_wait_exclusive(&eb->lock_wq, &wait, - TASK_UNINTERRUPTIBLE); - - if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) - schedule(); - - finish_wait(&eb->lock_wq, &wait); } + cpu_relax(); + mutex_lock_nested(&eb->mutex, BTRFS_MAX_LEVEL - btrfs_header_level(eb)); return 0; } -/* - * Very quick trylock, this does not spin or schedule. It returns - * 1 with the spinlock held if it was able to take the lock, or it - * returns zero if it was unable to take the lock. - * - * After this call, scheduling is not safe without first calling - * btrfs_set_lock_blocking() - */ int btrfs_try_tree_lock(struct extent_buffer *eb) { - if (spin_trylock(&eb->lock)) { - if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { - /* - * we've got the spinlock, but the real owner is - * blocking. 
Drop the spinlock and return failure - */ - spin_unlock(&eb->lock); - return 0; - } - return 1; - } - /* someone else has the spinlock giveup */ - return 0; + return mutex_trylock(&eb->mutex); } int btrfs_tree_unlock(struct extent_buffer *eb) { - /* - * if we were a blocking owner, we don't have the spinlock held - * just clear the bit and look for waiters - */ - if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) - smp_mb__after_clear_bit(); - else - spin_unlock(&eb->lock); - - if (waitqueue_active(&eb->lock_wq)) - wake_up(&eb->lock_wq); + mutex_unlock(&eb->mutex); return 0; } int btrfs_tree_locked(struct extent_buffer *eb) { - return test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags) || - spin_is_locked(&eb->lock); + return mutex_is_locked(&eb->mutex); } /* @@ -245,14 +75,12 @@ int btrfs_path_lock_waiting(struct btrfs_path *path, int level) { int i; struct extent_buffer *eb; - for (i = level; i <= level + 1 && i < BTRFS_MAX_LEVEL; i++) { eb = path->nodes[i]; if (!eb) break; smp_mb(); - if (spin_is_contended(&eb->lock) || - waitqueue_active(&eb->lock_wq)) + if (!list_empty(&eb->mutex.wait_list)) return 1; } return 0; diff --git a/trunk/fs/btrfs/locking.h b/trunk/fs/btrfs/locking.h index d92e707f5870..bc1faef12519 100644 --- a/trunk/fs/btrfs/locking.h +++ b/trunk/fs/btrfs/locking.h @@ -22,12 +22,6 @@ int btrfs_tree_lock(struct extent_buffer *eb); int btrfs_tree_unlock(struct extent_buffer *eb); int btrfs_tree_locked(struct extent_buffer *eb); - int btrfs_try_tree_lock(struct extent_buffer *eb); -int btrfs_try_spin_lock(struct extent_buffer *eb); - int btrfs_path_lock_waiting(struct btrfs_path *path, int level); - -void btrfs_set_lock_blocking(struct extent_buffer *eb); -void btrfs_clear_lock_blocking(struct extent_buffer *eb); #endif diff --git a/trunk/fs/btrfs/ordered-data.c b/trunk/fs/btrfs/ordered-data.c index 77c2411a5f0f..a20940170274 100644 --- a/trunk/fs/btrfs/ordered-data.c +++ b/trunk/fs/btrfs/ordered-data.c @@ -613,6 +613,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, struct btrfs_sector_sum *sector_sums; struct btrfs_ordered_extent *ordered; struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; + struct list_head *cur; unsigned long num_sectors; unsigned long i; u32 sectorsize = BTRFS_I(inode)->root->sectorsize; @@ -623,7 +624,8 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, return 1; mutex_lock(&tree->mutex); - list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { + list_for_each_prev(cur, &ordered->list) { + ordered_sum = list_entry(cur, struct btrfs_ordered_sum, list); if (disk_bytenr >= ordered_sum->bytenr) { num_sectors = ordered_sum->len / sectorsize; sector_sums = ordered_sum->sums; diff --git a/trunk/fs/btrfs/ref-cache.c b/trunk/fs/btrfs/ref-cache.c index d0cc62bccb94..6f0acc4c9eab 100644 --- a/trunk/fs/btrfs/ref-cache.c +++ b/trunk/fs/btrfs/ref-cache.c @@ -17,7 +17,6 @@ */ #include -#include #include "ctree.h" #include "ref-cache.h" #include "transaction.h" diff --git a/trunk/fs/btrfs/ref-cache.h b/trunk/fs/btrfs/ref-cache.h index bc283ad2db73..16f3183d7c59 100644 --- a/trunk/fs/btrfs/ref-cache.h +++ b/trunk/fs/btrfs/ref-cache.h @@ -73,4 +73,5 @@ int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref, int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen, int shared); int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); + #endif diff --git a/trunk/fs/btrfs/super.c b/trunk/fs/btrfs/super.c index 
f3fd7e2cbc38..db9fb3bc1e33 100644 --- a/trunk/fs/btrfs/super.c +++ b/trunk/fs/btrfs/super.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include "compat.h" #include "ctree.h" @@ -582,18 +583,17 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, struct btrfs_ioctl_vol_args *vol; struct btrfs_fs_devices *fs_devices; int ret = -ENOTTY; + int len; if (!capable(CAP_SYS_ADMIN)) return -EPERM; vol = kmalloc(sizeof(*vol), GFP_KERNEL); - if (!vol) - return -ENOMEM; - if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) { ret = -EFAULT; goto out; } + len = strnlen(vol->name, BTRFS_PATH_NAME_MAX); switch (cmd) { case BTRFS_IOC_SCAN_DEV: diff --git a/trunk/fs/btrfs/transaction.c b/trunk/fs/btrfs/transaction.c index 919172de5c9a..8a08f9443340 100644 --- a/trunk/fs/btrfs/transaction.c +++ b/trunk/fs/btrfs/transaction.c @@ -852,9 +852,11 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, { struct btrfs_pending_snapshot *pending; struct list_head *head = &trans->transaction->pending_snapshots; + struct list_head *cur; int ret; - list_for_each_entry(pending, head, list) { + list_for_each(cur, head) { + pending = list_entry(cur, struct btrfs_pending_snapshot, list); ret = create_pending_snapshot(trans, fs_info, pending); BUG_ON(ret); } diff --git a/trunk/fs/btrfs/tree-defrag.c b/trunk/fs/btrfs/tree-defrag.c index 98d25fa4570e..3e8358c36165 100644 --- a/trunk/fs/btrfs/tree-defrag.c +++ b/trunk/fs/btrfs/tree-defrag.c @@ -74,7 +74,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, u32 nritems; root_node = btrfs_lock_root_node(root); - btrfs_set_lock_blocking(root_node); nritems = btrfs_header_nritems(root_node); root->defrag_max.objectid = 0; /* from above we know this is not a leaf */ diff --git a/trunk/fs/btrfs/tree-log.c b/trunk/fs/btrfs/tree-log.c index 20794290256b..d81cda2e077c 100644 --- a/trunk/fs/btrfs/tree-log.c +++ b/trunk/fs/btrfs/tree-log.c @@ -77,6 +77,104 @@ static int link_to_fixup_dir(struct btrfs_trans_handle *trans, * and once to do all the other items. */ +/* + * btrfs_add_log_tree adds a new per-subvolume log tree into the + * tree of log tree roots. This must be called with a tree log transaction + * running (see start_log_trans). 
+ */ +static int btrfs_add_log_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_key key; + struct btrfs_root_item root_item; + struct btrfs_inode_item *inode_item; + struct extent_buffer *leaf; + struct btrfs_root *new_root = root; + int ret; + u64 objectid = root->root_key.objectid; + + leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, + BTRFS_TREE_LOG_OBJECTID, + trans->transid, 0, 0, 0); + if (IS_ERR(leaf)) { + ret = PTR_ERR(leaf); + return ret; + } + + btrfs_set_header_nritems(leaf, 0); + btrfs_set_header_level(leaf, 0); + btrfs_set_header_bytenr(leaf, leaf->start); + btrfs_set_header_generation(leaf, trans->transid); + btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID); + + write_extent_buffer(leaf, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(leaf), + BTRFS_FSID_SIZE); + btrfs_mark_buffer_dirty(leaf); + + inode_item = &root_item.inode; + memset(inode_item, 0, sizeof(*inode_item)); + inode_item->generation = cpu_to_le64(1); + inode_item->size = cpu_to_le64(3); + inode_item->nlink = cpu_to_le32(1); + inode_item->nbytes = cpu_to_le64(root->leafsize); + inode_item->mode = cpu_to_le32(S_IFDIR | 0755); + + btrfs_set_root_bytenr(&root_item, leaf->start); + btrfs_set_root_generation(&root_item, trans->transid); + btrfs_set_root_level(&root_item, 0); + btrfs_set_root_refs(&root_item, 0); + btrfs_set_root_used(&root_item, 0); + + memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); + root_item.drop_level = 0; + + btrfs_tree_unlock(leaf); + free_extent_buffer(leaf); + leaf = NULL; + + btrfs_set_root_dirid(&root_item, 0); + + key.objectid = BTRFS_TREE_LOG_OBJECTID; + key.offset = objectid; + btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + ret = btrfs_insert_root(trans, root->fs_info->log_root_tree, &key, + &root_item); + if (ret) + goto fail; + + new_root = btrfs_read_fs_root_no_radix(root->fs_info->log_root_tree, + &key); + BUG_ON(!new_root); + + WARN_ON(root->log_root); + root->log_root = new_root; + + /* + * log trees do not get reference counted because they go away + * before a real commit is actually done. They do store pointers + * to file data extents, and those reference counts still get + * updated (along with back refs to the log tree). + */ + new_root->ref_cows = 0; + new_root->last_trans = trans->transid; + + /* + * we need to make sure the root block for this new tree + * is marked as dirty in the dirty_log_pages tree. This + * is how it gets flushed down to disk at tree log commit time. 
+ * + * the tree logging mutex keeps others from coming in and changing + * the new_root->node, so we can safely access it here + */ + set_extent_dirty(&new_root->dirty_log_pages, new_root->node->start, + new_root->node->start + new_root->node->len - 1, + GFP_NOFS); + +fail: + return ret; +} + /* * start a sub transaction and setup the log tree * this increments the log tree writer count to make the people @@ -86,14 +184,6 @@ static int start_log_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root) { int ret; - - mutex_lock(&root->log_mutex); - if (root->log_root) { - root->log_batch++; - atomic_inc(&root->log_writers); - mutex_unlock(&root->log_mutex); - return 0; - } mutex_lock(&root->fs_info->tree_log_mutex); if (!root->fs_info->log_root_tree) { ret = btrfs_init_log_root_tree(trans, root->fs_info); @@ -103,10 +193,9 @@ static int start_log_trans(struct btrfs_trans_handle *trans, ret = btrfs_add_log_tree(trans, root); BUG_ON(ret); } + atomic_inc(&root->fs_info->tree_log_writers); + root->fs_info->tree_log_batch++; mutex_unlock(&root->fs_info->tree_log_mutex); - root->log_batch++; - atomic_inc(&root->log_writers); - mutex_unlock(&root->log_mutex); return 0; } @@ -123,12 +212,13 @@ static int join_running_log_trans(struct btrfs_root *root) if (!root->log_root) return -ENOENT; - mutex_lock(&root->log_mutex); + mutex_lock(&root->fs_info->tree_log_mutex); if (root->log_root) { ret = 0; - atomic_inc(&root->log_writers); + atomic_inc(&root->fs_info->tree_log_writers); + root->fs_info->tree_log_batch++; } - mutex_unlock(&root->log_mutex); + mutex_unlock(&root->fs_info->tree_log_mutex); return ret; } @@ -138,11 +228,10 @@ static int join_running_log_trans(struct btrfs_root *root) */ static int end_log_trans(struct btrfs_root *root) { - if (atomic_dec_and_test(&root->log_writers)) { - smp_mb(); - if (waitqueue_active(&root->log_writer_wait)) - wake_up(&root->log_writer_wait); - } + atomic_dec(&root->fs_info->tree_log_writers); + smp_mb(); + if (waitqueue_active(&root->fs_info->tree_log_wait)) + wake_up(&root->fs_info->tree_log_wait); return 0; } @@ -1615,7 +1704,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, btrfs_tree_lock(next); clean_tree_block(trans, root, next); - btrfs_set_lock_blocking(next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); @@ -1662,7 +1750,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, next = path->nodes[*level]; btrfs_tree_lock(next); clean_tree_block(trans, root, next); - btrfs_set_lock_blocking(next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); @@ -1720,7 +1807,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, btrfs_tree_lock(next); clean_tree_block(trans, root, next); - btrfs_set_lock_blocking(next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); @@ -1793,7 +1879,6 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, btrfs_tree_lock(next); clean_tree_block(trans, log, next); - btrfs_set_lock_blocking(next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); @@ -1817,65 +1902,26 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, } } btrfs_free_path(path); + if (wc->free) + free_extent_buffer(log->node); return ret; } -/* - * helper function to update the item for a given subvolumes log root - * in the tree of log roots - */ -static int update_log_root(struct btrfs_trans_handle *trans, - struct btrfs_root *log) -{ - int ret; - - if (log->log_transid == 1) { - /* insert root item on the 
first sync */ - ret = btrfs_insert_root(trans, log->fs_info->log_root_tree, - &log->root_key, &log->root_item); - } else { - ret = btrfs_update_root(trans, log->fs_info->log_root_tree, - &log->root_key, &log->root_item); - } - return ret; -} - -static int wait_log_commit(struct btrfs_root *root, unsigned long transid) +static int wait_log_commit(struct btrfs_root *log) { DEFINE_WAIT(wait); - int index = transid % 2; + u64 transid = log->fs_info->tree_log_transid; - /* - * we only allow two pending log transactions at a time, - * so we know that if ours is more than 2 older than the - * current transaction, we're done - */ do { - prepare_to_wait(&root->log_commit_wait[index], - &wait, TASK_UNINTERRUPTIBLE); - mutex_unlock(&root->log_mutex); - if (root->log_transid < transid + 2 && - atomic_read(&root->log_commit[index])) + prepare_to_wait(&log->fs_info->tree_log_wait, &wait, + TASK_UNINTERRUPTIBLE); + mutex_unlock(&log->fs_info->tree_log_mutex); + if (atomic_read(&log->fs_info->tree_log_commit)) schedule(); - finish_wait(&root->log_commit_wait[index], &wait); - mutex_lock(&root->log_mutex); - } while (root->log_transid < transid + 2 && - atomic_read(&root->log_commit[index])); - return 0; -} - -static int wait_for_writer(struct btrfs_root *root) -{ - DEFINE_WAIT(wait); - while (atomic_read(&root->log_writers)) { - prepare_to_wait(&root->log_writer_wait, - &wait, TASK_UNINTERRUPTIBLE); - mutex_unlock(&root->log_mutex); - if (atomic_read(&root->log_writers)) - schedule(); - mutex_lock(&root->log_mutex); - finish_wait(&root->log_writer_wait, &wait); - } + finish_wait(&log->fs_info->tree_log_wait, &wait); + mutex_lock(&log->fs_info->tree_log_mutex); + } while (transid == log->fs_info->tree_log_transid && + atomic_read(&log->fs_info->tree_log_commit)); return 0; } @@ -1887,114 +1933,57 @@ static int wait_for_writer(struct btrfs_root *root) int btrfs_sync_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - int index1; - int index2; int ret; + unsigned long batch; struct btrfs_root *log = root->log_root; - struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; - mutex_lock(&root->log_mutex); - index1 = root->log_transid % 2; - if (atomic_read(&root->log_commit[index1])) { - wait_log_commit(root, root->log_transid); - mutex_unlock(&root->log_mutex); - return 0; + mutex_lock(&log->fs_info->tree_log_mutex); + if (atomic_read(&log->fs_info->tree_log_commit)) { + wait_log_commit(log); + goto out; } - atomic_set(&root->log_commit[index1], 1); - - /* wait for previous tree log sync to complete */ - if (atomic_read(&root->log_commit[(index1 + 1) % 2])) - wait_log_commit(root, root->log_transid - 1); + atomic_set(&log->fs_info->tree_log_commit, 1); while (1) { - unsigned long batch = root->log_batch; - mutex_unlock(&root->log_mutex); + batch = log->fs_info->tree_log_batch; + mutex_unlock(&log->fs_info->tree_log_mutex); schedule_timeout_uninterruptible(1); - mutex_lock(&root->log_mutex); - wait_for_writer(root); - if (batch == root->log_batch) + mutex_lock(&log->fs_info->tree_log_mutex); + + while (atomic_read(&log->fs_info->tree_log_writers)) { + DEFINE_WAIT(wait); + prepare_to_wait(&log->fs_info->tree_log_wait, &wait, + TASK_UNINTERRUPTIBLE); + mutex_unlock(&log->fs_info->tree_log_mutex); + if (atomic_read(&log->fs_info->tree_log_writers)) + schedule(); + mutex_lock(&log->fs_info->tree_log_mutex); + finish_wait(&log->fs_info->tree_log_wait, &wait); + } + if (batch == log->fs_info->tree_log_batch) break; } ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); 
BUG_ON(ret); - - btrfs_set_root_bytenr(&log->root_item, log->node->start); - btrfs_set_root_generation(&log->root_item, trans->transid); - btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node)); - - root->log_batch = 0; - root->log_transid++; - log->log_transid = root->log_transid; - smp_mb(); - /* - * log tree has been flushed to disk, new modifications of - * the log will be written to new positions. so it's safe to - * allow log writers to go in. - */ - mutex_unlock(&root->log_mutex); - - mutex_lock(&log_root_tree->log_mutex); - log_root_tree->log_batch++; - atomic_inc(&log_root_tree->log_writers); - mutex_unlock(&log_root_tree->log_mutex); - - ret = update_log_root(trans, log); - BUG_ON(ret); - - mutex_lock(&log_root_tree->log_mutex); - if (atomic_dec_and_test(&log_root_tree->log_writers)) { - smp_mb(); - if (waitqueue_active(&log_root_tree->log_writer_wait)) - wake_up(&log_root_tree->log_writer_wait); - } - - index2 = log_root_tree->log_transid % 2; - if (atomic_read(&log_root_tree->log_commit[index2])) { - wait_log_commit(log_root_tree, log_root_tree->log_transid); - mutex_unlock(&log_root_tree->log_mutex); - goto out; - } - atomic_set(&log_root_tree->log_commit[index2], 1); - - if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) - wait_log_commit(log_root_tree, log_root_tree->log_transid - 1); - - wait_for_writer(log_root_tree); - - ret = btrfs_write_and_wait_marked_extents(log_root_tree, - &log_root_tree->dirty_log_pages); + ret = btrfs_write_and_wait_marked_extents(root->fs_info->log_root_tree, + &root->fs_info->log_root_tree->dirty_log_pages); BUG_ON(ret); btrfs_set_super_log_root(&root->fs_info->super_for_commit, - log_root_tree->node->start); + log->fs_info->log_root_tree->node->start); btrfs_set_super_log_root_level(&root->fs_info->super_for_commit, - btrfs_header_level(log_root_tree->node)); - - log_root_tree->log_batch = 0; - log_root_tree->log_transid++; - smp_mb(); - - mutex_unlock(&log_root_tree->log_mutex); - - /* - * nobody else is going to jump in and write the the ctree - * super here because the log_commit atomic below is protecting - * us. We must be called with a transaction handle pinning - * the running transaction open, so a full commit can't hop - * in and cause problems either. 
- */ - write_ctree_super(trans, root->fs_info->tree_root, 2); + btrfs_header_level(log->fs_info->log_root_tree->node)); - atomic_set(&log_root_tree->log_commit[index2], 0); + write_ctree_super(trans, log->fs_info->tree_root, 2); + log->fs_info->tree_log_transid++; + log->fs_info->tree_log_batch = 0; + atomic_set(&log->fs_info->tree_log_commit, 0); smp_mb(); - if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) - wake_up(&log_root_tree->log_commit_wait[index2]); + if (waitqueue_active(&log->fs_info->tree_log_wait)) + wake_up(&log->fs_info->tree_log_wait); out: - atomic_set(&root->log_commit[index1], 0); - smp_mb(); - if (waitqueue_active(&root->log_commit_wait[index1])) - wake_up(&root->log_commit_wait[index1]); + mutex_unlock(&log->fs_info->tree_log_mutex); return 0; } @@ -2030,17 +2019,37 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) start, end, GFP_NOFS); } - if (log->log_transid > 0) { - ret = btrfs_del_root(trans, root->fs_info->log_root_tree, - &log->root_key); - BUG_ON(ret); - } + log = root->log_root; + ret = btrfs_del_root(trans, root->fs_info->log_root_tree, + &log->root_key); + BUG_ON(ret); root->log_root = NULL; - free_extent_buffer(log->node); - kfree(log); + kfree(root->log_root); return 0; } +/* + * helper function to update the item for a given subvolumes log root + * in the tree of log roots + */ +static int update_log_root(struct btrfs_trans_handle *trans, + struct btrfs_root *log) +{ + u64 bytenr = btrfs_root_bytenr(&log->root_item); + int ret; + + if (log->node->start == bytenr) + return 0; + + btrfs_set_root_bytenr(&log->root_item, log->node->start); + btrfs_set_root_generation(&log->root_item, trans->transid); + btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node)); + ret = btrfs_update_root(trans, log->fs_info->log_root_tree, + &log->root_key, &log->root_item); + BUG_ON(ret); + return ret; +} + /* * If both a file and directory are logged, and unlinks or renames are * mixed in, we have a few interesting corners: @@ -2702,6 +2711,11 @@ static int __btrfs_log_inode(struct btrfs_trans_handle *trans, btrfs_free_path(path); btrfs_free_path(dst_path); + + mutex_lock(&root->fs_info->tree_log_mutex); + ret = update_log_root(trans, log); + BUG_ON(ret); + mutex_unlock(&root->fs_info->tree_log_mutex); out: return 0; } diff --git a/trunk/fs/btrfs/volumes.c b/trunk/fs/btrfs/volumes.c index bcd14ebccae1..3451e1cca2b5 100644 --- a/trunk/fs/btrfs/volumes.c +++ b/trunk/fs/btrfs/volumes.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "compat.h" #include "ctree.h" @@ -103,8 +104,10 @@ static noinline struct btrfs_device *__find_device(struct list_head *head, u64 devid, u8 *uuid) { struct btrfs_device *dev; + struct list_head *cur; - list_for_each_entry(dev, head, dev_list) { + list_for_each(cur, head) { + dev = list_entry(cur, struct btrfs_device, dev_list); if (dev->devid == devid && (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) { return dev; @@ -115,9 +118,11 @@ static noinline struct btrfs_device *__find_device(struct list_head *head, static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid) { + struct list_head *cur; struct btrfs_fs_devices *fs_devices; - list_for_each_entry(fs_devices, &fs_uuids, list) { + list_for_each(cur, &fs_uuids) { + fs_devices = list_entry(cur, struct btrfs_fs_devices, list); if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0) return fs_devices; } @@ -154,7 +159,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) loop: 
spin_lock(&device->io_lock); -loop_lock: /* take all the bios off the list at once and process them * later on (without the lock held). But, remember the * tail and other pointers so the bios can be properly reinserted @@ -204,7 +208,7 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) * is now congested. Back off and let other work structs * run instead */ - if (pending && bdi_write_congested(bdi) && num_run > 16 && + if (pending && bdi_write_congested(bdi) && fs_info->fs_devices->open_devices > 1) { struct bio *old_head; @@ -216,8 +220,7 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) tail->bi_next = old_head; else device->pending_bio_tail = tail; - - device->running_pending = 1; + device->running_pending = 0; spin_unlock(&device->io_lock); btrfs_requeue_work(&device->work); @@ -226,11 +229,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) } if (again) goto loop; - - spin_lock(&device->io_lock); - if (device->pending_bios) - goto loop_lock; - spin_unlock(&device->io_lock); done: return 0; } @@ -347,11 +345,14 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) { - struct btrfs_device *device, *next; + struct list_head *tmp; + struct list_head *cur; + struct btrfs_device *device; mutex_lock(&uuid_mutex); again: - list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { + list_for_each_safe(cur, tmp, &fs_devices->devices) { + device = list_entry(cur, struct btrfs_device, dev_list); if (device->in_fs_metadata) continue; @@ -382,12 +383,14 @@ int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) { + struct list_head *cur; struct btrfs_device *device; if (--fs_devices->opened > 0) return 0; - list_for_each_entry(device, &fs_devices->devices, dev_list) { + list_for_each(cur, &fs_devices->devices) { + device = list_entry(cur, struct btrfs_device, dev_list); if (device->bdev) { close_bdev_exclusive(device->bdev, device->mode); fs_devices->open_devices--; @@ -436,6 +439,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, { struct block_device *bdev; struct list_head *head = &fs_devices->devices; + struct list_head *cur; struct btrfs_device *device; struct block_device *latest_bdev = NULL; struct buffer_head *bh; @@ -446,7 +450,8 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, int seeding = 1; int ret = 0; - list_for_each_entry(device, head, dev_list) { + list_for_each(cur, head) { + device = list_entry(cur, struct btrfs_device, dev_list); if (device->bdev) continue; if (!device->name) @@ -573,7 +578,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, *(unsigned long long *)disk_super->fsid, *(unsigned long long *)(disk_super->fsid + 8)); } - printk(KERN_CONT "devid %llu transid %llu %s\n", + printk(KERN_INFO "devid %llu transid %llu %s\n", (unsigned long long)devid, (unsigned long long)transid, path); ret = device_list_add(path, disk_super, devid, fs_devices_ret); @@ -1012,12 +1017,14 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) } if (strcmp(device_path, "missing") == 0) { + struct list_head *cur; struct list_head *devices; struct btrfs_device *tmp; device = NULL; devices = &root->fs_info->fs_devices->devices; - list_for_each_entry(tmp, devices, dev_list) { + list_for_each(cur, devices) { + tmp = list_entry(cur, struct btrfs_device, dev_list); if 
(tmp->in_fs_metadata && !tmp->bdev) { device = tmp; break; @@ -1273,6 +1280,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) struct btrfs_trans_handle *trans; struct btrfs_device *device; struct block_device *bdev; + struct list_head *cur; struct list_head *devices; struct super_block *sb = root->fs_info->sb; u64 total_bytes; @@ -1296,7 +1304,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) mutex_lock(&root->fs_info->volume_mutex); devices = &root->fs_info->fs_devices->devices; - list_for_each_entry(device, devices, dev_list) { + list_for_each(cur, devices) { + device = list_entry(cur, struct btrfs_device, dev_list); if (device->bdev == bdev) { ret = -EEXIST; goto error; @@ -1695,6 +1704,7 @@ static u64 div_factor(u64 num, int factor) int btrfs_balance(struct btrfs_root *dev_root) { int ret; + struct list_head *cur; struct list_head *devices = &dev_root->fs_info->fs_devices->devices; struct btrfs_device *device; u64 old_size; @@ -1713,7 +1723,8 @@ int btrfs_balance(struct btrfs_root *dev_root) dev_root = dev_root->fs_info->dev_root; /* step one make some room on all the devices */ - list_for_each_entry(device, devices, dev_list) { + list_for_each(cur, devices) { + device = list_entry(cur, struct btrfs_device, dev_list); old_size = device->total_bytes; size_to_free = div_factor(old_size, 1); size_to_free = min(size_to_free, (u64)1 * 1024 * 1024); diff --git a/trunk/fs/btrfs/xattr.c b/trunk/fs/btrfs/xattr.c index a9d3bf4d2689..7f332e270894 100644 --- a/trunk/fs/btrfs/xattr.c +++ b/trunk/fs/btrfs/xattr.c @@ -21,7 +21,6 @@ #include #include #include -#include #include "ctree.h" #include "btrfs_inode.h" #include "transaction.h" @@ -46,12 +45,9 @@ ssize_t __btrfs_getxattr(struct inode *inode, const char *name, /* lookup the xattr by name */ di = btrfs_lookup_xattr(NULL, root, path, inode->i_ino, name, strlen(name), 0); - if (!di) { + if (!di || IS_ERR(di)) { ret = -ENODATA; goto out; - } else if (IS_ERR(di)) { - ret = PTR_ERR(di); - goto out; } leaf = path->nodes[0]; @@ -66,14 +62,6 @@ ssize_t __btrfs_getxattr(struct inode *inode, const char *name, ret = -ERANGE; goto out; } - - /* - * The way things are packed into the leaf is like this - * |struct btrfs_dir_item|name|data| - * where name is the xattr name, so security.foo, and data is the - * content of the xattr. 
data_ptr points to the location in memory - * where the data starts in the in memory leaf - */ data_ptr = (unsigned long)((char *)(di + 1) + btrfs_dir_name_len(leaf, di)); read_extent_buffer(leaf, buffer, data_ptr, @@ -98,7 +86,7 @@ int __btrfs_setxattr(struct inode *inode, const char *name, if (!path) return -ENOMEM; - trans = btrfs_join_transaction(root, 1); + trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); /* first lets see if we already have this xattr */ @@ -188,6 +176,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto err; + ret = 0; advance = 0; while (1) { leaf = path->nodes[0]; @@ -331,34 +320,3 @@ int btrfs_removexattr(struct dentry *dentry, const char *name) return -EOPNOTSUPP; return __btrfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE); } - -int btrfs_xattr_security_init(struct inode *inode, struct inode *dir) -{ - int err; - size_t len; - void *value; - char *suffix; - char *name; - - err = security_inode_init_security(inode, dir, &suffix, &value, &len); - if (err) { - if (err == -EOPNOTSUPP) - return 0; - return err; - } - - name = kmalloc(XATTR_SECURITY_PREFIX_LEN + strlen(suffix) + 1, - GFP_NOFS); - if (!name) { - err = -ENOMEM; - } else { - strcpy(name, XATTR_SECURITY_PREFIX); - strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix); - err = __btrfs_setxattr(inode, name, value, len, 0); - kfree(name); - } - - kfree(suffix); - kfree(value); - return err; -} diff --git a/trunk/fs/btrfs/xattr.h b/trunk/fs/btrfs/xattr.h index c71e9c3cf3f7..5b1d08f8e68d 100644 --- a/trunk/fs/btrfs/xattr.h +++ b/trunk/fs/btrfs/xattr.h @@ -36,6 +36,4 @@ extern int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); extern int btrfs_removexattr(struct dentry *dentry, const char *name); -extern int btrfs_xattr_security_init(struct inode *inode, struct inode *dir); - #endif /* __XATTR__ */ diff --git a/trunk/fs/buffer.c b/trunk/fs/buffer.c index 665d446b25bc..b58208f1640a 100644 --- a/trunk/fs/buffer.c +++ b/trunk/fs/buffer.c @@ -2688,7 +2688,7 @@ int nobh_write_end(struct file *file, struct address_space *mapping, struct buffer_head *bh; BUG_ON(fsdata != NULL && page_has_buffers(page)); - if (unlikely(copied < len) && head) + if (unlikely(copied < len) && !page_has_buffers(page)) attach_nobh_buffers(page, head); if (page_has_buffers(page)) return generic_write_end(file, mapping, pos, len, diff --git a/trunk/fs/compat.c b/trunk/fs/compat.c index 65a070e705ab..d0145ca27572 100644 --- a/trunk/fs/compat.c +++ b/trunk/fs/compat.c @@ -1407,7 +1407,7 @@ int compat_do_execve(char * filename, bprm->cred = prepare_exec_creds(); if (!bprm->cred) goto out_unlock; - check_unsafe_exec(bprm); + check_unsafe_exec(bprm, current->files); file = open_exec(filename); retval = PTR_ERR(file); diff --git a/trunk/fs/ecryptfs/crypto.c b/trunk/fs/ecryptfs/crypto.c index f6caeb1d1106..c01e043670e2 100644 --- a/trunk/fs/ecryptfs/crypto.c +++ b/trunk/fs/ecryptfs/crypto.c @@ -1716,7 +1716,7 @@ static int ecryptfs_copy_filename(char **copied_name, size_t *copied_name_size, { int rc = 0; - (*copied_name) = kmalloc((name_size + 1), GFP_KERNEL); + (*copied_name) = kmalloc((name_size + 2), GFP_KERNEL); if (!(*copied_name)) { rc = -ENOMEM; goto out; @@ -1726,7 +1726,7 @@ static int ecryptfs_copy_filename(char **copied_name, size_t *copied_name_size, * in printing out the * string in debug * messages */ - (*copied_name_size) = name_size; + 
(*copied_name_size) = (name_size + 1); out: return rc; } diff --git a/trunk/fs/exec.c b/trunk/fs/exec.c index 0dd60a01f1b4..929b58004b7e 100644 --- a/trunk/fs/exec.c +++ b/trunk/fs/exec.c @@ -1049,16 +1049,32 @@ EXPORT_SYMBOL(install_exec_creds); * - the caller must hold current->cred_exec_mutex to protect against * PTRACE_ATTACH */ -void check_unsafe_exec(struct linux_binprm *bprm) +void check_unsafe_exec(struct linux_binprm *bprm, struct files_struct *files) { - struct task_struct *p = current; + struct task_struct *p = current, *t; + unsigned long flags; + unsigned n_fs, n_files, n_sighand; bprm->unsafe = tracehook_unsafe_exec(p); - if (atomic_read(&p->fs->count) > 1 || - atomic_read(&p->files->count) > 1 || - atomic_read(&p->sighand->count) > 1) + n_fs = 1; + n_files = 1; + n_sighand = 1; + lock_task_sighand(p, &flags); + for (t = next_thread(p); t != p; t = next_thread(t)) { + if (t->fs == p->fs) + n_fs++; + if (t->files == files) + n_files++; + n_sighand++; + } + + if (atomic_read(&p->fs->count) > n_fs || + atomic_read(&p->files->count) > n_files || + atomic_read(&p->sighand->count) > n_sighand) bprm->unsafe |= LSM_UNSAFE_SHARE; + + unlock_task_sighand(p, &flags); } /* @@ -1273,7 +1289,7 @@ int do_execve(char * filename, bprm->cred = prepare_exec_creds(); if (!bprm->cred) goto out_unlock; - check_unsafe_exec(bprm); + check_unsafe_exec(bprm, displaced); file = open_exec(filename); retval = PTR_ERR(file); diff --git a/trunk/fs/internal.h b/trunk/fs/internal.h index 53af885f1732..0d8ac497b3d5 100644 --- a/trunk/fs/internal.h +++ b/trunk/fs/internal.h @@ -43,7 +43,7 @@ extern void __init chrdev_init(void); /* * exec.c */ -extern void check_unsafe_exec(struct linux_binprm *); +extern void check_unsafe_exec(struct linux_binprm *, struct files_struct *); /* * namespace.c