From 5e1d55329a1e76f29215b146a22e31dd45999a04 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Fri, 6 Feb 2009 11:45:46 +0000
Subject: [PATCH]

--- yaml ---
r: 131071
b: refs/heads/master
c: 0bf2f3aec5474da80a60e1baca629af87ecb67b6
h: refs/heads/master
i:
  131069: a386c241546e477cbe015f789baceecd088e44ad
  131067: b24afbab584f6556ae34e36a9bc7873ec8225ad0
  131063: 58f75ca64db26f9230e48c5f45d98059b716fa20
  131055: 5c79f1f60484b375f349a038218beda43e739f58
  131039: b0f0b3acca07421562b142a41b9f7af9e2f3f04d
  131007: 512be15cfd81ac616527bb2dac4a55d758802d37
  130943: 33c0295a3809ec816993c94fa594d35a630d5f68
  130815: 2fdfc7c6dcb209deb801c6cef1e057580051dfa4
  130559: 8813e85455b51189188fd60e81270afea0483bca
  130047: 1c6e33aa348a09cce2b27e193bf121abfa4520af
  129023: 3549f31bd3edba658bc99309140bbd4d123e9fcf
  126975: 5a5977599e8e4f8e2f284d41f6ae11c492c8faff
  122879: b3224e7a67c3a427e66fac19d63da08dfe2ae7e8
  114687: b5e6a135573c779a59c8ee52a2b1c8f9dfd54520
  98303: 8d3fd52306c01a77327e5a8547cc363fa60bb45f
  65535: 9f7407a1ae7917d67e0caeb8494b65a264d36c67
v: v3
---
 [refs]                          |    2 +-
 trunk/MAINTAINERS               |    8 -
 trunk/arch/x86/ia32/ia32entry.S |    8 +-
 trunk/fs/binfmt_elf.c           |   14 +-
 trunk/fs/btrfs/Kconfig          |   13 -
 trunk/fs/btrfs/async-thread.c   |   61 +----
 trunk/fs/btrfs/compression.c    |    1 +
 trunk/fs/btrfs/ctree.c          |  276 +++-----------------
 trunk/fs/btrfs/ctree.h          |   28 +-
 trunk/fs/btrfs/disk-io.c        |  120 +++------
 trunk/fs/btrfs/disk-io.h        |    2 -
 trunk/fs/btrfs/extent-tree.c    |  438 ++++++--------------------
 trunk/fs/btrfs/extent_io.c      |  132 ++--------
 trunk/fs/btrfs/extent_io.h      |   18 +-
 trunk/fs/btrfs/extent_map.c     |    1 +
 trunk/fs/btrfs/file.c           |    5 +-
 trunk/fs/btrfs/inode.c          |   84 +-----
 trunk/fs/btrfs/ioctl.c          |    1 +
 trunk/fs/btrfs/locking.c        |  208 ++-------------
 trunk/fs/btrfs/locking.h        |    6 -
 trunk/fs/btrfs/ordered-data.c   |    4 +-
 trunk/fs/btrfs/ref-cache.c      |    1 -
 trunk/fs/btrfs/ref-cache.h      |    1 +
 trunk/fs/btrfs/super.c          |    6 +-
 trunk/fs/btrfs/transaction.c    |    4 +-
 trunk/fs/btrfs/tree-defrag.c    |    1 -
 trunk/fs/btrfs/tree-log.c       |  354 +++++++++++++-------------
 trunk/fs/btrfs/volumes.c        |   49 ++--
 trunk/fs/btrfs/xattr.c          |   48 +---
 trunk/fs/btrfs/xattr.h          |    2 -
 trunk/fs/buffer.c               |    2 +-
 trunk/fs/compat.c               |    2 +-
 trunk/fs/ecryptfs/crypto.c      |    4 +-
 trunk/fs/exec.c                 |   28 +-
 trunk/fs/internal.h             |    2 +-
 35 files changed, 484 insertions(+), 1450 deletions(-)

diff --git a/[refs] b/[refs]
index b096b072228a..2dd249857957 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: ae1a25da8448271a99745da03100d5299575a269
+refs/heads/master: 0bf2f3aec5474da80a60e1baca629af87ecb67b6
diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS
index 0ea3a6d98714..421504b59c23 100644
--- a/trunk/MAINTAINERS
+++ b/trunk/MAINTAINERS
@@ -1021,14 +1021,6 @@ M:	mb@bu3sch.de
 W:	http://bu3sch.de/btgpio.php
 S:	Maintained
 
-BTRFS FILE SYSTEM
-P:	Chris Mason
-M:	chris.mason@oracle.com
-L:	linux-btrfs@vger.kernel.org
-W:	http://btrfs.wiki.kernel.org/
-T:	git kernel.org:/pub/scm/linux/kernel/git/mason/btrfs-unstable.git
-S:	Maintained
-
 BTTV VIDEO4LINUX DRIVER
 P:	Mauro Carvalho Chehab
 M:	mchehab@infradead.org
diff --git a/trunk/arch/x86/ia32/ia32entry.S b/trunk/arch/x86/ia32/ia32entry.S
index 5a0d76dc56a4..256b00b61892 100644
--- a/trunk/arch/x86/ia32/ia32entry.S
+++ b/trunk/arch/x86/ia32/ia32entry.S
@@ -418,9 +418,9 @@ ENTRY(ia32_syscall)
 	orl $TS_COMPAT,TI_status(%r10)
 	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
 	jnz ia32_tracesys
+ia32_do_syscall:
 	cmpl $(IA32_NR_syscalls-1),%eax
-	ja ia32_badsys
-ia32_do_call:
+	ja int_ret_from_sys_call	/* ia32_tracesys has set RAX(%rsp) */
IA32_ARG_FIXUP call *ia32_sys_call_table(,%rax,8) # xxx: rip relative ia32_sysret: @@ -435,9 +435,7 @@ ia32_tracesys: call syscall_trace_enter LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST - cmpl $(IA32_NR_syscalls-1),%eax - ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ - jmp ia32_do_call + jmp ia32_do_syscall END(ia32_syscall) ia32_badsys: diff --git a/trunk/fs/binfmt_elf.c b/trunk/fs/binfmt_elf.c index 33b7235f853b..e3ff2b9e602f 100644 --- a/trunk/fs/binfmt_elf.c +++ b/trunk/fs/binfmt_elf.c @@ -1208,11 +1208,9 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, * check for an ELF header. If we find one, dump the first page to * aid in determining what was mapped here. */ - if (FILTER(ELF_HEADERS) && - vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) { + if (FILTER(ELF_HEADERS) && vma->vm_file != NULL && vma->vm_pgoff == 0) { u32 __user *header = (u32 __user *) vma->vm_start; u32 word; - mm_segment_t fs = get_fs(); /* * Doing it this way gets the constant folded by GCC. */ @@ -1225,15 +1223,7 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, magic.elfmag[EI_MAG1] = ELFMAG1; magic.elfmag[EI_MAG2] = ELFMAG2; magic.elfmag[EI_MAG3] = ELFMAG3; - /* - * Switch to the user "segment" for get_user(), - * then put back what elf_core_dump() had in place. - */ - set_fs(USER_DS); - if (unlikely(get_user(word, header))) - word = 0; - set_fs(fs); - if (word == magic.cmp) + if (get_user(word, header) == 0 && word == magic.cmp) return PAGE_SIZE; } diff --git a/trunk/fs/btrfs/Kconfig b/trunk/fs/btrfs/Kconfig index 7bb3c020e570..f8fcf999ea1b 100644 --- a/trunk/fs/btrfs/Kconfig +++ b/trunk/fs/btrfs/Kconfig @@ -16,16 +16,3 @@ config BTRFS_FS module will be called btrfs. If unsure, say N. - -config BTRFS_FS_POSIX_ACL - bool "Btrfs POSIX Access Control Lists" - depends on BTRFS_FS - select FS_POSIX_ACL - help - POSIX Access Control Lists (ACLs) support permissions for users and - groups beyond the owner/group/world scheme. - - To learn more about Access Control Lists, visit the POSIX ACLs for - Linux website . - - If you don't know what Access Control Lists are, say N diff --git a/trunk/fs/btrfs/async-thread.c b/trunk/fs/btrfs/async-thread.c index c84ca1f5259a..8e2fec05dbe0 100644 --- a/trunk/fs/btrfs/async-thread.c +++ b/trunk/fs/btrfs/async-thread.c @@ -16,11 +16,11 @@ * Boston, MA 021110-1307, USA. */ +#include #include #include #include -#include -#include +# include #include "async-thread.h" #define WORK_QUEUED_BIT 0 @@ -143,7 +143,6 @@ static int worker_loop(void *arg) struct btrfs_work *work; do { spin_lock_irq(&worker->lock); -again_locked: while (!list_empty(&worker->pending)) { cur = worker->pending.next; work = list_entry(cur, struct btrfs_work, list); @@ -166,50 +165,14 @@ static int worker_loop(void *arg) check_idle_worker(worker); } + worker->working = 0; if (freezing(current)) { - worker->working = 0; - spin_unlock_irq(&worker->lock); refrigerator(); } else { + set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irq(&worker->lock); - if (!kthread_should_stop()) { - cpu_relax(); - /* - * we've dropped the lock, did someone else - * jump_in? 
- */ - smp_mb(); - if (!list_empty(&worker->pending)) - continue; - - /* - * this short schedule allows more work to - * come in without the queue functions - * needing to go through wake_up_process() - * - * worker->working is still 1, so nobody - * is going to try and wake us up - */ - schedule_timeout(1); - smp_mb(); - if (!list_empty(&worker->pending)) - continue; - - /* still no more work?, sleep for real */ - spin_lock_irq(&worker->lock); - set_current_state(TASK_INTERRUPTIBLE); - if (!list_empty(&worker->pending)) - goto again_locked; - - /* - * this makes sure we get a wakeup when someone - * adds something new to the queue - */ - worker->working = 0; - spin_unlock_irq(&worker->lock); - + if (!kthread_should_stop()) schedule(); - } __set_current_state(TASK_RUNNING); } } while (!kthread_should_stop()); @@ -387,14 +350,13 @@ int btrfs_requeue_work(struct btrfs_work *work) { struct btrfs_worker_thread *worker = work->worker; unsigned long flags; - int wake = 0; if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags)) goto out; spin_lock_irqsave(&worker->lock, flags); - list_add_tail(&work->list, &worker->pending); atomic_inc(&worker->num_pending); + list_add_tail(&work->list, &worker->pending); /* by definition we're busy, take ourselves off the idle * list @@ -406,16 +368,10 @@ int btrfs_requeue_work(struct btrfs_work *work) &worker->workers->worker_list); spin_unlock_irqrestore(&worker->workers->lock, flags); } - if (!worker->working) { - wake = 1; - worker->working = 1; - } spin_unlock_irqrestore(&worker->lock, flags); - if (wake) - wake_up_process(worker->task); -out: +out: return 0; } @@ -442,10 +398,9 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) } spin_lock_irqsave(&worker->lock, flags); - - list_add_tail(&work->list, &worker->pending); atomic_inc(&worker->num_pending); check_busy_worker(worker); + list_add_tail(&work->list, &worker->pending); /* * avoid calling into wake_up_process if this thread has already diff --git a/trunk/fs/btrfs/compression.c b/trunk/fs/btrfs/compression.c index ab07627084f1..ee848d8585d9 100644 --- a/trunk/fs/btrfs/compression.c +++ b/trunk/fs/btrfs/compression.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include "compat.h" #include "ctree.h" diff --git a/trunk/fs/btrfs/ctree.c b/trunk/fs/btrfs/ctree.c index 551177c0011a..9e46c0776816 100644 --- a/trunk/fs/btrfs/ctree.c +++ b/trunk/fs/btrfs/ctree.c @@ -54,31 +54,6 @@ struct btrfs_path *btrfs_alloc_path(void) return path; } -/* - * set all locked nodes in the path to blocking locks. This should - * be done before scheduling - */ -noinline void btrfs_set_path_blocking(struct btrfs_path *p) -{ - int i; - for (i = 0; i < BTRFS_MAX_LEVEL; i++) { - if (p->nodes[i] && p->locks[i]) - btrfs_set_lock_blocking(p->nodes[i]); - } -} - -/* - * reset all the locked nodes in the patch to spinning locks. 
- */ -noinline void btrfs_clear_path_blocking(struct btrfs_path *p) -{ - int i; - for (i = 0; i < BTRFS_MAX_LEVEL; i++) { - if (p->nodes[i] && p->locks[i]) - btrfs_clear_lock_blocking(p->nodes[i]); - } -} - /* this also releases the path */ void btrfs_free_path(struct btrfs_path *p) { @@ -297,8 +272,6 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, if (IS_ERR(cow)) return PTR_ERR(cow); - /* cow is set to blocking by btrfs_init_new_buffer */ - copy_extent_buffer(cow, buf, 0, 0, cow->len); btrfs_set_header_bytenr(cow, cow->start); btrfs_set_header_generation(cow, trans->transid); @@ -415,20 +388,17 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, WARN_ON(1); } + spin_lock(&root->fs_info->hash_lock); if (btrfs_header_generation(buf) == trans->transid && btrfs_header_owner(buf) == root->root_key.objectid && !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { *cow_ret = buf; + spin_unlock(&root->fs_info->hash_lock); WARN_ON(prealloc_dest); return 0; } - + spin_unlock(&root->fs_info->hash_lock); search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); - - if (parent) - btrfs_set_lock_blocking(parent); - btrfs_set_lock_blocking(buf); - ret = __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0, prealloc_dest); @@ -534,8 +504,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, if (parent_nritems == 1) return 0; - btrfs_set_lock_blocking(parent); - for (i = start_slot; i < end_slot; i++) { int close = 1; @@ -596,7 +564,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, search_start = last_block; btrfs_tree_lock(cur); - btrfs_set_lock_blocking(cur); err = __btrfs_cow_block(trans, root, cur, parent, i, &cur, search_start, min(16 * blocksize, @@ -895,7 +862,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, return 0; mid = path->nodes[level]; - WARN_ON(!path->locks[level]); WARN_ON(btrfs_header_generation(mid) != trans->transid); @@ -918,7 +884,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, /* promote the child to a root */ child = read_node_slot(root, mid, 0); btrfs_tree_lock(child); - btrfs_set_lock_blocking(child); BUG_ON(!child); ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0); BUG_ON(ret); @@ -935,7 +900,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, add_root_to_dirty_list(root); btrfs_tree_unlock(child); - path->locks[level] = 0; path->nodes[level] = NULL; clean_tree_block(trans, root, mid); @@ -960,7 +924,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, left = read_node_slot(root, parent, pslot - 1); if (left) { btrfs_tree_lock(left); - btrfs_set_lock_blocking(left); wret = btrfs_cow_block(trans, root, left, parent, pslot - 1, &left, 0); if (wret) { @@ -971,7 +934,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, right = read_node_slot(root, parent, pslot + 1); if (right) { btrfs_tree_lock(right); - btrfs_set_lock_blocking(right); wret = btrfs_cow_block(trans, root, right, parent, pslot + 1, &right, 0); if (wret) { @@ -1147,8 +1109,6 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, u32 left_nr; btrfs_tree_lock(left); - btrfs_set_lock_blocking(left); - left_nr = btrfs_header_nritems(left); if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { wret = 1; @@ -1195,10 +1155,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, */ if (right) { u32 right_nr; - btrfs_tree_lock(right); - 
btrfs_set_lock_blocking(right); - right_nr = btrfs_header_nritems(right); if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { wret = 1; @@ -1253,7 +1210,8 @@ static noinline void reada_for_search(struct btrfs_root *root, struct btrfs_disk_key disk_key; u32 nritems; u64 search; - u64 target; + u64 lowest_read; + u64 highest_read; u64 nread = 0; int direction = path->reada; struct extent_buffer *eb; @@ -1277,7 +1235,8 @@ static noinline void reada_for_search(struct btrfs_root *root, return; } - target = search; + highest_read = search; + lowest_read = search; nritems = btrfs_header_nritems(node); nr = slot; @@ -1297,80 +1256,27 @@ static noinline void reada_for_search(struct btrfs_root *root, break; } search = btrfs_node_blockptr(node, nr); - if ((search <= target && target - search <= 65536) || - (search > target && search - target <= 65536)) { + if ((search >= lowest_read && search <= highest_read) || + (search < lowest_read && lowest_read - search <= 16384) || + (search > highest_read && search - highest_read <= 16384)) { readahead_tree_block(root, search, blocksize, btrfs_node_ptr_generation(node, nr)); nread += blocksize; } nscan++; - if ((nread > 65536 || nscan > 32)) + if (path->reada < 2 && (nread > (64 * 1024) || nscan > 32)) break; - } -} - -/* - * returns -EAGAIN if it had to drop the path, or zero if everything was in - * cache - */ -static noinline int reada_for_balance(struct btrfs_root *root, - struct btrfs_path *path, int level) -{ - int slot; - int nritems; - struct extent_buffer *parent; - struct extent_buffer *eb; - u64 gen; - u64 block1 = 0; - u64 block2 = 0; - int ret = 0; - int blocksize; - parent = path->nodes[level - 1]; - if (!parent) - return 0; - - nritems = btrfs_header_nritems(parent); - slot = path->slots[level]; - blocksize = btrfs_level_size(root, level); + if (nread > (256 * 1024) || nscan > 128) + break; - if (slot > 0) { - block1 = btrfs_node_blockptr(parent, slot - 1); - gen = btrfs_node_ptr_generation(parent, slot - 1); - eb = btrfs_find_tree_block(root, block1, blocksize); - if (eb && btrfs_buffer_uptodate(eb, gen)) - block1 = 0; - free_extent_buffer(eb); - } - if (slot < nritems) { - block2 = btrfs_node_blockptr(parent, slot + 1); - gen = btrfs_node_ptr_generation(parent, slot + 1); - eb = btrfs_find_tree_block(root, block2, blocksize); - if (eb && btrfs_buffer_uptodate(eb, gen)) - block2 = 0; - free_extent_buffer(eb); - } - if (block1 || block2) { - ret = -EAGAIN; - btrfs_release_path(root, path); - if (block1) - readahead_tree_block(root, block1, blocksize, 0); - if (block2) - readahead_tree_block(root, block2, blocksize, 0); - - if (block1) { - eb = read_tree_block(root, block1, blocksize, 0); - free_extent_buffer(eb); - } - if (block1) { - eb = read_tree_block(root, block2, blocksize, 0); - free_extent_buffer(eb); - } + if (search < lowest_read) + lowest_read = search; + if (search > highest_read) + highest_read = search; } - return ret; } - /* * when we walk down the tree, it is usually safe to unlock the higher layers * in the tree. The exceptions are when our path goes through slot 0, because @@ -1421,32 +1327,6 @@ static noinline void unlock_up(struct btrfs_path *path, int level, } } -/* - * This releases any locks held in the path starting at level and - * going all the way up to the root. - * - * btrfs_search_slot will keep the lock held on higher nodes in a few - * corner cases, such as COW of the block at slot zero in the node. 
This - * ignores those rules, and it should only be called when there are no - * more updates to be done higher up in the tree. - */ -noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level) -{ - int i; - - if (path->keep_locks || path->lowest_level) - return; - - for (i = level; i < BTRFS_MAX_LEVEL; i++) { - if (!path->nodes[i]) - continue; - if (!path->locks[i]) - continue; - btrfs_tree_unlock(path->nodes[i]); - path->locks[i] = 0; - } -} - /* * look for key in the tree. path is filled in with nodes along the way * if key is found, we return zero and you can find the item in the leaf @@ -1507,30 +1387,31 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root int wret; /* is a cow on this block not required */ + spin_lock(&root->fs_info->hash_lock); if (btrfs_header_generation(b) == trans->transid && btrfs_header_owner(b) == root->root_key.objectid && !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) { + spin_unlock(&root->fs_info->hash_lock); goto cow_done; } + spin_unlock(&root->fs_info->hash_lock); /* ok, we have to cow, is our old prealloc the right * size? */ if (prealloc_block.objectid && prealloc_block.offset != b->len) { - btrfs_release_path(root, p); btrfs_free_reserved_extent(root, prealloc_block.objectid, prealloc_block.offset); prealloc_block.objectid = 0; - goto again; } /* * for higher level blocks, try not to allocate blocks * with the block and the parent locks held. */ - if (level > 0 && !prealloc_block.objectid && + if (level > 1 && !prealloc_block.objectid && btrfs_path_lock_waiting(p, level)) { u32 size = b->len; u64 hint = b->start; @@ -1544,8 +1425,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root goto again; } - btrfs_set_path_blocking(p); - wret = btrfs_cow_block(trans, root, b, p->nodes[level + 1], p->slots[level + 1], @@ -1567,22 +1446,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root if (!p->skip_locking) p->locks[level] = 1; - btrfs_clear_path_blocking(p); - - /* - * we have a lock on b and as long as we aren't changing - * the tree, there is no way to for the items in b to change. - * It is safe to drop the lock on our parent before we - * go through the expensive btree search on b. - * - * If cow is true, then we might be changing slot zero, - * which may require changing the parent. So, we can't - * drop the lock until after we know which slot we're - * operating on. 
- */ - if (!cow) - btrfs_unlock_up_safe(p, level + 1); - ret = check_block(root, p, level); if (ret) { ret = -1; @@ -1590,7 +1453,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root } ret = bin_search(b, key, level, &slot); - if (level != 0) { if (ret && slot > 0) slot -= 1; @@ -1598,16 +1460,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >= BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { - int sret; - - sret = reada_for_balance(root, p, level); - if (sret) - goto again; - - btrfs_set_path_blocking(p); - sret = split_node(trans, root, p, level); - btrfs_clear_path_blocking(p); - + int sret = split_node(trans, root, p, level); BUG_ON(sret > 0); if (sret) { ret = sret; @@ -1615,19 +1468,9 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root } b = p->nodes[level]; slot = p->slots[level]; - } else if (ins_len < 0 && - btrfs_header_nritems(b) < - BTRFS_NODEPTRS_PER_BLOCK(root) / 4) { - int sret; - - sret = reada_for_balance(root, p, level); - if (sret) - goto again; - - btrfs_set_path_blocking(p); - sret = balance_level(trans, root, p, level); - btrfs_clear_path_blocking(p); - + } else if (ins_len < 0) { + int sret = balance_level(trans, root, p, + level); if (sret) { ret = sret; goto done; @@ -1661,7 +1504,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root * of the btree by dropping locks before * we read. */ - if (level > 0) { + if (level > 1) { btrfs_release_path(NULL, p); if (tmp) free_extent_buffer(tmp); @@ -1676,7 +1519,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root free_extent_buffer(tmp); goto again; } else { - btrfs_set_path_blocking(p); if (tmp) free_extent_buffer(tmp); if (should_reada) @@ -1686,29 +1528,14 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root b = read_node_slot(root, b, slot); } } - if (!p->skip_locking) { - int lret; - - btrfs_clear_path_blocking(p); - lret = btrfs_try_spin_lock(b); - - if (!lret) { - btrfs_set_path_blocking(p); - btrfs_tree_lock(b); - btrfs_clear_path_blocking(p); - } - } + if (!p->skip_locking) + btrfs_tree_lock(b); } else { p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, b) < ins_len) { - int sret; - - btrfs_set_path_blocking(p); - sret = split_leaf(trans, root, key, + int sret = split_leaf(trans, root, key, p, ins_len, ret == 0); - btrfs_clear_path_blocking(p); - BUG_ON(sret > 0); if (sret) { ret = sret; @@ -1722,16 +1549,12 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root } ret = 1; done: - /* - * we don't really know what they plan on doing with the path - * from here on, so for now just mark it as blocking - */ - btrfs_set_path_blocking(p); if (prealloc_block.objectid) { btrfs_free_reserved_extent(root, prealloc_block.objectid, prealloc_block.offset); } + return ret; } @@ -1755,8 +1578,6 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans, ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0); BUG_ON(ret); - btrfs_set_lock_blocking(eb); - parent = eb; while (1) { level = btrfs_header_level(parent); @@ -1781,7 +1602,6 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans, eb = read_tree_block(root, bytenr, blocksize, generation); btrfs_tree_lock(eb); - btrfs_set_lock_blocking(eb); } /* @@ -1806,7 +1626,6 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans, eb = read_tree_block(root, bytenr, blocksize, generation); btrfs_tree_lock(eb); - 
btrfs_set_lock_blocking(eb); } ret = btrfs_cow_block(trans, root, eb, parent, slot, @@ -2353,8 +2172,6 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root right = read_node_slot(root, upper, slot + 1); btrfs_tree_lock(right); - btrfs_set_lock_blocking(right); - free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size) goto out_unlock; @@ -2550,8 +2367,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root left = read_node_slot(root, path->nodes[1], slot - 1); btrfs_tree_lock(left); - btrfs_set_lock_blocking(left); - free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size) { ret = 1; @@ -3010,12 +2825,6 @@ int btrfs_split_item(struct btrfs_trans_handle *trans, path->keep_locks = 0; BUG_ON(ret); - /* - * make sure any changes to the path from split_leaf leave it - * in a blocking state - */ - btrfs_set_path_blocking(path); - leaf = path->nodes[0]; BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); @@ -3545,7 +3354,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, BUG(); } out: - btrfs_unlock_up_safe(path, 1); return ret; } @@ -3633,22 +3441,15 @@ noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, { int ret; u64 root_gen = btrfs_header_generation(path->nodes[1]); - u64 parent_start = path->nodes[1]->start; - u64 parent_owner = btrfs_header_owner(path->nodes[1]); ret = del_ptr(trans, root, path, 1, path->slots[1]); if (ret) return ret; - /* - * btrfs_free_extent is expensive, we want to make sure we - * aren't holding any locks when we call it - */ - btrfs_unlock_up_safe(path, 0); - ret = btrfs_free_extent(trans, root, bytenr, btrfs_level_size(root, 0), - parent_start, parent_owner, + path->nodes[1]->start, + btrfs_header_owner(path->nodes[1]), root_gen, 0, 1); return ret; } @@ -3920,14 +3721,12 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, */ if (slot >= nritems) { path->slots[level] = slot; - btrfs_set_path_blocking(path); sret = btrfs_find_next_key(root, path, min_key, level, cache_only, min_trans); if (sret == 0) { btrfs_release_path(root, path); goto again; } else { - btrfs_clear_path_blocking(path); goto out; } } @@ -3939,20 +3738,16 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, unlock_up(path, level, 1); goto out; } - btrfs_set_path_blocking(path); cur = read_node_slot(root, cur, slot); btrfs_tree_lock(cur); - path->locks[level - 1] = 1; path->nodes[level - 1] = cur; unlock_up(path, level, 1); - btrfs_clear_path_blocking(path); } out: if (ret == 0) memcpy(min_key, &found_key, sizeof(found_key)); - btrfs_set_path_blocking(path); return ret; } @@ -4048,7 +3843,6 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) if (ret < 0) return ret; - btrfs_set_path_blocking(path); nritems = btrfs_header_nritems(path->nodes[0]); /* * by releasing the path above we dropped all our locks. 
A balance @@ -4079,7 +3873,6 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) free_extent_buffer(next); } - /* the path was set to blocking above */ if (level == 1 && (path->locks[1] || path->skip_locking) && path->reada) reada_for_search(root, path, level, slot, 0); @@ -4088,7 +3881,6 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) if (!path->skip_locking) { WARN_ON(!btrfs_tree_locked(c)); btrfs_tree_lock(next); - btrfs_set_lock_blocking(next); } break; } @@ -4105,15 +3897,12 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) path->locks[level] = 1; if (!level) break; - - btrfs_set_path_blocking(path); if (level == 1 && path->locks[1] && path->reada) reada_for_search(root, path, level, slot, 0); next = read_node_slot(root, next, 0); if (!path->skip_locking) { WARN_ON(!btrfs_tree_locked(path->nodes[level])); btrfs_tree_lock(next); - btrfs_set_lock_blocking(next); } } done: @@ -4138,7 +3927,6 @@ int btrfs_previous_item(struct btrfs_root *root, while (1) { if (path->slots[0] == 0) { - btrfs_set_path_blocking(path); ret = btrfs_prev_leaf(root, path); if (ret != 0) return ret; diff --git a/trunk/fs/btrfs/ctree.h b/trunk/fs/btrfs/ctree.h index 531db112c8bd..eee060f88113 100644 --- a/trunk/fs/btrfs/ctree.h +++ b/trunk/fs/btrfs/ctree.h @@ -454,11 +454,17 @@ struct btrfs_timespec { __le32 nsec; } __attribute__ ((__packed__)); -enum btrfs_compression_type { +typedef enum { BTRFS_COMPRESS_NONE = 0, BTRFS_COMPRESS_ZLIB = 1, BTRFS_COMPRESS_LAST = 2, -}; +} btrfs_compression_type; + +/* we don't understand any encryption methods right now */ +typedef enum { + BTRFS_ENCRYPTION_NONE = 0, + BTRFS_ENCRYPTION_LAST = 1, +} btrfs_encryption_type; struct btrfs_inode_item { /* nfs style generation number */ @@ -695,7 +701,9 @@ struct btrfs_fs_info { struct btrfs_transaction *running_transaction; wait_queue_head_t transaction_throttle; wait_queue_head_t transaction_wait; + wait_queue_head_t async_submit_wait; + wait_queue_head_t tree_log_wait; struct btrfs_super_block super_copy; struct btrfs_super_block super_for_commit; @@ -703,6 +711,7 @@ struct btrfs_fs_info { struct super_block *sb; struct inode *btree_inode; struct backing_dev_info bdi; + spinlock_t hash_lock; struct mutex trans_mutex; struct mutex tree_log_mutex; struct mutex transaction_kthread_mutex; @@ -721,6 +730,10 @@ struct btrfs_fs_info { atomic_t async_submit_draining; atomic_t nr_async_bios; atomic_t async_delalloc_pages; + atomic_t tree_log_writers; + atomic_t tree_log_commit; + unsigned long tree_log_batch; + u64 tree_log_transid; /* * this is used by the balancing code to wait for all the pending @@ -820,14 +833,7 @@ struct btrfs_root { struct kobject root_kobj; struct completion kobj_unregister; struct mutex objectid_mutex; - struct mutex log_mutex; - wait_queue_head_t log_writer_wait; - wait_queue_head_t log_commit_wait[2]; - atomic_t log_writers; - atomic_t log_commit[2]; - unsigned long log_transid; - unsigned long log_batch; u64 objectid; u64 last_trans; @@ -1835,10 +1841,6 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); void btrfs_free_path(struct btrfs_path *p); void btrfs_init_path(struct btrfs_path *p); -void btrfs_set_path_blocking(struct btrfs_path *p); -void btrfs_clear_path_blocking(struct btrfs_path *p); -void btrfs_unlock_up_safe(struct btrfs_path *p, int level); - int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int slot, int nr); int 
btrfs_del_leaf(struct btrfs_trans_handle *trans, diff --git a/trunk/fs/btrfs/disk-io.c b/trunk/fs/btrfs/disk-io.c index 5aebddd71193..81a313874ae5 100644 --- a/trunk/fs/btrfs/disk-io.c +++ b/trunk/fs/btrfs/disk-io.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. */ +#include #include #include #include @@ -799,7 +800,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); if (ret == 0) - set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); + buf->flags |= EXTENT_UPTODATE; else WARN_ON(1); return buf; @@ -813,10 +814,6 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (btrfs_header_generation(buf) == root->fs_info->running_transaction->transid) { WARN_ON(!btrfs_tree_locked(buf)); - - /* ugh, clear_extent_buffer_dirty can be expensive */ - btrfs_set_lock_blocking(buf); - clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); } @@ -853,14 +850,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, spin_lock_init(&root->list_lock); mutex_init(&root->objectid_mutex); mutex_init(&root->log_mutex); - init_waitqueue_head(&root->log_writer_wait); - init_waitqueue_head(&root->log_commit_wait[0]); - init_waitqueue_head(&root->log_commit_wait[1]); - atomic_set(&root->log_commit[0], 0); - atomic_set(&root->log_commit[1], 0); - atomic_set(&root->log_writers, 0); - root->log_batch = 0; - root->log_transid = 0; extent_io_tree_init(&root->dirty_log_pages, fs_info->btree_inode->i_mapping, GFP_NOFS); @@ -945,16 +934,15 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, return 0; } -static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) +int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) { struct btrfs_root *root; struct btrfs_root *tree_root = fs_info->tree_root; - struct extent_buffer *leaf; root = kzalloc(sizeof(*root), GFP_NOFS); if (!root) - return ERR_PTR(-ENOMEM); + return -ENOMEM; __setup_root(tree_root->nodesize, tree_root->leafsize, tree_root->sectorsize, tree_root->stripesize, @@ -963,23 +951,12 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; root->root_key.type = BTRFS_ROOT_ITEM_KEY; root->root_key.offset = BTRFS_TREE_LOG_OBJECTID; - /* - * log trees do not get reference counted because they go away - * before a real commit is actually done. They do store pointers - * to file data extents, and those reference counts still get - * updated (along with back refs to the log tree). 
- */ root->ref_cows = 0; - leaf = btrfs_alloc_free_block(trans, root, root->leafsize, - 0, BTRFS_TREE_LOG_OBJECTID, - trans->transid, 0, 0, 0); - if (IS_ERR(leaf)) { - kfree(root); - return ERR_CAST(leaf); - } + root->node = btrfs_alloc_free_block(trans, root, root->leafsize, + 0, BTRFS_TREE_LOG_OBJECTID, + trans->transid, 0, 0, 0); - root->node = leaf; btrfs_set_header_nritems(root->node, 0); btrfs_set_header_level(root->node, 0); btrfs_set_header_bytenr(root->node, root->node->start); @@ -991,48 +968,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, BTRFS_FSID_SIZE); btrfs_mark_buffer_dirty(root->node); btrfs_tree_unlock(root->node); - return root; -} - -int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) -{ - struct btrfs_root *log_root; - - log_root = alloc_log_tree(trans, fs_info); - if (IS_ERR(log_root)) - return PTR_ERR(log_root); - WARN_ON(fs_info->log_root_tree); - fs_info->log_root_tree = log_root; - return 0; -} - -int btrfs_add_log_tree(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - struct btrfs_root *log_root; - struct btrfs_inode_item *inode_item; - - log_root = alloc_log_tree(trans, root->fs_info); - if (IS_ERR(log_root)) - return PTR_ERR(log_root); - - log_root->last_trans = trans->transid; - log_root->root_key.offset = root->root_key.objectid; - - inode_item = &log_root->root_item.inode; - inode_item->generation = cpu_to_le64(1); - inode_item->size = cpu_to_le64(3); - inode_item->nlink = cpu_to_le32(1); - inode_item->nbytes = cpu_to_le64(root->leafsize); - inode_item->mode = cpu_to_le32(S_IFDIR | 0755); - - btrfs_set_root_bytenr(&log_root->root_item, log_root->node->start); - btrfs_set_root_generation(&log_root->root_item, trans->transid); - - WARN_ON(root->log_root); - root->log_root = log_root; - root->log_transid = 0; + fs_info->log_root_tree = root; return 0; } @@ -1200,6 +1136,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) { struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data; int ret = 0; + struct list_head *cur; struct btrfs_device *device; struct backing_dev_info *bdi; #if 0 @@ -1207,7 +1144,8 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) btrfs_congested_async(info, 0)) return 1; #endif - list_for_each_entry(device, &info->fs_devices->devices, dev_list) { + list_for_each(cur, &info->fs_devices->devices) { + device = list_entry(cur, struct btrfs_device, dev_list); if (!device->bdev) continue; bdi = blk_get_backing_dev_info(device->bdev); @@ -1225,11 +1163,13 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) */ static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page) { + struct list_head *cur; struct btrfs_device *device; struct btrfs_fs_info *info; info = (struct btrfs_fs_info *)bdi->unplug_io_data; - list_for_each_entry(device, &info->fs_devices->devices, dev_list) { + list_for_each(cur, &info->fs_devices->devices) { + device = list_entry(cur, struct btrfs_device, dev_list); if (!device->bdev) continue; @@ -1507,6 +1447,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->hashers); INIT_LIST_HEAD(&fs_info->delalloc_inodes); + spin_lock_init(&fs_info->hash_lock); spin_lock_init(&fs_info->delalloc_lock); spin_lock_init(&fs_info->new_trans_lock); spin_lock_init(&fs_info->ref_cache_lock); @@ -1594,6 +1535,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, 
init_waitqueue_head(&fs_info->transaction_throttle); init_waitqueue_head(&fs_info->transaction_wait); init_waitqueue_head(&fs_info->async_submit_wait); + init_waitqueue_head(&fs_info->tree_log_wait); + atomic_set(&fs_info->tree_log_commit, 0); + atomic_set(&fs_info->tree_log_writers, 0); + fs_info->tree_log_transid = 0; __setup_root(4096, 4096, 4096, 4096, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); @@ -1682,8 +1627,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, * low idle thresh */ fs_info->endio_workers.idle_thresh = 4; - fs_info->endio_meta_workers.idle_thresh = 4; - fs_info->endio_write_workers.idle_thresh = 64; fs_info->endio_meta_write_workers.idle_thresh = 64; @@ -1797,13 +1740,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, "btrfs-cleaner"); - if (IS_ERR(fs_info->cleaner_kthread)) + if (!fs_info->cleaner_kthread) goto fail_csum_root; fs_info->transaction_kthread = kthread_run(transaction_kthread, tree_root, "btrfs-transaction"); - if (IS_ERR(fs_info->transaction_kthread)) + if (!fs_info->transaction_kthread) goto fail_cleaner; if (btrfs_super_log_root(disk_super) != 0) { @@ -1885,14 +1828,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, fail_iput: invalidate_inode_pages2(fs_info->btree_inode->i_mapping); iput(fs_info->btree_inode); - +fail: btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); - bdi_destroy(&fs_info->bdi); -fail: kfree(extent_root); kfree(tree_root); + bdi_destroy(&fs_info->bdi); kfree(fs_info); kfree(chunk_root); kfree(dev_root); @@ -2053,6 +1995,7 @@ static int write_dev_supers(struct btrfs_device *device, int write_all_supers(struct btrfs_root *root, int max_mirrors) { + struct list_head *cur; struct list_head *head = &root->fs_info->fs_devices->devices; struct btrfs_device *dev; struct btrfs_super_block *sb; @@ -2068,7 +2011,8 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) sb = &root->fs_info->super_for_commit; dev_item = &sb->dev_item; - list_for_each_entry(dev, head, dev_list) { + list_for_each(cur, head) { + dev = list_entry(cur, struct btrfs_device, dev_list); if (!dev->bdev) { total_errors++; continue; @@ -2101,7 +2045,8 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) } total_errors = 0; - list_for_each_entry(dev, head, dev_list) { + list_for_each(cur, head) { + dev = list_entry(cur, struct btrfs_device, dev_list); if (!dev->bdev) continue; if (!dev->in_fs_metadata || !dev->writeable) @@ -2315,8 +2260,6 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) u64 transid = btrfs_header_generation(buf); struct inode *btree_inode = root->fs_info->btree_inode; - btrfs_set_lock_blocking(buf); - WARN_ON(!btrfs_tree_locked(buf)); if (transid != root->fs_info->generation) { printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " @@ -2359,13 +2302,14 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) int ret; ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); if (ret == 0) - set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); + buf->flags |= EXTENT_UPTODATE; return ret; } int btree_lock_page_hook(struct page *page) { struct inode *inode = page->mapping->host; + struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_buffer *eb; unsigned long len; @@ -2380,7 +2324,9 @@ int btree_lock_page_hook(struct page *page) goto out; 
btrfs_tree_lock(eb); + spin_lock(&root->fs_info->hash_lock); btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); + spin_unlock(&root->fs_info->hash_lock); btrfs_tree_unlock(eb); free_extent_buffer(eb); out: diff --git a/trunk/fs/btrfs/disk-io.h b/trunk/fs/btrfs/disk-io.h index 494a56eb2986..c0ff404c31b7 100644 --- a/trunk/fs/btrfs/disk-io.h +++ b/trunk/fs/btrfs/disk-io.h @@ -98,7 +98,5 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); -int btrfs_add_log_tree(struct btrfs_trans_handle *trans, - struct btrfs_root *root); int btree_lock_page_hook(struct page *page); #endif diff --git a/trunk/fs/btrfs/extent-tree.c b/trunk/fs/btrfs/extent-tree.c index 7527523c2d2d..293da650873f 100644 --- a/trunk/fs/btrfs/extent-tree.c +++ b/trunk/fs/btrfs/extent-tree.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include "compat.h" #include "hash.h" #include "crc32c.h" @@ -30,6 +30,7 @@ #include "volumes.h" #include "locking.h" #include "ref-cache.h" +#include "compat.h" #define PENDING_EXTENT_INSERT 0 #define PENDING_EXTENT_DELETE 1 @@ -325,8 +326,10 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, u64 flags) { struct list_head *head = &info->space_info; + struct list_head *cur; struct btrfs_space_info *found; - list_for_each_entry(found, head, list) { + list_for_each(cur, head) { + found = list_entry(cur, struct btrfs_space_info, list); if (found->flags == flags) return found; } @@ -1522,55 +1525,15 @@ int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, return ret; } -/* when a block goes through cow, we update the reference counts of - * everything that block points to. The internal pointers of the block - * can be in just about any order, and it is likely to have clusters of - * things that are close together and clusters of things that are not. - * - * To help reduce the seeks that come with updating all of these reference - * counts, sort them by byte number before actual updates are done. - * - * struct refsort is used to match byte number to slot in the btree block. - * we sort based on the byte number and then use the slot to actually - * find the item. - * - * struct refsort is smaller than strcut btrfs_item and smaller than - * struct btrfs_key_ptr. Since we're currently limited to the page size - * for a btree block, there's no way for a kmalloc of refsorts for a - * single node to be bigger than a page. 
- */ -struct refsort { - u64 bytenr; - u32 slot; -}; - -/* - * for passing into sort() - */ -static int refsort_cmp(const void *a_void, const void *b_void) -{ - const struct refsort *a = a_void; - const struct refsort *b = b_void; - - if (a->bytenr < b->bytenr) - return -1; - if (a->bytenr > b->bytenr) - return 1; - return 0; -} - - -noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct extent_buffer *orig_buf, - struct extent_buffer *buf, u32 *nr_extents) +int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct extent_buffer *orig_buf, struct extent_buffer *buf, + u32 *nr_extents) { u64 bytenr; u64 ref_root; u64 orig_root; u64 ref_generation; u64 orig_generation; - struct refsort *sorted; u32 nritems; u32 nr_file_extents = 0; struct btrfs_key key; @@ -1579,8 +1542,6 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, int level; int ret = 0; int faili = 0; - int refi = 0; - int slot; int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, u64, u64, u64, u64, u64, u64, u64, u64); @@ -1592,9 +1553,6 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, nritems = btrfs_header_nritems(buf); level = btrfs_header_level(buf); - sorted = kmalloc(sizeof(struct refsort) * nritems, GFP_NOFS); - BUG_ON(!sorted); - if (root->ref_cows) { process_func = __btrfs_inc_extent_ref; } else { @@ -1607,11 +1565,6 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, process_func = __btrfs_update_extent_ref; } - /* - * we make two passes through the items. In the first pass we - * only record the byte number and slot. Then we sort based on - * byte number and do the actual work based on the sorted results - */ for (i = 0; i < nritems; i++) { cond_resched(); if (level == 0) { @@ -1628,32 +1581,6 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, continue; nr_file_extents++; - sorted[refi].bytenr = bytenr; - sorted[refi].slot = i; - refi++; - } else { - bytenr = btrfs_node_blockptr(buf, i); - sorted[refi].bytenr = bytenr; - sorted[refi].slot = i; - refi++; - } - } - /* - * if refi == 0, we didn't actually put anything into the sorted - * array and we're done - */ - if (refi == 0) - goto out; - - sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); - - for (i = 0; i < refi; i++) { - cond_resched(); - slot = sorted[i].slot; - bytenr = sorted[i].bytenr; - - if (level == 0) { - btrfs_item_key_to_cpu(buf, &key, slot); ret = process_func(trans, root, bytenr, orig_buf->start, buf->start, @@ -1662,25 +1589,25 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, key.objectid); if (ret) { - faili = slot; + faili = i; WARN_ON(1); goto fail; } } else { + bytenr = btrfs_node_blockptr(buf, i); ret = process_func(trans, root, bytenr, orig_buf->start, buf->start, orig_root, ref_root, orig_generation, ref_generation, level - 1); if (ret) { - faili = slot; + faili = i; WARN_ON(1); goto fail; } } } out: - kfree(sorted); if (nr_extents) { if (level == 0) *nr_extents = nr_file_extents; @@ -1689,7 +1616,6 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, } return 0; fail: - kfree(sorted); WARN_ON(1); return ret; } @@ -2233,8 +2159,7 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, ret = find_first_extent_bit(&info->extent_ins, search, &start, &end, EXTENT_WRITEBACK); if (ret) { - if (skipped && all && !num_inserts && - list_empty(&update_list)) { + if (skipped && all && !num_inserts) { skipped = 0; search = 0; continue; @@ -2622,7 +2547,6 @@ static int 
del_pending_extents(struct btrfs_trans_handle *trans, if (ret) { if (all && skipped && !nr) { search = 0; - skipped = 0; continue; } mutex_unlock(&info->extent_ins_mutex); @@ -2776,9 +2700,13 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, /* if metadata always pin */ if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { - mutex_lock(&root->fs_info->pinned_mutex); - btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); - mutex_unlock(&root->fs_info->pinned_mutex); + struct btrfs_block_group_cache *cache; + + /* btrfs_free_reserved_extent */ + cache = btrfs_lookup_block_group(root->fs_info, bytenr); + BUG_ON(!cache); + btrfs_add_free_space(cache, bytenr, num_bytes); + put_block_group(cache); update_reserved_extents(root, bytenr, num_bytes, 0); return 0; } @@ -3086,6 +3014,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, static void dump_space_info(struct btrfs_space_info *info, u64 bytes) { struct btrfs_block_group_cache *cache; + struct list_head *l; printk(KERN_INFO "space_info has %llu free, is %sfull\n", (unsigned long long)(info->total_bytes - info->bytes_used - @@ -3093,7 +3022,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) (info->full) ? "" : "not "); down_read(&info->groups_sem); - list_for_each_entry(cache, &info->block_groups, list) { + list_for_each(l, &info->block_groups) { + cache = list_entry(l, struct btrfs_block_group_cache, list); spin_lock(&cache->lock); printk(KERN_INFO "block group %llu has %llu bytes, %llu used " "%llu pinned %llu reserved\n", @@ -3412,10 +3342,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, btrfs_set_header_generation(buf, trans->transid); btrfs_tree_lock(buf); clean_tree_block(trans, root, buf); - - btrfs_set_lock_blocking(buf); btrfs_set_buffer_uptodate(buf); - if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { set_extent_dirty(&root->dirty_log_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); @@ -3424,7 +3351,6 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, buf->start + buf->len - 1, GFP_NOFS); } trans->blocks_used++; - /* this returns a buffer locked for blocking */ return buf; } @@ -3462,73 +3388,36 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, { u64 leaf_owner; u64 leaf_generation; - struct refsort *sorted; struct btrfs_key key; struct btrfs_file_extent_item *fi; int i; int nritems; int ret; - int refi = 0; - int slot; BUG_ON(!btrfs_is_leaf(leaf)); nritems = btrfs_header_nritems(leaf); leaf_owner = btrfs_header_owner(leaf); leaf_generation = btrfs_header_generation(leaf); - sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); - /* we do this loop twice. The first time we build a list - * of the extents we have a reference on, then we sort the list - * by bytenr. The second time around we actually do the - * extent freeing. 
- */ for (i = 0; i < nritems; i++) { u64 disk_bytenr; cond_resched(); btrfs_item_key_to_cpu(leaf, &key, i); - - /* only extents have references, skip everything else */ if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; - fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); - - /* inline extents live in the btree, they don't have refs */ if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) continue; - + /* + * FIXME make sure to insert a trans record that + * repeats the snapshot del on crash + */ disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); - - /* holes don't have refs */ if (disk_bytenr == 0) continue; - sorted[refi].bytenr = disk_bytenr; - sorted[refi].slot = i; - refi++; - } - - if (refi == 0) - goto out; - - sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); - - for (i = 0; i < refi; i++) { - u64 disk_bytenr; - - disk_bytenr = sorted[i].bytenr; - slot = sorted[i].slot; - - cond_resched(); - - btrfs_item_key_to_cpu(leaf, &key, slot); - if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) - continue; - - fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); - ret = __btrfs_free_extent(trans, root, disk_bytenr, btrfs_file_extent_disk_num_bytes(leaf, fi), leaf->start, leaf_owner, leaf_generation, @@ -3539,8 +3428,6 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, wake_up(&root->fs_info->transaction_throttle); cond_resched(); } -out: - kfree(sorted); return 0; } @@ -3550,25 +3437,9 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, { int i; int ret; - struct btrfs_extent_info *info; - struct refsort *sorted; - - if (ref->nritems == 0) - return 0; + struct btrfs_extent_info *info = ref->extents; - sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS); for (i = 0; i < ref->nritems; i++) { - sorted[i].bytenr = ref->extents[i].bytenr; - sorted[i].slot = i; - } - sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL); - - /* - * the items in the ref were sorted when the ref was inserted - * into the ref cache, so this is already in order - */ - for (i = 0; i < ref->nritems; i++) { - info = ref->extents + sorted[i].slot; ret = __btrfs_free_extent(trans, root, info->bytenr, info->num_bytes, ref->bytenr, ref->owner, ref->generation, @@ -3582,7 +3453,6 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, info++; } - kfree(sorted); return 0; } @@ -3626,152 +3496,6 @@ static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, return ret; } -/* - * this is used while deleting old snapshots, and it drops the refs - * on a whole subtree starting from a level 1 node. - * - * The idea is to sort all the leaf pointers, and then drop the - * ref on all the leaves in order. Most of the time the leaves - * will have ref cache entries, so no leaf IOs will be required to - * find the extents they have references on. - * - * For each leaf, any references it has are also dropped in order - * - * This ends up dropping the references in something close to optimal - * order for reading and modifying the extent allocation tree. 
- */ -static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path) -{ - u64 bytenr; - u64 root_owner; - u64 root_gen; - struct extent_buffer *eb = path->nodes[1]; - struct extent_buffer *leaf; - struct btrfs_leaf_ref *ref; - struct refsort *sorted = NULL; - int nritems = btrfs_header_nritems(eb); - int ret; - int i; - int refi = 0; - int slot = path->slots[1]; - u32 blocksize = btrfs_level_size(root, 0); - u32 refs; - - if (nritems == 0) - goto out; - - root_owner = btrfs_header_owner(eb); - root_gen = btrfs_header_generation(eb); - sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); - - /* - * step one, sort all the leaf pointers so we don't scribble - * randomly into the extent allocation tree - */ - for (i = slot; i < nritems; i++) { - sorted[refi].bytenr = btrfs_node_blockptr(eb, i); - sorted[refi].slot = i; - refi++; - } - - /* - * nritems won't be zero, but if we're picking up drop_snapshot - * after a crash, slot might be > 0, so double check things - * just in case. - */ - if (refi == 0) - goto out; - - sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); - - /* - * the first loop frees everything the leaves point to - */ - for (i = 0; i < refi; i++) { - u64 ptr_gen; - - bytenr = sorted[i].bytenr; - - /* - * check the reference count on this leaf. If it is > 1 - * we just decrement it below and don't update any - * of the refs the leaf points to. - */ - ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); - BUG_ON(ret); - if (refs != 1) - continue; - - ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot); - - /* - * the leaf only had one reference, which means the - * only thing pointing to this leaf is the snapshot - * we're deleting. It isn't possible for the reference - * count to increase again later - * - * The reference cache is checked for the leaf, - * and if found we'll be able to drop any refs held by - * the leaf without needing to read it in. - */ - ref = btrfs_lookup_leaf_ref(root, bytenr); - if (ref && ref->generation != ptr_gen) { - btrfs_free_leaf_ref(root, ref); - ref = NULL; - } - if (ref) { - ret = cache_drop_leaf_ref(trans, root, ref); - BUG_ON(ret); - btrfs_remove_leaf_ref(root, ref); - btrfs_free_leaf_ref(root, ref); - } else { - /* - * the leaf wasn't in the reference cache, so - * we have to read it. - */ - leaf = read_tree_block(root, bytenr, blocksize, - ptr_gen); - ret = btrfs_drop_leaf_ref(trans, root, leaf); - BUG_ON(ret); - free_extent_buffer(leaf); - } - atomic_inc(&root->fs_info->throttle_gen); - wake_up(&root->fs_info->transaction_throttle); - cond_resched(); - } - - /* - * run through the loop again to free the refs on the leaves. - * This is faster than doing it in the loop above because - * the leaves are likely to be clustered together. We end up - * working in nice chunks on the extent allocation tree. - */ - for (i = 0; i < refi; i++) { - bytenr = sorted[i].bytenr; - ret = __btrfs_free_extent(trans, root, bytenr, - blocksize, eb->start, - root_owner, root_gen, 0, 1); - BUG_ON(ret); - - atomic_inc(&root->fs_info->throttle_gen); - wake_up(&root->fs_info->transaction_throttle); - cond_resched(); - } -out: - kfree(sorted); - - /* - * update the path to show we've processed the entire level 1 - * node. This will get saved into the root's drop_snapshot_progress - * field so these drops are not repeated again if this transaction - * commits. 
- */ - path->slots[1] = nritems; - return 0; -} - /* * helper function for drop_snapshot, this walks down the tree dropping ref * counts as it goes. @@ -3787,6 +3511,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, struct extent_buffer *next; struct extent_buffer *cur; struct extent_buffer *parent; + struct btrfs_leaf_ref *ref; u32 blocksize; int ret; u32 refs; @@ -3813,46 +3538,17 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, if (path->slots[*level] >= btrfs_header_nritems(cur)) break; - - /* the new code goes down to level 1 and does all the - * leaves pointed to that node in bulk. So, this check - * for level 0 will always be false. - * - * But, the disk format allows the drop_snapshot_progress - * field in the root to leave things in a state where - * a leaf will need cleaning up here. If someone crashes - * with the old code and then boots with the new code, - * we might find a leaf here. - */ if (*level == 0) { ret = btrfs_drop_leaf_ref(trans, root, cur); BUG_ON(ret); break; } - - /* - * once we get to level one, process the whole node - * at once, including everything below it. - */ - if (*level == 1) { - ret = drop_level_one_refs(trans, root, path); - BUG_ON(ret); - break; - } - bytenr = btrfs_node_blockptr(cur, path->slots[*level]); ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); blocksize = btrfs_level_size(root, *level - 1); ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); BUG_ON(ret); - - /* - * if there is more than one reference, we don't need - * to read that node to drop any references it has. We - * just drop the ref we hold on that node and move on to the - * next slot in this level. - */ if (refs != 1) { parent = path->nodes[*level]; root_owner = btrfs_header_owner(parent); @@ -3871,12 +3567,46 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, continue; } - /* - * we need to keep freeing things in the next level down. - * read the block and loop around to process it + * at this point, we have a single ref, and since the + * only place referencing this extent is a dead root + * the reference count should never go higher. 
+ * So, we don't need to check it again */ - next = read_tree_block(root, bytenr, blocksize, ptr_gen); + if (*level == 1) { + ref = btrfs_lookup_leaf_ref(root, bytenr); + if (ref && ref->generation != ptr_gen) { + btrfs_free_leaf_ref(root, ref); + ref = NULL; + } + if (ref) { + ret = cache_drop_leaf_ref(trans, root, ref); + BUG_ON(ret); + btrfs_remove_leaf_ref(root, ref); + btrfs_free_leaf_ref(root, ref); + *level = 0; + break; + } + } + next = btrfs_find_tree_block(root, bytenr, blocksize); + if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { + free_extent_buffer(next); + + next = read_tree_block(root, bytenr, blocksize, + ptr_gen); + cond_resched(); +#if 0 + /* + * this is a debugging check and can go away + * the ref should never go all the way down to 1 + * at this point + */ + ret = lookup_extent_ref(NULL, root, bytenr, blocksize, + &refs); + BUG_ON(ret); + WARN_ON(refs != 1); +#endif + } WARN_ON(*level <= 0); if (path->nodes[*level-1]) free_extent_buffer(path->nodes[*level-1]); @@ -3901,16 +3631,11 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, root_owner = btrfs_header_owner(parent); root_gen = btrfs_header_generation(parent); - /* - * cleanup and free the reference on the last node - * we processed - */ ret = __btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, root_owner, root_gen, *level, 1); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; - *level += 1; BUG_ON(ret); @@ -3962,7 +3687,6 @@ static noinline int walk_down_subtree(struct btrfs_trans_handle *trans, next = read_tree_block(root, bytenr, blocksize, ptr_gen); btrfs_tree_lock(next); - btrfs_set_lock_blocking(next); ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize, &refs); @@ -4030,13 +3754,6 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { struct extent_buffer *node; struct btrfs_disk_key disk_key; - - /* - * there is more work to do in this level. 
- * Update the drop_progress marker to reflect - * the work we've done so far, and then bump - * the slot number - */ node = path->nodes[i]; path->slots[i]++; *level = i; @@ -4048,11 +3765,6 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, return 0; } else { struct extent_buffer *parent; - - /* - * this whole node is done, free our reference - * on it and go up one level - */ if (path->nodes[*level] == root->node) parent = path->nodes[*level]; else @@ -4732,7 +4444,7 @@ static noinline int replace_one_extent(struct btrfs_trans_handle *trans, u64 lock_end = 0; u64 num_bytes; u64 ext_offset; - u64 search_end = (u64)-1; + u64 first_pos; u32 nritems; int nr_scaned = 0; int extent_locked = 0; @@ -4740,6 +4452,7 @@ static noinline int replace_one_extent(struct btrfs_trans_handle *trans, int ret; memcpy(&key, leaf_key, sizeof(key)); + first_pos = INT_LIMIT(loff_t) - extent_key->offset; if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { if (key.objectid < ref_path->owner_objectid || (key.objectid == ref_path->owner_objectid && @@ -4788,7 +4501,7 @@ static noinline int replace_one_extent(struct btrfs_trans_handle *trans, if ((key.objectid > ref_path->owner_objectid) || (key.objectid == ref_path->owner_objectid && key.type > BTRFS_EXTENT_DATA_KEY) || - key.offset >= search_end) + (key.offset >= first_pos + extent_key->offset)) break; } @@ -4821,10 +4534,8 @@ static noinline int replace_one_extent(struct btrfs_trans_handle *trans, num_bytes = btrfs_file_extent_num_bytes(leaf, fi); ext_offset = btrfs_file_extent_offset(leaf, fi); - if (search_end == (u64)-1) { - search_end = key.offset - ext_offset + - btrfs_file_extent_ram_bytes(leaf, fi); - } + if (first_pos > key.offset - ext_offset) + first_pos = key.offset - ext_offset; if (!extent_locked) { lock_start = key.offset; @@ -5013,7 +4724,7 @@ static noinline int replace_one_extent(struct btrfs_trans_handle *trans, } skip: if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS && - key.offset >= search_end) + key.offset >= first_pos + extent_key->offset) break; cond_resched(); @@ -5067,7 +4778,6 @@ int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, ref->bytenr = buf->start; ref->owner = btrfs_header_owner(buf); ref->generation = btrfs_header_generation(buf); - ret = btrfs_add_leaf_ref(root, ref, 0); WARN_ON(ret); btrfs_free_leaf_ref(root, ref); @@ -6247,11 +5957,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); - spin_lock(&root->fs_info->block_group_cache_lock); + btrfs_remove_free_space_cache(block_group); rb_erase(&block_group->cache_node, &root->fs_info->block_group_cache_tree); - spin_unlock(&root->fs_info->block_group_cache_lock); - btrfs_remove_free_space_cache(block_group); down_write(&block_group->space_info->groups_sem); list_del(&block_group->list); up_write(&block_group->space_info->groups_sem); diff --git a/trunk/fs/btrfs/extent_io.c b/trunk/fs/btrfs/extent_io.c index 37d43b516b79..e086d407f1fa 100644 --- a/trunk/fs/btrfs/extent_io.c +++ b/trunk/fs/btrfs/extent_io.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include "extent_io.h" @@ -30,7 +31,7 @@ static LIST_HEAD(buffers); static LIST_HEAD(states); #define LEAK_DEBUG 0 -#if LEAK_DEBUG +#ifdef LEAK_DEBUG static DEFINE_SPINLOCK(leak_lock); #endif @@ -119,7 +120,7 @@ void extent_io_tree_init(struct extent_io_tree *tree, static struct extent_state *alloc_extent_state(gfp_t mask) { struct extent_state *state; -#if LEAK_DEBUG +#ifdef LEAK_DEBUG unsigned long 
flags; #endif @@ -129,7 +130,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask) state->state = 0; state->private = 0; state->tree = NULL; -#if LEAK_DEBUG +#ifdef LEAK_DEBUG spin_lock_irqsave(&leak_lock, flags); list_add(&state->leak_list, &states); spin_unlock_irqrestore(&leak_lock, flags); @@ -144,11 +145,11 @@ static void free_extent_state(struct extent_state *state) if (!state) return; if (atomic_dec_and_test(&state->refs)) { -#if LEAK_DEBUG +#ifdef LEAK_DEBUG unsigned long flags; #endif WARN_ON(state->tree); -#if LEAK_DEBUG +#ifdef LEAK_DEBUG spin_lock_irqsave(&leak_lock, flags); list_del(&state->leak_list); spin_unlock_irqrestore(&leak_lock, flags); @@ -2377,6 +2378,11 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, int scanned = 0; int range_whole = 0; + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + return 0; + } + pagevec_init(&pvec, 0); if (wbc->range_cyclic) { index = mapping->writeback_index; /* Start from prev offset */ @@ -2849,98 +2855,6 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, return sector; } -int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - __u64 start, __u64 len, get_extent_t *get_extent) -{ - int ret; - u64 off = start; - u64 max = start + len; - u32 flags = 0; - u64 disko = 0; - struct extent_map *em = NULL; - int end = 0; - u64 em_start = 0, em_len = 0; - unsigned long emflags; - ret = 0; - - if (len == 0) - return -EINVAL; - - lock_extent(&BTRFS_I(inode)->io_tree, start, start + len, - GFP_NOFS); - em = get_extent(inode, NULL, 0, off, max - off, 0); - if (!em) - goto out; - if (IS_ERR(em)) { - ret = PTR_ERR(em); - goto out; - } - while (!end) { - off = em->start + em->len; - if (off >= max) - end = 1; - - em_start = em->start; - em_len = em->len; - - disko = 0; - flags = 0; - - switch (em->block_start) { - case EXTENT_MAP_LAST_BYTE: - end = 1; - flags |= FIEMAP_EXTENT_LAST; - break; - case EXTENT_MAP_HOLE: - flags |= FIEMAP_EXTENT_UNWRITTEN; - break; - case EXTENT_MAP_INLINE: - flags |= (FIEMAP_EXTENT_DATA_INLINE | - FIEMAP_EXTENT_NOT_ALIGNED); - break; - case EXTENT_MAP_DELALLOC: - flags |= (FIEMAP_EXTENT_DELALLOC | - FIEMAP_EXTENT_UNKNOWN); - break; - default: - disko = em->block_start; - break; - } - if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) - flags |= FIEMAP_EXTENT_ENCODED; - - emflags = em->flags; - free_extent_map(em); - em = NULL; - - if (!end) { - em = get_extent(inode, NULL, 0, off, max - off, 0); - if (!em) - goto out; - if (IS_ERR(em)) { - ret = PTR_ERR(em); - goto out; - } - emflags = em->flags; - } - if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) { - flags |= FIEMAP_EXTENT_LAST; - end = 1; - } - - ret = fiemap_fill_next_extent(fieinfo, em_start, disko, - em_len, flags); - if (ret) - goto out_free; - } -out_free: - free_extent_map(em); -out: - unlock_extent(&BTRFS_I(inode)->io_tree, start, start + len, - GFP_NOFS); - return ret; -} - static inline struct page *extent_buffer_page(struct extent_buffer *eb, unsigned long i) { @@ -2978,17 +2892,15 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, gfp_t mask) { struct extent_buffer *eb = NULL; -#if LEAK_DEBUG +#ifdef LEAK_DEBUG unsigned long flags; #endif eb = kmem_cache_zalloc(extent_buffer_cache, mask); eb->start = start; eb->len = len; - spin_lock_init(&eb->lock); - init_waitqueue_head(&eb->lock_wq); - -#if LEAK_DEBUG + mutex_init(&eb->mutex); +#ifdef LEAK_DEBUG spin_lock_irqsave(&leak_lock, flags); list_add(&eb->leak_list, &buffers); 
spin_unlock_irqrestore(&leak_lock, flags); @@ -3000,7 +2912,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, static void __free_extent_buffer(struct extent_buffer *eb) { -#if LEAK_DEBUG +#ifdef LEAK_DEBUG unsigned long flags; spin_lock_irqsave(&leak_lock, flags); list_del(&eb->leak_list); @@ -3068,7 +2980,8 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, unlock_page(p); } if (uptodate) - set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); + eb->flags |= EXTENT_UPTODATE; + eb->flags |= EXTENT_BUFFER_FILLED; spin_lock(&tree->buffer_lock); exists = buffer_tree_insert(tree, start, &eb->rb_node); @@ -3222,7 +3135,7 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, unsigned long num_pages; num_pages = num_extent_pages(eb->start, eb->len); - clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); + eb->flags &= ~EXTENT_UPTODATE; clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); @@ -3293,7 +3206,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, struct page *page; int pg_uptodate = 1; - if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) + if (eb->flags & EXTENT_UPTODATE) return 1; ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, @@ -3329,7 +3242,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, struct bio *bio = NULL; unsigned long bio_flags = 0; - if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) + if (eb->flags & EXTENT_UPTODATE) return 0; if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, @@ -3360,7 +3273,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, } if (all_uptodate) { if (start_i == 0) - set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); + eb->flags |= EXTENT_UPTODATE; goto unlock_exit; } @@ -3396,7 +3309,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, } if (!ret) - set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); + eb->flags |= EXTENT_UPTODATE; return ret; unlock_exit: @@ -3493,6 +3406,7 @@ int map_extent_buffer(struct extent_buffer *eb, unsigned long start, unmap_extent_buffer(eb, eb->map_token, km); eb->map_token = NULL; save = 1; + WARN_ON(!mutex_is_locked(&eb->mutex)); } err = map_private_extent_buffer(eb, start, min_len, token, map, map_start, map_len, km); diff --git a/trunk/fs/btrfs/extent_io.h b/trunk/fs/btrfs/extent_io.h index 1f9df88afbf6..c5b483a79137 100644 --- a/trunk/fs/btrfs/extent_io.h +++ b/trunk/fs/btrfs/extent_io.h @@ -22,10 +22,6 @@ /* flags for bio submission */ #define EXTENT_BIO_COMPRESSED 1 -/* these are bit numbers for test/set bit */ -#define EXTENT_BUFFER_UPTODATE 0 -#define EXTENT_BUFFER_BLOCKING 1 - /* * page->private values. Every page that is controlled by the extent * map has page->private set to one. 
@@ -99,19 +95,11 @@ struct extent_buffer { unsigned long map_start; unsigned long map_len; struct page *first_page; - unsigned long bflags; atomic_t refs; + int flags; struct list_head leak_list; struct rb_node rb_node; - - /* the spinlock is used to protect most operations */ - spinlock_t lock; - - /* - * when we keep the lock held while blocking, waiters go onto - * the wq - */ - wait_queue_head_t lock_wq; + struct mutex mutex; }; struct extent_map_tree; @@ -205,8 +193,6 @@ int extent_commit_write(struct extent_io_tree *tree, unsigned from, unsigned to); sector_t extent_bmap(struct address_space *mapping, sector_t iblock, get_extent_t *get_extent); -int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - __u64 start, __u64 len, get_extent_t *get_extent); int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end); int set_state_private(struct extent_io_tree *tree, u64 start, u64 private); int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); diff --git a/trunk/fs/btrfs/extent_map.c b/trunk/fs/btrfs/extent_map.c index 50da69da20ce..4a83e33ada32 100644 --- a/trunk/fs/btrfs/extent_map.c +++ b/trunk/fs/btrfs/extent_map.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include "extent_map.h" diff --git a/trunk/fs/btrfs/file.c b/trunk/fs/btrfs/file.c index 3e8023efaff7..90268334145e 100644 --- a/trunk/fs/btrfs/file.c +++ b/trunk/fs/btrfs/file.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -1214,10 +1215,10 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) } mutex_unlock(&root->fs_info->trans_mutex); - root->log_batch++; + root->fs_info->tree_log_batch++; filemap_fdatawrite(inode->i_mapping); btrfs_wait_ordered_range(inode, 0, (u64)-1); - root->log_batch++; + root->fs_info->tree_log_batch++; /* * ok we haven't committed the transaction yet, lets do a commit diff --git a/trunk/fs/btrfs/inode.c b/trunk/fs/btrfs/inode.c index 8f0706210a47..8adfe059ab41 100644 --- a/trunk/fs/btrfs/inode.c +++ b/trunk/fs/btrfs/inode.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -50,7 +51,6 @@ #include "tree-log.h" #include "ref-cache.h" #include "compression.h" -#include "locking.h" struct btrfs_iget_args { u64 ino; @@ -91,16 +91,6 @@ static noinline int cow_file_range(struct inode *inode, u64 start, u64 end, int *page_started, unsigned long *nr_written, int unlock); -static int btrfs_init_inode_security(struct inode *inode, struct inode *dir) -{ - int err; - - err = btrfs_init_acl(inode, dir); - if (!err) - err = btrfs_xattr_security_init(inode, dir); - return err; -} - /* * a very lame attempt at stopping writes when the FS is 85% full. There * are countless ways this is incorrect, but it is better than nothing. @@ -360,19 +350,6 @@ static noinline int compress_file_range(struct inode *inode, nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1; nr_pages = min(nr_pages, (128 * 1024UL) / PAGE_CACHE_SIZE); - /* - * we don't want to send crud past the end of i_size through - * compression, that's just a waste of CPU time. So, if the - * end of the file is before the start of our current - * requested range of bytes, we bail out to the uncompressed - * cleanup code that can deal with all of this. - * - * It isn't really the fastest way to fix things, but this is a - * very uncommon corner. 
- */ - if (actual_end <= start) - goto cleanup_and_bail_uncompressed; - total_compressed = actual_end - start; /* we want to make sure that amount of ram required to uncompress @@ -517,7 +494,6 @@ static noinline int compress_file_range(struct inode *inode, goto again; } } else { -cleanup_and_bail_uncompressed: /* * No compression, but we still need to write the pages in * the file we've been given so far. redirty the locked @@ -1348,11 +1324,12 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, struct inode *inode, u64 file_offset, struct list_head *list) { + struct list_head *cur; struct btrfs_ordered_sum *sum; btrfs_set_trans_block_group(trans, inode); - - list_for_each_entry(sum, list, list) { + list_for_each(cur, list) { + sum = list_entry(cur, struct btrfs_ordered_sum, list); btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root->fs_info->csum_root, sum); } @@ -2036,7 +2013,6 @@ void btrfs_read_locked_inode(struct inode *inode) BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); alloc_group_block = btrfs_inode_block_group(leaf, inode_item); - BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, alloc_group_block, 0); btrfs_free_path(path); @@ -2063,7 +2039,6 @@ void btrfs_read_locked_inode(struct inode *inode) inode->i_mapping->backing_dev_info = &root->fs_info->bdi; break; default: - inode->i_op = &btrfs_special_inode_operations; init_special_inode(inode, inode->i_mode, rdev); break; } @@ -2133,7 +2108,6 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, goto failed; } - btrfs_unlock_up_safe(path, 1); leaf = path->nodes[0]; inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); @@ -2455,8 +2429,6 @@ static noinline int drop_csum_leaves(struct btrfs_trans_handle *trans, ref->generation = leaf_gen; ref->nritems = 0; - btrfs_sort_leaf_ref(ref); - ret = btrfs_add_leaf_ref(root, ref, 0); WARN_ON(ret); btrfs_free_leaf_ref(root, ref); @@ -2504,7 +2476,7 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct btrfs_key key; struct btrfs_key found_key; - u32 found_type = (u8)-1; + u32 found_type; struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; u64 extent_start = 0; @@ -2691,8 +2663,6 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, if (pending_del_nr) goto del_pending; btrfs_release_path(root, path); - if (found_type == BTRFS_INODE_ITEM_KEY) - break; goto search_again; } @@ -2709,8 +2679,6 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, BUG_ON(ret); pending_del_nr = 0; btrfs_release_path(root, path); - if (found_type == BTRFS_INODE_ITEM_KEY) - break; goto search_again; } } @@ -3297,7 +3265,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, /* Reached end of directory/root. Bump pos past the last item. 
*/ if (key_type == BTRFS_DIR_INDEX_KEY) - filp->f_pos = INT_LIMIT(off_t); + filp->f_pos = INT_LIMIT(typeof(filp->f_pos)); else filp->f_pos++; nopos: @@ -3490,14 +3458,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, root->highest_inode = objectid; inode->i_uid = current_fsuid(); - - if (dir && (dir->i_mode & S_ISGID)) { - inode->i_gid = dir->i_gid; - if (S_ISDIR(mode)) - mode |= S_ISGID; - } else - inode->i_gid = current_fsgid(); - + inode->i_gid = current_fsgid(); inode->i_mode = mode; inode->i_ino = objectid; inode_set_bytes(inode, 0); @@ -3625,7 +3586,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; - err = btrfs_init_inode_security(inode, dir); + err = btrfs_init_acl(inode, dir); if (err) { drop_inode = 1; goto out_unlock; @@ -3688,7 +3649,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; - err = btrfs_init_inode_security(inode, dir); + err = btrfs_init_acl(inode, dir); if (err) { drop_inode = 1; goto out_unlock; @@ -3811,7 +3772,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) drop_on_err = 1; - err = btrfs_init_inode_security(inode, dir); + err = btrfs_init_acl(inode, dir); if (err) goto out_fail; @@ -4197,10 +4158,9 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, return -EINVAL; } -static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - __u64 start, __u64 len) +static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock) { - return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent); + return extent_bmap(mapping, iblock, btrfs_get_extent); } int btrfs_readpage(struct file *file, struct page *page) @@ -4773,7 +4733,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; - err = btrfs_init_inode_security(inode, dir); + err = btrfs_init_acl(inode, dir); if (err) { drop_inode = 1; goto out_unlock; @@ -5027,24 +4987,13 @@ static struct extent_io_ops btrfs_extent_io_ops = { .clear_bit_hook = btrfs_clear_bit_hook, }; -/* - * btrfs doesn't support the bmap operation because swapfiles - * use bmap to make a mapping of extents in the file. They assume - * these extents won't change over the life of the file and they - * use the bmap result to do IO directly to the drive. - * - * the btrfs bmap call would return logical addresses that aren't - * suitable for IO and they also will change frequently as COW - * operations happen. So, swapfile + btrfs == corruption. - * - * For now we're avoiding this by dropping bmap. 
- */ static struct address_space_operations btrfs_aops = { .readpage = btrfs_readpage, .writepage = btrfs_writepage, .writepages = btrfs_writepages, .readpages = btrfs_readpages, .sync_page = block_sync_page, + .bmap = btrfs_bmap, .direct_IO = btrfs_direct_IO, .invalidatepage = btrfs_invalidatepage, .releasepage = btrfs_releasepage, @@ -5068,7 +5017,6 @@ static struct inode_operations btrfs_file_inode_operations = { .removexattr = btrfs_removexattr, .permission = btrfs_permission, .fallocate = btrfs_fallocate, - .fiemap = btrfs_fiemap, }; static struct inode_operations btrfs_special_inode_operations = { .getattr = btrfs_getattr, @@ -5084,8 +5032,4 @@ static struct inode_operations btrfs_symlink_inode_operations = { .follow_link = page_follow_link_light, .put_link = page_put_link, .permission = btrfs_permission, - .setxattr = btrfs_setxattr, - .getxattr = btrfs_getxattr, - .listxattr = btrfs_listxattr, - .removexattr = btrfs_removexattr, }; diff --git a/trunk/fs/btrfs/ioctl.c b/trunk/fs/btrfs/ioctl.c index 988fdc8b49eb..c2aa33e3feb5 100644 --- a/trunk/fs/btrfs/ioctl.c +++ b/trunk/fs/btrfs/ioctl.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include "compat.h" diff --git a/trunk/fs/btrfs/locking.c b/trunk/fs/btrfs/locking.c index 68fd9ccf1805..39bae7761db6 100644 --- a/trunk/fs/btrfs/locking.c +++ b/trunk/fs/btrfs/locking.c @@ -26,215 +26,45 @@ #include "locking.h" /* - * btrfs_header_level() isn't free, so don't call it when lockdep isn't - * on - */ -#ifdef CONFIG_DEBUG_LOCK_ALLOC -static inline void spin_nested(struct extent_buffer *eb) -{ - spin_lock_nested(&eb->lock, BTRFS_MAX_LEVEL - btrfs_header_level(eb)); -} -#else -static inline void spin_nested(struct extent_buffer *eb) -{ - spin_lock(&eb->lock); -} -#endif - -/* - * Setting a lock to blocking will drop the spinlock and set the - * flag that forces other procs who want the lock to wait. After - * this you can safely schedule with the lock held. - */ -void btrfs_set_lock_blocking(struct extent_buffer *eb) -{ - if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { - set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); - spin_unlock(&eb->lock); - } - /* exit with the spin lock released and the bit set */ -} - -/* - * clearing the blocking flag will take the spinlock again. - * After this you can't safely schedule - */ -void btrfs_clear_lock_blocking(struct extent_buffer *eb) -{ - if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { - spin_nested(eb); - clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); - smp_mb__after_clear_bit(); - } - /* exit with the spin lock held */ -} - -/* - * unfortunately, many of the places that currently set a lock to blocking - * don't end up blocking for every long, and often they don't block - * at all. For a dbench 50 run, if we don't spin one the blocking bit - * at all, the context switch rate can jump up to 400,000/sec or more. - * - * So, we're still stuck with this crummy spin on the blocking bit, - * at least until the most common causes of the short blocks - * can be dealt with. - */ -static int btrfs_spin_on_block(struct extent_buffer *eb) -{ - int i; - for (i = 0; i < 512; i++) { - cpu_relax(); - if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) - return 1; - if (need_resched()) - break; - } - return 0; -} - -/* - * This is somewhat different from trylock. It will take the - * spinlock but if it finds the lock is set to blocking, it will - * return without the lock held. + * locks the per buffer mutex in an extent buffer. 
This uses adaptive locks + * and the spin is not tuned very extensively. The spinning does make a big + * difference in almost every workload, but spinning for the right amount of + * time needs some help. * - * returns 1 if it was able to take the lock and zero otherwise - * - * After this call, scheduling is not safe without first calling - * btrfs_set_lock_blocking() + * In general, we want to spin as long as the lock holder is doing btree + * searches, and we should give up if they are in more expensive code. */ -int btrfs_try_spin_lock(struct extent_buffer *eb) -{ - int i; - - spin_nested(eb); - if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) - return 1; - spin_unlock(&eb->lock); - /* spin for a bit on the BLOCKING flag */ - for (i = 0; i < 2; i++) { - if (!btrfs_spin_on_block(eb)) - break; - - spin_nested(eb); - if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) - return 1; - spin_unlock(&eb->lock); - } - return 0; -} - -/* - * the autoremove wake function will return 0 if it tried to wake up - * a process that was already awake, which means that process won't - * count as an exclusive wakeup. The waitq code will continue waking - * procs until it finds one that was actually sleeping. - * - * For btrfs, this isn't quite what we want. We want a single proc - * to be notified that the lock is ready for taking. If that proc - * already happen to be awake, great, it will loop around and try for - * the lock. - * - * So, btrfs_wake_function always returns 1, even when the proc that we - * tried to wake up was already awake. - */ -static int btrfs_wake_function(wait_queue_t *wait, unsigned mode, - int sync, void *key) -{ - autoremove_wake_function(wait, mode, sync, key); - return 1; -} - -/* - * returns with the extent buffer spinlocked. - * - * This will spin and/or wait as required to take the lock, and then - * return with the spinlock held. - * - * After this call, scheduling is not safe without first calling - * btrfs_set_lock_blocking() - */ int btrfs_tree_lock(struct extent_buffer *eb) { - DEFINE_WAIT(wait); - wait.func = btrfs_wake_function; - - while(1) { - spin_nested(eb); + int i; - /* nobody is blocking, exit with the spinlock held */ - if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) + if (mutex_trylock(&eb->mutex)) + return 0; + for (i = 0; i < 512; i++) { + cpu_relax(); + if (mutex_trylock(&eb->mutex)) return 0; - - /* - * we have the spinlock, but the real owner is blocking. - * wait for them - */ - spin_unlock(&eb->lock); - - /* - * spin for a bit, and if the blocking flag goes away, - * loop around - */ - if (btrfs_spin_on_block(eb)) - continue; - - prepare_to_wait_exclusive(&eb->lock_wq, &wait, - TASK_UNINTERRUPTIBLE); - - if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) - schedule(); - - finish_wait(&eb->lock_wq, &wait); } + cpu_relax(); + mutex_lock_nested(&eb->mutex, BTRFS_MAX_LEVEL - btrfs_header_level(eb)); return 0; } -/* - * Very quick trylock, this does not spin or schedule. It returns - * 1 with the spinlock held if it was able to take the lock, or it - * returns zero if it was unable to take the lock. - * - * After this call, scheduling is not safe without first calling - * btrfs_set_lock_blocking() - */ int btrfs_try_tree_lock(struct extent_buffer *eb) { - if (spin_trylock(&eb->lock)) { - if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { - /* - * we've got the spinlock, but the real owner is - * blocking. 
Drop the spinlock and return failure - */ - spin_unlock(&eb->lock); - return 0; - } - return 1; - } - /* someone else has the spinlock giveup */ - return 0; + return mutex_trylock(&eb->mutex); } int btrfs_tree_unlock(struct extent_buffer *eb) { - /* - * if we were a blocking owner, we don't have the spinlock held - * just clear the bit and look for waiters - */ - if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) - smp_mb__after_clear_bit(); - else - spin_unlock(&eb->lock); - - if (waitqueue_active(&eb->lock_wq)) - wake_up(&eb->lock_wq); + mutex_unlock(&eb->mutex); return 0; } int btrfs_tree_locked(struct extent_buffer *eb) { - return test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags) || - spin_is_locked(&eb->lock); + return mutex_is_locked(&eb->mutex); } /* @@ -245,14 +75,12 @@ int btrfs_path_lock_waiting(struct btrfs_path *path, int level) { int i; struct extent_buffer *eb; - for (i = level; i <= level + 1 && i < BTRFS_MAX_LEVEL; i++) { eb = path->nodes[i]; if (!eb) break; smp_mb(); - if (spin_is_contended(&eb->lock) || - waitqueue_active(&eb->lock_wq)) + if (!list_empty(&eb->mutex.wait_list)) return 1; } return 0; diff --git a/trunk/fs/btrfs/locking.h b/trunk/fs/btrfs/locking.h index d92e707f5870..bc1faef12519 100644 --- a/trunk/fs/btrfs/locking.h +++ b/trunk/fs/btrfs/locking.h @@ -22,12 +22,6 @@ int btrfs_tree_lock(struct extent_buffer *eb); int btrfs_tree_unlock(struct extent_buffer *eb); int btrfs_tree_locked(struct extent_buffer *eb); - int btrfs_try_tree_lock(struct extent_buffer *eb); -int btrfs_try_spin_lock(struct extent_buffer *eb); - int btrfs_path_lock_waiting(struct btrfs_path *path, int level); - -void btrfs_set_lock_blocking(struct extent_buffer *eb); -void btrfs_clear_lock_blocking(struct extent_buffer *eb); #endif diff --git a/trunk/fs/btrfs/ordered-data.c b/trunk/fs/btrfs/ordered-data.c index 77c2411a5f0f..a20940170274 100644 --- a/trunk/fs/btrfs/ordered-data.c +++ b/trunk/fs/btrfs/ordered-data.c @@ -613,6 +613,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, struct btrfs_sector_sum *sector_sums; struct btrfs_ordered_extent *ordered; struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; + struct list_head *cur; unsigned long num_sectors; unsigned long i; u32 sectorsize = BTRFS_I(inode)->root->sectorsize; @@ -623,7 +624,8 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, return 1; mutex_lock(&tree->mutex); - list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { + list_for_each_prev(cur, &ordered->list) { + ordered_sum = list_entry(cur, struct btrfs_ordered_sum, list); if (disk_bytenr >= ordered_sum->bytenr) { num_sectors = ordered_sum->len / sectorsize; sector_sums = ordered_sum->sums; diff --git a/trunk/fs/btrfs/ref-cache.c b/trunk/fs/btrfs/ref-cache.c index d0cc62bccb94..6f0acc4c9eab 100644 --- a/trunk/fs/btrfs/ref-cache.c +++ b/trunk/fs/btrfs/ref-cache.c @@ -17,7 +17,6 @@ */ #include -#include #include "ctree.h" #include "ref-cache.h" #include "transaction.h" diff --git a/trunk/fs/btrfs/ref-cache.h b/trunk/fs/btrfs/ref-cache.h index bc283ad2db73..16f3183d7c59 100644 --- a/trunk/fs/btrfs/ref-cache.h +++ b/trunk/fs/btrfs/ref-cache.h @@ -73,4 +73,5 @@ int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref, int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen, int shared); int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); + #endif diff --git a/trunk/fs/btrfs/super.c b/trunk/fs/btrfs/super.c index 
f3fd7e2cbc38..db9fb3bc1e33 100644 --- a/trunk/fs/btrfs/super.c +++ b/trunk/fs/btrfs/super.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include "compat.h" #include "ctree.h" @@ -582,18 +583,17 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, struct btrfs_ioctl_vol_args *vol; struct btrfs_fs_devices *fs_devices; int ret = -ENOTTY; + int len; if (!capable(CAP_SYS_ADMIN)) return -EPERM; vol = kmalloc(sizeof(*vol), GFP_KERNEL); - if (!vol) - return -ENOMEM; - if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) { ret = -EFAULT; goto out; } + len = strnlen(vol->name, BTRFS_PATH_NAME_MAX); switch (cmd) { case BTRFS_IOC_SCAN_DEV: diff --git a/trunk/fs/btrfs/transaction.c b/trunk/fs/btrfs/transaction.c index 919172de5c9a..8a08f9443340 100644 --- a/trunk/fs/btrfs/transaction.c +++ b/trunk/fs/btrfs/transaction.c @@ -852,9 +852,11 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, { struct btrfs_pending_snapshot *pending; struct list_head *head = &trans->transaction->pending_snapshots; + struct list_head *cur; int ret; - list_for_each_entry(pending, head, list) { + list_for_each(cur, head) { + pending = list_entry(cur, struct btrfs_pending_snapshot, list); ret = create_pending_snapshot(trans, fs_info, pending); BUG_ON(ret); } diff --git a/trunk/fs/btrfs/tree-defrag.c b/trunk/fs/btrfs/tree-defrag.c index 98d25fa4570e..3e8358c36165 100644 --- a/trunk/fs/btrfs/tree-defrag.c +++ b/trunk/fs/btrfs/tree-defrag.c @@ -74,7 +74,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, u32 nritems; root_node = btrfs_lock_root_node(root); - btrfs_set_lock_blocking(root_node); nritems = btrfs_header_nritems(root_node); root->defrag_max.objectid = 0; /* from above we know this is not a leaf */ diff --git a/trunk/fs/btrfs/tree-log.c b/trunk/fs/btrfs/tree-log.c index 20794290256b..d81cda2e077c 100644 --- a/trunk/fs/btrfs/tree-log.c +++ b/trunk/fs/btrfs/tree-log.c @@ -77,6 +77,104 @@ static int link_to_fixup_dir(struct btrfs_trans_handle *trans, * and once to do all the other items. */ +/* + * btrfs_add_log_tree adds a new per-subvolume log tree into the + * tree of log tree roots. This must be called with a tree log transaction + * running (see start_log_trans). 
+ */ +static int btrfs_add_log_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_key key; + struct btrfs_root_item root_item; + struct btrfs_inode_item *inode_item; + struct extent_buffer *leaf; + struct btrfs_root *new_root = root; + int ret; + u64 objectid = root->root_key.objectid; + + leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, + BTRFS_TREE_LOG_OBJECTID, + trans->transid, 0, 0, 0); + if (IS_ERR(leaf)) { + ret = PTR_ERR(leaf); + return ret; + } + + btrfs_set_header_nritems(leaf, 0); + btrfs_set_header_level(leaf, 0); + btrfs_set_header_bytenr(leaf, leaf->start); + btrfs_set_header_generation(leaf, trans->transid); + btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID); + + write_extent_buffer(leaf, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(leaf), + BTRFS_FSID_SIZE); + btrfs_mark_buffer_dirty(leaf); + + inode_item = &root_item.inode; + memset(inode_item, 0, sizeof(*inode_item)); + inode_item->generation = cpu_to_le64(1); + inode_item->size = cpu_to_le64(3); + inode_item->nlink = cpu_to_le32(1); + inode_item->nbytes = cpu_to_le64(root->leafsize); + inode_item->mode = cpu_to_le32(S_IFDIR | 0755); + + btrfs_set_root_bytenr(&root_item, leaf->start); + btrfs_set_root_generation(&root_item, trans->transid); + btrfs_set_root_level(&root_item, 0); + btrfs_set_root_refs(&root_item, 0); + btrfs_set_root_used(&root_item, 0); + + memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); + root_item.drop_level = 0; + + btrfs_tree_unlock(leaf); + free_extent_buffer(leaf); + leaf = NULL; + + btrfs_set_root_dirid(&root_item, 0); + + key.objectid = BTRFS_TREE_LOG_OBJECTID; + key.offset = objectid; + btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + ret = btrfs_insert_root(trans, root->fs_info->log_root_tree, &key, + &root_item); + if (ret) + goto fail; + + new_root = btrfs_read_fs_root_no_radix(root->fs_info->log_root_tree, + &key); + BUG_ON(!new_root); + + WARN_ON(root->log_root); + root->log_root = new_root; + + /* + * log trees do not get reference counted because they go away + * before a real commit is actually done. They do store pointers + * to file data extents, and those reference counts still get + * updated (along with back refs to the log tree). + */ + new_root->ref_cows = 0; + new_root->last_trans = trans->transid; + + /* + * we need to make sure the root block for this new tree + * is marked as dirty in the dirty_log_pages tree. This + * is how it gets flushed down to disk at tree log commit time. 
+ * + * the tree logging mutex keeps others from coming in and changing + * the new_root->node, so we can safely access it here + */ + set_extent_dirty(&new_root->dirty_log_pages, new_root->node->start, + new_root->node->start + new_root->node->len - 1, + GFP_NOFS); + +fail: + return ret; +} + /* * start a sub transaction and setup the log tree * this increments the log tree writer count to make the people @@ -86,14 +184,6 @@ static int start_log_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root) { int ret; - - mutex_lock(&root->log_mutex); - if (root->log_root) { - root->log_batch++; - atomic_inc(&root->log_writers); - mutex_unlock(&root->log_mutex); - return 0; - } mutex_lock(&root->fs_info->tree_log_mutex); if (!root->fs_info->log_root_tree) { ret = btrfs_init_log_root_tree(trans, root->fs_info); @@ -103,10 +193,9 @@ static int start_log_trans(struct btrfs_trans_handle *trans, ret = btrfs_add_log_tree(trans, root); BUG_ON(ret); } + atomic_inc(&root->fs_info->tree_log_writers); + root->fs_info->tree_log_batch++; mutex_unlock(&root->fs_info->tree_log_mutex); - root->log_batch++; - atomic_inc(&root->log_writers); - mutex_unlock(&root->log_mutex); return 0; } @@ -123,12 +212,13 @@ static int join_running_log_trans(struct btrfs_root *root) if (!root->log_root) return -ENOENT; - mutex_lock(&root->log_mutex); + mutex_lock(&root->fs_info->tree_log_mutex); if (root->log_root) { ret = 0; - atomic_inc(&root->log_writers); + atomic_inc(&root->fs_info->tree_log_writers); + root->fs_info->tree_log_batch++; } - mutex_unlock(&root->log_mutex); + mutex_unlock(&root->fs_info->tree_log_mutex); return ret; } @@ -138,11 +228,10 @@ static int join_running_log_trans(struct btrfs_root *root) */ static int end_log_trans(struct btrfs_root *root) { - if (atomic_dec_and_test(&root->log_writers)) { - smp_mb(); - if (waitqueue_active(&root->log_writer_wait)) - wake_up(&root->log_writer_wait); - } + atomic_dec(&root->fs_info->tree_log_writers); + smp_mb(); + if (waitqueue_active(&root->fs_info->tree_log_wait)) + wake_up(&root->fs_info->tree_log_wait); return 0; } @@ -1615,7 +1704,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, btrfs_tree_lock(next); clean_tree_block(trans, root, next); - btrfs_set_lock_blocking(next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); @@ -1662,7 +1750,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, next = path->nodes[*level]; btrfs_tree_lock(next); clean_tree_block(trans, root, next); - btrfs_set_lock_blocking(next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); @@ -1720,7 +1807,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, btrfs_tree_lock(next); clean_tree_block(trans, root, next); - btrfs_set_lock_blocking(next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); @@ -1793,7 +1879,6 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, btrfs_tree_lock(next); clean_tree_block(trans, log, next); - btrfs_set_lock_blocking(next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); @@ -1817,65 +1902,26 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, } } btrfs_free_path(path); + if (wc->free) + free_extent_buffer(log->node); return ret; } -/* - * helper function to update the item for a given subvolumes log root - * in the tree of log roots - */ -static int update_log_root(struct btrfs_trans_handle *trans, - struct btrfs_root *log) -{ - int ret; - - if (log->log_transid == 1) { - /* insert root item on the 
first sync */ - ret = btrfs_insert_root(trans, log->fs_info->log_root_tree, - &log->root_key, &log->root_item); - } else { - ret = btrfs_update_root(trans, log->fs_info->log_root_tree, - &log->root_key, &log->root_item); - } - return ret; -} - -static int wait_log_commit(struct btrfs_root *root, unsigned long transid) +static int wait_log_commit(struct btrfs_root *log) { DEFINE_WAIT(wait); - int index = transid % 2; + u64 transid = log->fs_info->tree_log_transid; - /* - * we only allow two pending log transactions at a time, - * so we know that if ours is more than 2 older than the - * current transaction, we're done - */ do { - prepare_to_wait(&root->log_commit_wait[index], - &wait, TASK_UNINTERRUPTIBLE); - mutex_unlock(&root->log_mutex); - if (root->log_transid < transid + 2 && - atomic_read(&root->log_commit[index])) + prepare_to_wait(&log->fs_info->tree_log_wait, &wait, + TASK_UNINTERRUPTIBLE); + mutex_unlock(&log->fs_info->tree_log_mutex); + if (atomic_read(&log->fs_info->tree_log_commit)) schedule(); - finish_wait(&root->log_commit_wait[index], &wait); - mutex_lock(&root->log_mutex); - } while (root->log_transid < transid + 2 && - atomic_read(&root->log_commit[index])); - return 0; -} - -static int wait_for_writer(struct btrfs_root *root) -{ - DEFINE_WAIT(wait); - while (atomic_read(&root->log_writers)) { - prepare_to_wait(&root->log_writer_wait, - &wait, TASK_UNINTERRUPTIBLE); - mutex_unlock(&root->log_mutex); - if (atomic_read(&root->log_writers)) - schedule(); - mutex_lock(&root->log_mutex); - finish_wait(&root->log_writer_wait, &wait); - } + finish_wait(&log->fs_info->tree_log_wait, &wait); + mutex_lock(&log->fs_info->tree_log_mutex); + } while (transid == log->fs_info->tree_log_transid && + atomic_read(&log->fs_info->tree_log_commit)); return 0; } @@ -1887,114 +1933,57 @@ static int wait_for_writer(struct btrfs_root *root) int btrfs_sync_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - int index1; - int index2; int ret; + unsigned long batch; struct btrfs_root *log = root->log_root; - struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; - mutex_lock(&root->log_mutex); - index1 = root->log_transid % 2; - if (atomic_read(&root->log_commit[index1])) { - wait_log_commit(root, root->log_transid); - mutex_unlock(&root->log_mutex); - return 0; + mutex_lock(&log->fs_info->tree_log_mutex); + if (atomic_read(&log->fs_info->tree_log_commit)) { + wait_log_commit(log); + goto out; } - atomic_set(&root->log_commit[index1], 1); - - /* wait for previous tree log sync to complete */ - if (atomic_read(&root->log_commit[(index1 + 1) % 2])) - wait_log_commit(root, root->log_transid - 1); + atomic_set(&log->fs_info->tree_log_commit, 1); while (1) { - unsigned long batch = root->log_batch; - mutex_unlock(&root->log_mutex); + batch = log->fs_info->tree_log_batch; + mutex_unlock(&log->fs_info->tree_log_mutex); schedule_timeout_uninterruptible(1); - mutex_lock(&root->log_mutex); - wait_for_writer(root); - if (batch == root->log_batch) + mutex_lock(&log->fs_info->tree_log_mutex); + + while (atomic_read(&log->fs_info->tree_log_writers)) { + DEFINE_WAIT(wait); + prepare_to_wait(&log->fs_info->tree_log_wait, &wait, + TASK_UNINTERRUPTIBLE); + mutex_unlock(&log->fs_info->tree_log_mutex); + if (atomic_read(&log->fs_info->tree_log_writers)) + schedule(); + mutex_lock(&log->fs_info->tree_log_mutex); + finish_wait(&log->fs_info->tree_log_wait, &wait); + } + if (batch == log->fs_info->tree_log_batch) break; } ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); 
BUG_ON(ret); - - btrfs_set_root_bytenr(&log->root_item, log->node->start); - btrfs_set_root_generation(&log->root_item, trans->transid); - btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node)); - - root->log_batch = 0; - root->log_transid++; - log->log_transid = root->log_transid; - smp_mb(); - /* - * log tree has been flushed to disk, new modifications of - * the log will be written to new positions. so it's safe to - * allow log writers to go in. - */ - mutex_unlock(&root->log_mutex); - - mutex_lock(&log_root_tree->log_mutex); - log_root_tree->log_batch++; - atomic_inc(&log_root_tree->log_writers); - mutex_unlock(&log_root_tree->log_mutex); - - ret = update_log_root(trans, log); - BUG_ON(ret); - - mutex_lock(&log_root_tree->log_mutex); - if (atomic_dec_and_test(&log_root_tree->log_writers)) { - smp_mb(); - if (waitqueue_active(&log_root_tree->log_writer_wait)) - wake_up(&log_root_tree->log_writer_wait); - } - - index2 = log_root_tree->log_transid % 2; - if (atomic_read(&log_root_tree->log_commit[index2])) { - wait_log_commit(log_root_tree, log_root_tree->log_transid); - mutex_unlock(&log_root_tree->log_mutex); - goto out; - } - atomic_set(&log_root_tree->log_commit[index2], 1); - - if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) - wait_log_commit(log_root_tree, log_root_tree->log_transid - 1); - - wait_for_writer(log_root_tree); - - ret = btrfs_write_and_wait_marked_extents(log_root_tree, - &log_root_tree->dirty_log_pages); + ret = btrfs_write_and_wait_marked_extents(root->fs_info->log_root_tree, + &root->fs_info->log_root_tree->dirty_log_pages); BUG_ON(ret); btrfs_set_super_log_root(&root->fs_info->super_for_commit, - log_root_tree->node->start); + log->fs_info->log_root_tree->node->start); btrfs_set_super_log_root_level(&root->fs_info->super_for_commit, - btrfs_header_level(log_root_tree->node)); - - log_root_tree->log_batch = 0; - log_root_tree->log_transid++; - smp_mb(); - - mutex_unlock(&log_root_tree->log_mutex); - - /* - * nobody else is going to jump in and write the the ctree - * super here because the log_commit atomic below is protecting - * us. We must be called with a transaction handle pinning - * the running transaction open, so a full commit can't hop - * in and cause problems either. 
- */ - write_ctree_super(trans, root->fs_info->tree_root, 2); + btrfs_header_level(log->fs_info->log_root_tree->node)); - atomic_set(&log_root_tree->log_commit[index2], 0); + write_ctree_super(trans, log->fs_info->tree_root, 2); + log->fs_info->tree_log_transid++; + log->fs_info->tree_log_batch = 0; + atomic_set(&log->fs_info->tree_log_commit, 0); smp_mb(); - if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) - wake_up(&log_root_tree->log_commit_wait[index2]); + if (waitqueue_active(&log->fs_info->tree_log_wait)) + wake_up(&log->fs_info->tree_log_wait); out: - atomic_set(&root->log_commit[index1], 0); - smp_mb(); - if (waitqueue_active(&root->log_commit_wait[index1])) - wake_up(&root->log_commit_wait[index1]); + mutex_unlock(&log->fs_info->tree_log_mutex); return 0; } @@ -2030,17 +2019,37 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) start, end, GFP_NOFS); } - if (log->log_transid > 0) { - ret = btrfs_del_root(trans, root->fs_info->log_root_tree, - &log->root_key); - BUG_ON(ret); - } + log = root->log_root; + ret = btrfs_del_root(trans, root->fs_info->log_root_tree, + &log->root_key); + BUG_ON(ret); root->log_root = NULL; - free_extent_buffer(log->node); - kfree(log); + kfree(root->log_root); return 0; } +/* + * helper function to update the item for a given subvolumes log root + * in the tree of log roots + */ +static int update_log_root(struct btrfs_trans_handle *trans, + struct btrfs_root *log) +{ + u64 bytenr = btrfs_root_bytenr(&log->root_item); + int ret; + + if (log->node->start == bytenr) + return 0; + + btrfs_set_root_bytenr(&log->root_item, log->node->start); + btrfs_set_root_generation(&log->root_item, trans->transid); + btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node)); + ret = btrfs_update_root(trans, log->fs_info->log_root_tree, + &log->root_key, &log->root_item); + BUG_ON(ret); + return ret; +} + /* * If both a file and directory are logged, and unlinks or renames are * mixed in, we have a few interesting corners: @@ -2702,6 +2711,11 @@ static int __btrfs_log_inode(struct btrfs_trans_handle *trans, btrfs_free_path(path); btrfs_free_path(dst_path); + + mutex_lock(&root->fs_info->tree_log_mutex); + ret = update_log_root(trans, log); + BUG_ON(ret); + mutex_unlock(&root->fs_info->tree_log_mutex); out: return 0; } diff --git a/trunk/fs/btrfs/volumes.c b/trunk/fs/btrfs/volumes.c index bcd14ebccae1..3451e1cca2b5 100644 --- a/trunk/fs/btrfs/volumes.c +++ b/trunk/fs/btrfs/volumes.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "compat.h" #include "ctree.h" @@ -103,8 +104,10 @@ static noinline struct btrfs_device *__find_device(struct list_head *head, u64 devid, u8 *uuid) { struct btrfs_device *dev; + struct list_head *cur; - list_for_each_entry(dev, head, dev_list) { + list_for_each(cur, head) { + dev = list_entry(cur, struct btrfs_device, dev_list); if (dev->devid == devid && (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) { return dev; @@ -115,9 +118,11 @@ static noinline struct btrfs_device *__find_device(struct list_head *head, static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid) { + struct list_head *cur; struct btrfs_fs_devices *fs_devices; - list_for_each_entry(fs_devices, &fs_uuids, list) { + list_for_each(cur, &fs_uuids) { + fs_devices = list_entry(cur, struct btrfs_fs_devices, list); if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0) return fs_devices; } @@ -154,7 +159,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) loop: 
spin_lock(&device->io_lock); -loop_lock: /* take all the bios off the list at once and process them * later on (without the lock held). But, remember the * tail and other pointers so the bios can be properly reinserted @@ -204,7 +208,7 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) * is now congested. Back off and let other work structs * run instead */ - if (pending && bdi_write_congested(bdi) && num_run > 16 && + if (pending && bdi_write_congested(bdi) && fs_info->fs_devices->open_devices > 1) { struct bio *old_head; @@ -216,8 +220,7 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) tail->bi_next = old_head; else device->pending_bio_tail = tail; - - device->running_pending = 1; + device->running_pending = 0; spin_unlock(&device->io_lock); btrfs_requeue_work(&device->work); @@ -226,11 +229,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) } if (again) goto loop; - - spin_lock(&device->io_lock); - if (device->pending_bios) - goto loop_lock; - spin_unlock(&device->io_lock); done: return 0; } @@ -347,11 +345,14 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) { - struct btrfs_device *device, *next; + struct list_head *tmp; + struct list_head *cur; + struct btrfs_device *device; mutex_lock(&uuid_mutex); again: - list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { + list_for_each_safe(cur, tmp, &fs_devices->devices) { + device = list_entry(cur, struct btrfs_device, dev_list); if (device->in_fs_metadata) continue; @@ -382,12 +383,14 @@ int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) { + struct list_head *cur; struct btrfs_device *device; if (--fs_devices->opened > 0) return 0; - list_for_each_entry(device, &fs_devices->devices, dev_list) { + list_for_each(cur, &fs_devices->devices) { + device = list_entry(cur, struct btrfs_device, dev_list); if (device->bdev) { close_bdev_exclusive(device->bdev, device->mode); fs_devices->open_devices--; @@ -436,6 +439,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, { struct block_device *bdev; struct list_head *head = &fs_devices->devices; + struct list_head *cur; struct btrfs_device *device; struct block_device *latest_bdev = NULL; struct buffer_head *bh; @@ -446,7 +450,8 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, int seeding = 1; int ret = 0; - list_for_each_entry(device, head, dev_list) { + list_for_each(cur, head) { + device = list_entry(cur, struct btrfs_device, dev_list); if (device->bdev) continue; if (!device->name) @@ -573,7 +578,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, *(unsigned long long *)disk_super->fsid, *(unsigned long long *)(disk_super->fsid + 8)); } - printk(KERN_CONT "devid %llu transid %llu %s\n", + printk(KERN_INFO "devid %llu transid %llu %s\n", (unsigned long long)devid, (unsigned long long)transid, path); ret = device_list_add(path, disk_super, devid, fs_devices_ret); @@ -1012,12 +1017,14 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) } if (strcmp(device_path, "missing") == 0) { + struct list_head *cur; struct list_head *devices; struct btrfs_device *tmp; device = NULL; devices = &root->fs_info->fs_devices->devices; - list_for_each_entry(tmp, devices, dev_list) { + list_for_each(cur, devices) { + tmp = list_entry(cur, struct btrfs_device, dev_list); if 
(tmp->in_fs_metadata && !tmp->bdev) { device = tmp; break; @@ -1273,6 +1280,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) struct btrfs_trans_handle *trans; struct btrfs_device *device; struct block_device *bdev; + struct list_head *cur; struct list_head *devices; struct super_block *sb = root->fs_info->sb; u64 total_bytes; @@ -1296,7 +1304,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) mutex_lock(&root->fs_info->volume_mutex); devices = &root->fs_info->fs_devices->devices; - list_for_each_entry(device, devices, dev_list) { + list_for_each(cur, devices) { + device = list_entry(cur, struct btrfs_device, dev_list); if (device->bdev == bdev) { ret = -EEXIST; goto error; @@ -1695,6 +1704,7 @@ static u64 div_factor(u64 num, int factor) int btrfs_balance(struct btrfs_root *dev_root) { int ret; + struct list_head *cur; struct list_head *devices = &dev_root->fs_info->fs_devices->devices; struct btrfs_device *device; u64 old_size; @@ -1713,7 +1723,8 @@ int btrfs_balance(struct btrfs_root *dev_root) dev_root = dev_root->fs_info->dev_root; /* step one make some room on all the devices */ - list_for_each_entry(device, devices, dev_list) { + list_for_each(cur, devices) { + device = list_entry(cur, struct btrfs_device, dev_list); old_size = device->total_bytes; size_to_free = div_factor(old_size, 1); size_to_free = min(size_to_free, (u64)1 * 1024 * 1024); diff --git a/trunk/fs/btrfs/xattr.c b/trunk/fs/btrfs/xattr.c index a9d3bf4d2689..7f332e270894 100644 --- a/trunk/fs/btrfs/xattr.c +++ b/trunk/fs/btrfs/xattr.c @@ -21,7 +21,6 @@ #include #include #include -#include #include "ctree.h" #include "btrfs_inode.h" #include "transaction.h" @@ -46,12 +45,9 @@ ssize_t __btrfs_getxattr(struct inode *inode, const char *name, /* lookup the xattr by name */ di = btrfs_lookup_xattr(NULL, root, path, inode->i_ino, name, strlen(name), 0); - if (!di) { + if (!di || IS_ERR(di)) { ret = -ENODATA; goto out; - } else if (IS_ERR(di)) { - ret = PTR_ERR(di); - goto out; } leaf = path->nodes[0]; @@ -66,14 +62,6 @@ ssize_t __btrfs_getxattr(struct inode *inode, const char *name, ret = -ERANGE; goto out; } - - /* - * The way things are packed into the leaf is like this - * |struct btrfs_dir_item|name|data| - * where name is the xattr name, so security.foo, and data is the - * content of the xattr. 
data_ptr points to the location in memory - * where the data starts in the in memory leaf - */ data_ptr = (unsigned long)((char *)(di + 1) + btrfs_dir_name_len(leaf, di)); read_extent_buffer(leaf, buffer, data_ptr, @@ -98,7 +86,7 @@ int __btrfs_setxattr(struct inode *inode, const char *name, if (!path) return -ENOMEM; - trans = btrfs_join_transaction(root, 1); + trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); /* first lets see if we already have this xattr */ @@ -188,6 +176,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto err; + ret = 0; advance = 0; while (1) { leaf = path->nodes[0]; @@ -331,34 +320,3 @@ int btrfs_removexattr(struct dentry *dentry, const char *name) return -EOPNOTSUPP; return __btrfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE); } - -int btrfs_xattr_security_init(struct inode *inode, struct inode *dir) -{ - int err; - size_t len; - void *value; - char *suffix; - char *name; - - err = security_inode_init_security(inode, dir, &suffix, &value, &len); - if (err) { - if (err == -EOPNOTSUPP) - return 0; - return err; - } - - name = kmalloc(XATTR_SECURITY_PREFIX_LEN + strlen(suffix) + 1, - GFP_NOFS); - if (!name) { - err = -ENOMEM; - } else { - strcpy(name, XATTR_SECURITY_PREFIX); - strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix); - err = __btrfs_setxattr(inode, name, value, len, 0); - kfree(name); - } - - kfree(suffix); - kfree(value); - return err; -} diff --git a/trunk/fs/btrfs/xattr.h b/trunk/fs/btrfs/xattr.h index c71e9c3cf3f7..5b1d08f8e68d 100644 --- a/trunk/fs/btrfs/xattr.h +++ b/trunk/fs/btrfs/xattr.h @@ -36,6 +36,4 @@ extern int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); extern int btrfs_removexattr(struct dentry *dentry, const char *name); -extern int btrfs_xattr_security_init(struct inode *inode, struct inode *dir); - #endif /* __XATTR__ */ diff --git a/trunk/fs/buffer.c b/trunk/fs/buffer.c index 665d446b25bc..b58208f1640a 100644 --- a/trunk/fs/buffer.c +++ b/trunk/fs/buffer.c @@ -2688,7 +2688,7 @@ int nobh_write_end(struct file *file, struct address_space *mapping, struct buffer_head *bh; BUG_ON(fsdata != NULL && page_has_buffers(page)); - if (unlikely(copied < len) && head) + if (unlikely(copied < len) && !page_has_buffers(page)) attach_nobh_buffers(page, head); if (page_has_buffers(page)) return generic_write_end(file, mapping, pos, len, diff --git a/trunk/fs/compat.c b/trunk/fs/compat.c index 65a070e705ab..d0145ca27572 100644 --- a/trunk/fs/compat.c +++ b/trunk/fs/compat.c @@ -1407,7 +1407,7 @@ int compat_do_execve(char * filename, bprm->cred = prepare_exec_creds(); if (!bprm->cred) goto out_unlock; - check_unsafe_exec(bprm); + check_unsafe_exec(bprm, current->files); file = open_exec(filename); retval = PTR_ERR(file); diff --git a/trunk/fs/ecryptfs/crypto.c b/trunk/fs/ecryptfs/crypto.c index f6caeb1d1106..c01e043670e2 100644 --- a/trunk/fs/ecryptfs/crypto.c +++ b/trunk/fs/ecryptfs/crypto.c @@ -1716,7 +1716,7 @@ static int ecryptfs_copy_filename(char **copied_name, size_t *copied_name_size, { int rc = 0; - (*copied_name) = kmalloc((name_size + 1), GFP_KERNEL); + (*copied_name) = kmalloc((name_size + 2), GFP_KERNEL); if (!(*copied_name)) { rc = -ENOMEM; goto out; @@ -1726,7 +1726,7 @@ static int ecryptfs_copy_filename(char **copied_name, size_t *copied_name_size, * in printing out the * string in debug * messages */ - (*copied_name_size) = name_size; + 
(*copied_name_size) = (name_size + 1); out: return rc; } diff --git a/trunk/fs/exec.c b/trunk/fs/exec.c index 0dd60a01f1b4..929b58004b7e 100644 --- a/trunk/fs/exec.c +++ b/trunk/fs/exec.c @@ -1049,16 +1049,32 @@ EXPORT_SYMBOL(install_exec_creds); * - the caller must hold current->cred_exec_mutex to protect against * PTRACE_ATTACH */ -void check_unsafe_exec(struct linux_binprm *bprm) +void check_unsafe_exec(struct linux_binprm *bprm, struct files_struct *files) { - struct task_struct *p = current; + struct task_struct *p = current, *t; + unsigned long flags; + unsigned n_fs, n_files, n_sighand; bprm->unsafe = tracehook_unsafe_exec(p); - if (atomic_read(&p->fs->count) > 1 || - atomic_read(&p->files->count) > 1 || - atomic_read(&p->sighand->count) > 1) + n_fs = 1; + n_files = 1; + n_sighand = 1; + lock_task_sighand(p, &flags); + for (t = next_thread(p); t != p; t = next_thread(t)) { + if (t->fs == p->fs) + n_fs++; + if (t->files == files) + n_files++; + n_sighand++; + } + + if (atomic_read(&p->fs->count) > n_fs || + atomic_read(&p->files->count) > n_files || + atomic_read(&p->sighand->count) > n_sighand) bprm->unsafe |= LSM_UNSAFE_SHARE; + + unlock_task_sighand(p, &flags); } /* @@ -1273,7 +1289,7 @@ int do_execve(char * filename, bprm->cred = prepare_exec_creds(); if (!bprm->cred) goto out_unlock; - check_unsafe_exec(bprm); + check_unsafe_exec(bprm, displaced); file = open_exec(filename); retval = PTR_ERR(file); diff --git a/trunk/fs/internal.h b/trunk/fs/internal.h index 53af885f1732..0d8ac497b3d5 100644 --- a/trunk/fs/internal.h +++ b/trunk/fs/internal.h @@ -43,7 +43,7 @@ extern void __init chrdev_init(void); /* * exec.c */ -extern void check_unsafe_exec(struct linux_binprm *); +extern void check_unsafe_exec(struct linux_binprm *, struct files_struct *); /* * namespace.c