diff --git a/[refs] b/[refs]
index 04c8376a5903..c393d0bcc5e0 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: c4755d16fce9a4cdbb316c17657444856821bd4b
+refs/heads/master: ff138171ec6f84f311fe8c0395ad7f9e6d04feec
diff --git a/trunk/arch/arm/mm/mmu.c b/trunk/arch/arm/mm/mmu.c
index 2d6d682c206a..d41a75ed3dce 100644
--- a/trunk/arch/arm/mm/mmu.c
+++ b/trunk/arch/arm/mm/mmu.c
@@ -35,7 +35,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
  * zero-initialized data and COW.
  */
 struct page *empty_zero_page;
-EXPORT_SYMBOL(empty_zero_page);
 
 /*
  * The pmd table for the upper-most set of pages.
diff --git a/trunk/arch/m68k/mm/init.c b/trunk/arch/m68k/mm/init.c
index d8fb9c5303cc..a2bb01f59642 100644
--- a/trunk/arch/m68k/mm/init.c
+++ b/trunk/arch/m68k/mm/init.c
@@ -69,7 +69,6 @@ void __init m68k_setup_node(int node)
  */
 
 void *empty_zero_page;
-EXPORT_SYMBOL(empty_zero_page);
 
 void show_mem(void)
 {
diff --git a/trunk/arch/sparc/kernel/sparc_ksyms.c b/trunk/arch/sparc/kernel/sparc_ksyms.c
index aa8ee06cf488..0bcf98a7ef38 100644
--- a/trunk/arch/sparc/kernel/sparc_ksyms.c
+++ b/trunk/arch/sparc/kernel/sparc_ksyms.c
@@ -282,5 +282,3 @@ EXPORT_SYMBOL(do_BUG);
 
 /* Sun Power Management Idle Handler */
 EXPORT_SYMBOL(pm_idle);
-
-EXPORT_SYMBOL(empty_zero_page);
diff --git a/trunk/arch/sparc64/mm/init.c b/trunk/arch/sparc64/mm/init.c
index 4cad0b32b0af..8c2b50e8abc6 100644
--- a/trunk/arch/sparc64/mm/init.c
+++ b/trunk/arch/sparc64/mm/init.c
@@ -160,7 +160,6 @@ extern unsigned int sparc_ramdisk_image;
 extern unsigned int sparc_ramdisk_size;
 
 struct page *mem_map_zero __read_mostly;
-EXPORT_SYMBOL(mem_map_zero);
 
 unsigned int sparc64_highest_unlocked_tlb_ent __read_mostly;
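Reviewer note: the four hunks above all drop the module export of the per-architecture shared zero page (empty_zero_page, or mem_map_zero on sparc64). Built-in code keeps reaching it through the ZERO_PAGE() macro; only out-of-tree modules lose access. A minimal sketch of the usual consumer pattern, assuming the kmap_atomic()/KM_USER0 API of this kernel era (the helper name is ours, not from this patch):

	#include <linux/mm.h>
	#include <linux/highmem.h>

	/* Copy up to one page of zeroes out of the shared zero page. */
	static void copy_zeros_to(void *dst, size_t len)
	{
		struct page *zp = ZERO_PAGE(0);		/* global zero-filled page */
		void *src = kmap_atomic(zp, KM_USER0);	/* map it briefly */

		memcpy(dst, min(len, (size_t)PAGE_SIZE) ? src : src,
		       min(len, (size_t)PAGE_SIZE));
		kunmap_atomic(src, KM_USER0);
	}

With the EXPORT_SYMBOL() lines removed, a loadable module can no longer link against the symbol directly and must go through exported interfaces instead.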
diff --git a/trunk/drivers/edac/pasemi_edac.c b/trunk/drivers/edac/pasemi_edac.c
index 8e6b91bd2e99..3fd65a563848 100644
--- a/trunk/drivers/edac/pasemi_edac.c
+++ b/trunk/drivers/edac/pasemi_edac.c
@@ -26,7 +26,6 @@
 #include
 #include
 #include
-#include
 #include "edac_core.h"
 
 #define MODULE_NAME "pasemi_edac"
diff --git a/trunk/drivers/media/video/cx18/Kconfig b/trunk/drivers/media/video/cx18/Kconfig
index be654a27bd3c..acc4b47f1d1d 100644
--- a/trunk/drivers/media/video/cx18/Kconfig
+++ b/trunk/drivers/media/video/cx18/Kconfig
@@ -4,7 +4,7 @@ config VIDEO_CX18
 	select I2C_ALGOBIT
 	select FW_LOADER
 	select VIDEO_IR
-	select VIDEO_TUNER
+	select MEDIA_TUNER
 	select VIDEO_TVEEPROM
 	select VIDEO_CX2341X
 	select VIDEO_CS5345
diff --git a/trunk/drivers/media/video/cx18/cx18-driver.c b/trunk/drivers/media/video/cx18/cx18-driver.c
index 9f31befc3139..8f5ed9b4bf83 100644
--- a/trunk/drivers/media/video/cx18/cx18-driver.c
+++ b/trunk/drivers/media/video/cx18/cx18-driver.c
@@ -567,7 +567,7 @@ static void cx18_load_and_init_modules(struct cx18 *cx)
 	int i;
 
 	/* load modules */
-#ifndef CONFIG_VIDEO_TUNER
+#ifndef CONFIG_MEDIA_TUNER
 	hw = cx18_request_module(cx, hw, "tuner", CX18_HW_TUNER);
 #endif
 #ifndef CONFIG_VIDEO_CS5345
diff --git a/trunk/drivers/media/video/tuner-core.c b/trunk/drivers/media/video/tuner-core.c
index cc19c4abb467..6bf104ea051d 100644
--- a/trunk/drivers/media/video/tuner-core.c
+++ b/trunk/drivers/media/video/tuner-core.c
@@ -34,7 +34,7 @@
 #define PREFIX t->i2c->driver->driver.name
 
 /** This macro allows us to probe dynamically, avoiding static links */
-#ifdef CONFIG_DVB_CORE_ATTACH
+#ifdef CONFIG_MEDIA_ATTACH
 #define tuner_symbol_probe(FUNCTION, ARGS...) ({ \
 	int __r = -EINVAL; \
 	typeof(&FUNCTION) __a = symbol_request(FUNCTION); \
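Reviewer note: the tuner_symbol_probe() hunk is cut off by the diff context, but the mechanism it relies on is visible: symbol_request() resolves an exported symbol at run time (loading the providing module if needed) and symbol_put() drops the reference afterwards. A hedged, self-contained sketch of that pattern; demod_attach and probe_demod are hypothetical names, not from this patch:

	#include <linux/module.h>
	#include <linux/kernel.h>

	extern int demod_attach(void *cfg);	/* hypothetical exported symbol */

	static int probe_demod(void *cfg)
	{
		int ret = -EINVAL;
		typeof(&demod_attach) fn = symbol_request(demod_attach);

		if (fn) {
			ret = fn(cfg);			/* call into the live module */
			symbol_put(demod_attach);	/* release the module ref */
		} else {
			printk(KERN_ERR "tuner: cannot resolve demod_attach()\n");
		}
		return ret;
	}

This avoids a hard link-time dependency between the tuner core and every attach helper it might use.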
diff --git a/trunk/fs/ext3/inode.c b/trunk/fs/ext3/inode.c
index 6ae4ecf3ce40..cc47b76091bf 100644
--- a/trunk/fs/ext3/inode.c
+++ b/trunk/fs/ext3/inode.c
@@ -1261,11 +1261,10 @@ static int ext3_ordered_write_end(struct file *file,
 		new_i_size = pos + copied;
 		if (new_i_size > EXT3_I(inode)->i_disksize)
 			EXT3_I(inode)->i_disksize = new_i_size;
-		ret2 = ext3_generic_write_end(file, mapping, pos, len, copied,
+		copied = ext3_generic_write_end(file, mapping, pos, len, copied,
							page, fsdata);
-		copied = ret2;
-		if (ret2 < 0)
-			ret = ret2;
+		if (copied < 0)
+			ret = copied;
 	}
 	ret2 = ext3_journal_stop(handle);
 	if (!ret)
@@ -1290,11 +1289,10 @@ static int ext3_writeback_write_end(struct file *file,
 	if (new_i_size > EXT3_I(inode)->i_disksize)
 		EXT3_I(inode)->i_disksize = new_i_size;
 
-	ret2 = ext3_generic_write_end(file, mapping, pos, len, copied,
+	copied = ext3_generic_write_end(file, mapping, pos, len, copied,
						page, fsdata);
-	copied = ret2;
-	if (ret2 < 0)
-		ret = ret2;
+	if (copied < 0)
+		ret = copied;
 
 	ret2 = ext3_journal_stop(handle);
 	if (!ret)
diff --git a/trunk/fs/ext4/acl.c b/trunk/fs/ext4/acl.c
index 3c8dab880d91..a8bae8cd1d5d 100644
--- a/trunk/fs/ext4/acl.c
+++ b/trunk/fs/ext4/acl.c
@@ -9,8 +9,8 @@
 #include
 #include
 #include
-#include "ext4_jbd2.h"
-#include "ext4.h"
+#include
+#include
 #include "xattr.h"
 #include "acl.h"
 
@@ -37,7 +37,7 @@ ext4_acl_from_disk(const void *value, size_t size)
 		return ERR_PTR(-EINVAL);
 	if (count == 0)
 		return NULL;
-	acl = posix_acl_alloc(count, GFP_NOFS);
+	acl = posix_acl_alloc(count, GFP_KERNEL);
 	if (!acl)
 		return ERR_PTR(-ENOMEM);
 	for (n=0; n < count; n++) {
@@ -91,7 +91,7 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
 	*size = ext4_acl_size(acl->a_count);
 	ext_acl = kmalloc(sizeof(ext4_acl_header) + acl->a_count *
-			sizeof(ext4_acl_entry), GFP_NOFS);
+			sizeof(ext4_acl_entry), GFP_KERNEL);
 	if (!ext_acl)
 		return ERR_PTR(-ENOMEM);
 	ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION);
@@ -187,7 +187,7 @@ ext4_get_acl(struct inode *inode, int type)
 	}
 	retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
 	if (retval > 0) {
-		value = kmalloc(retval, GFP_NOFS);
+		value = kmalloc(retval, GFP_KERNEL);
 		if (!value)
 			return ERR_PTR(-ENOMEM);
 		retval = ext4_xattr_get(inode, name_index, "", value, retval);
@@ -335,7 +335,7 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
 		if (error)
 			goto cleanup;
 	}
-	clone = posix_acl_clone(acl, GFP_NOFS);
+	clone = posix_acl_clone(acl, GFP_KERNEL);
 	error = -ENOMEM;
 	if (!clone)
 		goto cleanup;
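Reviewer note: the acl.c hunks swap GFP_NOFS back to GFP_KERNEL. The distinction matters because GFP_KERNEL allocations may trigger memory reclaim, and reclaim may re-enter the filesystem (writing dirty pages) while the caller already holds a journal handle. An illustrative sketch under that assumption; the helper frame is ours, the ext4 calls are real:

	/* Allocation while a jbd2 handle is open must not recurse into the fs. */
	static int acl_buffer_under_handle(struct inode *inode, size_t size)
	{
		handle_t *handle = ext4_journal_start(inode, 3);
		void *buf;

		if (IS_ERR(handle))
			return PTR_ERR(handle);
		/*
		 * GFP_KERNEL here could deadlock against the open handle via
		 * reclaim; GFP_NOFS forbids filesystem re-entry.
		 */
		buf = kmalloc(size, GFP_NOFS);
		kfree(buf);
		return ext4_journal_stop(handle);
	}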
diff --git a/trunk/fs/ext4/balloc.c b/trunk/fs/ext4/balloc.c
index da994374ec3b..0737e05ba3dd 100644
--- a/trunk/fs/ext4/balloc.c
+++ b/trunk/fs/ext4/balloc.c
@@ -15,12 +15,12 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
-#include "ext4.h"
-#include "ext4_jbd2.h"
-#include "group.h"
+#include "group.h"
 
 /*
  * balloc.c contains the blocks allocation and deallocation routines
 */
@@ -48,6 +48,7 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
 unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
		 ext4_group_t block_group, struct ext4_group_desc *gdp)
 {
+	unsigned long start;
	int bit, bit_max;
	unsigned free_blocks, group_blocks;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -58,7 +59,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
		/* If checksum is bad mark all blocks used to prevent allocation
		 * essentially implementing a per-group read-only flag. */
		if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
-			ext4_error(sb, __func__,
+			ext4_error(sb, __FUNCTION__,
				  "Checksum bad for group %lu\n", block_group);
			gdp->bg_free_blocks_count = 0;
			gdp->bg_free_inodes_count = 0;
@@ -105,12 +106,11 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
	free_blocks = group_blocks - bit_max;
 
	if (bh) {
-		ext4_fsblk_t start;
-
		for (bit = 0; bit < bit_max; bit++)
			ext4_set_bit(bit, bh->b_data);
 
-		start = ext4_group_first_block_no(sb, block_group);
+		start = block_group * EXT4_BLOCKS_PER_GROUP(sb) +
+			le32_to_cpu(sbi->s_es->s_first_data_block);
 
		/* Set bits for block and inode bitmaps, and inode table */
		ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data);
@@ -235,7 +235,7 @@ static int ext4_valid_block_bitmap(struct super_block *sb,
	return 1;
 
 err_out:
-	ext4_error(sb, __func__,
+	ext4_error(sb, __FUNCTION__,
			"Invalid block bitmap - "
			"block_group = %d, block = %llu",
			block_group, bitmap_blk);
@@ -264,7 +264,7 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
	bitmap_blk = ext4_block_bitmap(sb, desc);
	bh = sb_getblk(sb, bitmap_blk);
	if (unlikely(!bh)) {
-		ext4_error(sb, __func__,
+		ext4_error(sb, __FUNCTION__,
			    "Cannot read block bitmap - "
			    "block_group = %d, block_bitmap = %llu",
			    (int)block_group, (unsigned long long)bitmap_blk);
@@ -281,7 +281,7 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
	}
	if (bh_submit_read(bh) < 0) {
		put_bh(bh);
-		ext4_error(sb, __func__,
+		ext4_error(sb, __FUNCTION__,
			    "Cannot read block bitmap - "
			    "block_group = %d, block_bitmap = %llu",
			    (int)block_group, (unsigned long long)bitmap_blk);
@@ -360,7 +360,7 @@ static void __rsv_window_dump(struct rb_root *root, int verbose,
		BUG();
 }
 #define rsv_window_dump(root, verbose) \
-	__rsv_window_dump((root), (verbose), __func__)
+	__rsv_window_dump((root), (verbose), __FUNCTION__)
 #else
 #define rsv_window_dump(root, verbose) do {} while (0)
 #endif
@@ -740,7 +740,7 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
		if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
						bit + i, bitmap_bh->b_data)) {
			jbd_unlock_bh_state(bitmap_bh);
-			ext4_error(sb, __func__,
+			ext4_error(sb, __FUNCTION__,
				   "bit already cleared for block %llu",
				   (ext4_fsblk_t)(block + i));
			jbd_lock_bh_state(bitmap_bh);
@@ -752,7 +752,9 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
	jbd_unlock_bh_state(bitmap_bh);
 
	spin_lock(sb_bgl_lock(sbi, block_group));
-	le16_add_cpu(&desc->bg_free_blocks_count, group_freed);
+	desc->bg_free_blocks_count =
+		cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) +
+			group_freed);
	desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
	spin_unlock(sb_bgl_lock(sbi, block_group));
	percpu_counter_add(&sbi->s_freeblocks_counter, count);
@@ -1796,7 +1798,7 @@ ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
			if (ext4_test_bit(grp_alloc_blk+i,
					bh2jh(bitmap_bh)->b_committed_data)) {
				printk("%s: block was unexpectedly set in "
-					"b_committed_data\n", __func__);
+					"b_committed_data\n", __FUNCTION__);
			}
		}
	}
@@ -1821,7 +1823,8 @@ ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
	spin_lock(sb_bgl_lock(sbi, group_no));
	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
-	le16_add_cpu(&gdp->bg_free_blocks_count, -num);
+	gdp->bg_free_blocks_count =
+		cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num);
	gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
	spin_unlock(sb_bgl_lock(sbi, group_no));
	percpu_counter_sub(&sbi->s_freeblocks_counter, num);
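Reviewer note: several hunks here (and throughout this patch) replace le16_add_cpu() with its open-coded equivalent. The helper, from the generic byteorder header, is essentially the sketch below, so the change is purely mechanical:

	/* Essentially the helper from <linux/byteorder/generic.h>. */
	static inline void le16_add_cpu(__le16 *var, u16 val)
	{
		*var = cpu_to_le16(le16_to_cpu(*var) + val);
	}

Subtraction is just le16_add_cpu(&x, -n), relying on 16-bit wraparound, which is why both directions appear in the diff.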
diff --git a/trunk/fs/ext4/bitmap.c b/trunk/fs/ext4/bitmap.c
index d37ea6750454..420554f8f79d 100644
--- a/trunk/fs/ext4/bitmap.c
+++ b/trunk/fs/ext4/bitmap.c
@@ -9,7 +9,7 @@
 
 #include
 #include
-#include "ext4.h"
+#include
 
 #ifdef EXT4FS_DEBUG
 
diff --git a/trunk/fs/ext4/dir.c b/trunk/fs/ext4/dir.c
index 2bf0331ea194..2c23bade9aa6 100644
--- a/trunk/fs/ext4/dir.c
+++ b/trunk/fs/ext4/dir.c
@@ -23,10 +23,10 @@
 
 #include
 #include
+#include
 #include
 #include
 #include
-#include "ext4.h"
 
 static unsigned char ext4_filetype_table[] = {
 	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
@@ -42,7 +42,7 @@ const struct file_operations ext4_dir_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
 	.readdir	= ext4_readdir,		/* we take BKL. needed?*/
-	.unlocked_ioctl = ext4_ioctl,
+	.ioctl		= ext4_ioctl,		/* BKL held */
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext4_compat_ioctl,
 #endif
diff --git a/trunk/fs/ext4/ext4_jbd2.c b/trunk/fs/ext4/ext4_jbd2.c
index c75384b34f2c..d6afe4e27340 100644
--- a/trunk/fs/ext4/ext4_jbd2.c
+++ b/trunk/fs/ext4/ext4_jbd2.c
@@ -2,14 +2,14 @@
  * Interface between ext4 and JBD
  */
 
-#include "ext4_jbd2.h"
+#include
 
 int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
				struct buffer_head *bh)
 {
	int err = jbd2_journal_get_undo_access(handle, bh);
	if (err)
-		ext4_journal_abort_handle(where, __func__, bh, handle, err);
+		ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
	return err;
 }
 
@@ -18,7 +18,7 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle,
 {
	int err = jbd2_journal_get_write_access(handle, bh);
	if (err)
-		ext4_journal_abort_handle(where, __func__, bh, handle, err);
+		ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
	return err;
 }
 
@@ -27,7 +27,7 @@ int __ext4_journal_forget(const char *where, handle_t *handle,
 {
	int err = jbd2_journal_forget(handle, bh);
	if (err)
-		ext4_journal_abort_handle(where, __func__, bh, handle, err);
+		ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
	return err;
 }
 
@@ -36,7 +36,7 @@ int __ext4_journal_revoke(const char *where, handle_t *handle,
 {
	int err = jbd2_journal_revoke(handle, blocknr, bh);
	if (err)
-		ext4_journal_abort_handle(where, __func__, bh, handle, err);
+		ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
	return err;
 }
 
@@ -45,7 +45,7 @@ int __ext4_journal_get_create_access(const char *where,
 {
	int err = jbd2_journal_get_create_access(handle, bh);
	if (err)
-		ext4_journal_abort_handle(where, __func__, bh, handle, err);
+		ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
	return err;
 }
 
@@ -54,6 +54,6 @@ int __ext4_journal_dirty_metadata(const char *where,
 {
	int err = jbd2_journal_dirty_metadata(handle, bh);
	if (err)
-		ext4_journal_abort_handle(where, __func__, bh, handle, err);
+		ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
	return err;
 }
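Reviewer note: the dir.c hunk (and the file.c and ioctl.c hunks later in this patch) moves ext4 from .unlocked_ioctl back to the legacy .ioctl entry point. The two hooks differ both in signature and in locking; a sketch of the contrast, with an illustrative frame around the real prototypes:

	/* Legacy hook: VFS takes the Big Kernel Lock and passes the inode. */
	int old_style_ioctl(struct inode *inode, struct file *filp,
			    unsigned int cmd, unsigned long arg);

	/* Modern hook: no BKL; the handler derives the inode itself. */
	long new_style_ioctl(struct file *filp, unsigned int cmd,
			     unsigned long arg)
	{
		struct inode *inode = filp->f_dentry->d_inode; /* as ext4_ioctl did */

		(void)inode;
		return -ENOTTY;
	}

The "/* BKL held */" comment added next to .ioctl is the whole point: serialization is implicit again.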
diff --git a/trunk/fs/ext4/extents.c b/trunk/fs/ext4/extents.c
index 47929c4e3dae..9ae6e67090cd 100644
--- a/trunk/fs/ext4/extents.c
+++ b/trunk/fs/ext4/extents.c
@@ -32,6 +32,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -39,9 +40,8 @@
 #include
 #include
 #include
+#include
 #include
-#include "ext4_jbd2.h"
-#include "ext4_extents.h"
 
 /*
 
@@ -308,7 +308,7 @@ static int __ext4_ext_check_header(const char *function, struct inode *inode,
 }
 
 #define ext4_ext_check_header(inode, eh, depth)	\
-	__ext4_ext_check_header(__func__, inode, eh, depth)
+	__ext4_ext_check_header(__FUNCTION__, inode, eh, depth)
 
 #ifdef EXT_DEBUG
 static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
@@ -614,7 +614,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
	ix->ei_block = cpu_to_le32(logical);
	ext4_idx_store_pblock(ix, ptr);
-	le16_add_cpu(&curp->p_hdr->eh_entries, 1);
+	curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1);
 
	BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries)
			     > le16_to_cpu(curp->p_hdr->eh_max));
@@ -736,7 +736,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
	}
	if (m) {
		memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m);
-		le16_add_cpu(&neh->eh_entries, m);
+		neh->eh_entries = cpu_to_le16(le16_to_cpu(neh->eh_entries)+m);
	}
 
	set_buffer_uptodate(bh);
@@ -753,7 +753,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
	err = ext4_ext_get_access(handle, inode, path + depth);
	if (err)
		goto cleanup;
-	le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
+	path[depth].p_hdr->eh_entries =
+		cpu_to_le16(le16_to_cpu(path[depth].p_hdr->eh_entries)-m);
	err = ext4_ext_dirty(handle, inode, path + depth);
	if (err)
		goto cleanup;
@@ -816,7 +817,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
		if (m) {
			memmove(++fidx, path[i].p_idx - m,
				sizeof(struct ext4_extent_idx) * m);
-			le16_add_cpu(&neh->eh_entries, m);
+			neh->eh_entries =
+				cpu_to_le16(le16_to_cpu(neh->eh_entries) + m);
		}
		set_buffer_uptodate(bh);
		unlock_buffer(bh);
@@ -832,7 +834,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
		err = ext4_ext_get_access(handle, inode, path + i);
		if (err)
			goto cleanup;
-		le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
+		path[i].p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path[i].p_hdr->eh_entries)-m);
		err = ext4_ext_dirty(handle, inode, path + i);
		if (err)
			goto cleanup;
@@ -1367,7 +1369,7 @@ int ext4_ext_try_to_merge(struct inode *inode,
			* sizeof(struct ext4_extent);
			memmove(ex + 1, ex + 2, len);
		}
-		le16_add_cpu(&eh->eh_entries, -1);
+		eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries) - 1);
		merge_done = 1;
		WARN_ON(eh->eh_entries == 0);
		if (!eh->eh_entries)
@@ -1558,7 +1560,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
		path[depth].p_ext = nearex;
	}
 
-	le16_add_cpu(&eh->eh_entries, 1);
+	eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1);
	nearex = path[depth].p_ext;
	nearex->ee_block = newext->ee_block;
	ext4_ext_store_pblock(nearex, ext_pblock(newext));
@@ -1697,7 +1699,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
	err = ext4_ext_get_access(handle, inode, path);
	if (err)
		return err;
-	le16_add_cpu(&path->p_hdr->eh_entries, -1);
+	path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1);
	err = ext4_ext_dirty(handle, inode, path);
	if (err)
		return err;
@@ -1900,7 +1902,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
	if (num == 0) {
		/* this extent is removed; mark slot entirely unused */
		ext4_ext_store_pblock(ex, 0);
-		le16_add_cpu(&eh->eh_entries, -1);
+		eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1);
	}
 
	ex->ee_block = cpu_to_le32(block);
@@ -1977,7 +1979,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
	 * We start scanning from right side, freeing all the blocks
	 * after i_size and walking into the tree depth-wise.
	 */
-	path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS);
+	path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL);
	if (path == NULL) {
		ext4_journal_stop(handle);
		return -ENOMEM;
@@ -2136,82 +2138,6 @@ void ext4_ext_release(struct super_block *sb)
 #endif
 }
 
-static void bi_complete(struct bio *bio, int error)
-{
-	complete((struct completion *)bio->bi_private);
-}
-
-/* FIXME!! we need to try to merge to left or right after zero-out */
-static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
-{
-	int ret = -EIO;
-	struct bio *bio;
-	int blkbits, blocksize;
-	sector_t ee_pblock;
-	struct completion event;
-	unsigned int ee_len, len, done, offset;
-
-
-	blkbits   = inode->i_blkbits;
-	blocksize = inode->i_sb->s_blocksize;
-	ee_len    = ext4_ext_get_actual_len(ex);
-	ee_pblock = ext_pblock(ex);
-
-	/* convert ee_pblock to 512 byte sectors */
-	ee_pblock = ee_pblock << (blkbits - 9);
-
-	while (ee_len > 0) {
-
-		if (ee_len > BIO_MAX_PAGES)
-			len = BIO_MAX_PAGES;
-		else
-			len = ee_len;
-
-		bio = bio_alloc(GFP_NOIO, len);
-		if (!bio)
-			return -ENOMEM;
-		bio->bi_sector = ee_pblock;
-		bio->bi_bdev   = inode->i_sb->s_bdev;
-
-		done = 0;
-		offset = 0;
-		while (done < len) {
-			ret = bio_add_page(bio, ZERO_PAGE(0),
-							blocksize, offset);
-			if (ret != blocksize) {
-				/*
-				 * We can't add any more pages because of
-				 * hardware limitations.  Start a new bio.
-				 */
-				break;
-			}
-			done++;
-			offset += blocksize;
-			if (offset >= PAGE_CACHE_SIZE)
-				offset = 0;
-		}
-
-		init_completion(&event);
-		bio->bi_private = &event;
-		bio->bi_end_io = bi_complete;
-		submit_bio(WRITE, bio);
-		wait_for_completion(&event);
-
-		if (test_bit(BIO_UPTODATE, &bio->bi_flags))
-			ret = 0;
-		else {
-			ret = -EIO;
-			break;
-		}
-		bio_put(bio);
-		ee_len -= done;
-		ee_pblock += done << (blkbits - 9);
-	}
-	return ret;
-}
-
-#define EXT4_EXT_ZERO_LEN 7
-
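Reviewer note: the removed ext4_ext_zeroout() drives a write bio synchronously by parking a completion in bi_private and sleeping until the end_io callback fires. That idiom is worth keeping in mind independently of this revert; a minimal sketch under the same-era block API (the helper names are ours):

	static void sync_bio_end_io(struct bio *bio, int error)
	{
		complete((struct completion *)bio->bi_private);
	}

	/* Submit a prepared bio and wait for it to finish. */
	static int submit_bio_and_wait(int rw, struct bio *bio)
	{
		struct completion done;

		init_completion(&done);
		bio->bi_private = &done;
		bio->bi_end_io = sync_bio_end_io;
		submit_bio(rw, bio);
		wait_for_completion(&done);

		return test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : -EIO;
	}

ext4_ext_zeroout() layered on this by packing ZERO_PAGE(0) repeatedly into the bio so the device writes zeroes without any allocated buffer.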
 /*
  * This function is called by ext4_ext_get_blocks() if someone tries to write
  * to an uninitialized extent. It may result in splitting the uninitialized
@@ -2228,7 +2154,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
						ext4_lblk_t iblock,
						unsigned long max_blocks)
 {
-	struct ext4_extent *ex, newex, orig_ex;
+	struct ext4_extent *ex, newex;
	struct ext4_extent *ex1 = NULL;
	struct ext4_extent *ex2 = NULL;
	struct ext4_extent *ex3 = NULL;
@@ -2247,26 +2173,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
	allocated = ee_len - (iblock - ee_block);
	newblock = iblock - ee_block + ext_pblock(ex);
	ex2 = ex;
-	orig_ex.ee_block = ex->ee_block;
-	orig_ex.ee_len   = cpu_to_le16(ee_len);
-	ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
 
	err = ext4_ext_get_access(handle, inode, path + depth);
	if (err)
		goto out;
-	/* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
-	if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
-		err =  ext4_ext_zeroout(inode, &orig_ex);
-		if (err)
-			goto fix_extent_len;
-		/* update the extent length and mark as initialized */
-		ex->ee_block = orig_ex.ee_block;
-		ex->ee_len   = orig_ex.ee_len;
-		ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
-		ext4_ext_dirty(handle, inode, path + depth);
-		/* zeroed the full extent */
-		return allocated;
-	}
 
	/* ex1: ee_block to iblock - 1 : uninitialized */
	if (iblock > ee_block) {
@@ -2285,103 +2195,19 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
	/* ex3: to ee_block + ee_len : uninitialised */
	if (allocated > max_blocks) {
		unsigned int newdepth;
-		/* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
-		if (allocated <= EXT4_EXT_ZERO_LEN) {
-			/* Mark first half uninitialized.
-			 * Mark second half initialized and zero out the
-			 * initialized extent
-			 */
-			ex->ee_block = orig_ex.ee_block;
-			ex->ee_len   = cpu_to_le16(ee_len - allocated);
-			ext4_ext_mark_uninitialized(ex);
-			ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
-			ext4_ext_dirty(handle, inode, path + depth);
-
-			ex3 = &newex;
-			ex3->ee_block = cpu_to_le32(iblock);
-			ext4_ext_store_pblock(ex3, newblock);
-			ex3->ee_len = cpu_to_le16(allocated);
-			err = ext4_ext_insert_extent(handle, inode, path, ex3);
-			if (err == -ENOSPC) {
-				err =  ext4_ext_zeroout(inode, &orig_ex);
-				if (err)
-					goto fix_extent_len;
-				ex->ee_block = orig_ex.ee_block;
-				ex->ee_len   = orig_ex.ee_len;
-				ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
-				ext4_ext_dirty(handle, inode, path + depth);
-				/* zeroed the full extent */
-				return allocated;
-
-			} else if (err)
-				goto fix_extent_len;
-
-			/*
-			 * We need to zero out the second half because
-			 * an fallocate request can update file size and
-			 * converting the second half to initialized extent
-			 * implies that we can leak some junk data to user
-			 * space.
-			 */
-			err =  ext4_ext_zeroout(inode, ex3);
-			if (err) {
-				/*
-				 * We should actually mark the
-				 * second half as uninit and return error
-				 * Insert would have changed the extent
-				 */
-				depth = ext_depth(inode);
-				ext4_ext_drop_refs(path);
-				path = ext4_ext_find_extent(inode,
								iblock, path);
-				if (IS_ERR(path)) {
-					err = PTR_ERR(path);
-					return err;
-				}
-				ex = path[depth].p_ext;
-				err = ext4_ext_get_access(handle, inode,
								path + depth);
-				if (err)
-					return err;
-				ext4_ext_mark_uninitialized(ex);
-				ext4_ext_dirty(handle, inode, path + depth);
-				return err;
-			}
-
-			/* zeroed the second half */
-			return allocated;
-		}
		ex3 = &newex;
		ex3->ee_block = cpu_to_le32(iblock + max_blocks);
		ext4_ext_store_pblock(ex3, newblock + max_blocks);
		ex3->ee_len = cpu_to_le16(allocated - max_blocks);
		ext4_ext_mark_uninitialized(ex3);
		err = ext4_ext_insert_extent(handle, inode, path, ex3);
-		if (err == -ENOSPC) {
-			err =  ext4_ext_zeroout(inode, &orig_ex);
-			if (err)
-				goto fix_extent_len;
-			/* update the extent length and mark as initialized */
-			ex->ee_block = orig_ex.ee_block;
-			ex->ee_len   = orig_ex.ee_len;
-			ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
-			ext4_ext_dirty(handle, inode, path + depth);
-			/* zeroed the full extent */
-			return allocated;
-
-		} else if (err)
-			goto fix_extent_len;
+		if (err)
+			goto out;
		/*
		 * The depth, and hence eh & ex might change
		 * as part of the insert above.
		 */
		newdepth = ext_depth(inode);
-		/*
-		 * update the extent length after successfull insert of the
-		 * split extent
-		 */
-		orig_ex.ee_len = cpu_to_le16(ee_len -
-						ext4_ext_get_actual_len(ex3));
		if (newdepth != depth) {
			depth = newdepth;
			ext4_ext_drop_refs(path);
@@ -2400,24 +2226,6 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
			goto out;
		}
		allocated = max_blocks;
-
-		/* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
-		 * to insert a extent in the middle zerout directly
-		 * otherwise give the extent a chance to merge to left
-		 */
-		if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
-							iblock != ee_block) {
-			err =  ext4_ext_zeroout(inode, &orig_ex);
-			if (err)
-				goto fix_extent_len;
-			/* update the extent length and mark as initialized */
-			ex->ee_block = orig_ex.ee_block;
-			ex->ee_len   = orig_ex.ee_len;
-			ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
-			ext4_ext_dirty(handle, inode, path + depth);
-			/* zero out the first half */
-			return allocated;
-		}
	}
	/*
	 * If there was a change of depth as part of the
@@ -2474,29 +2282,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
		goto out;
 insert:
	err = ext4_ext_insert_extent(handle, inode, path, &newex);
-	if (err == -ENOSPC) {
-		err =  ext4_ext_zeroout(inode, &orig_ex);
-		if (err)
-			goto fix_extent_len;
-		/* update the extent length and mark as initialized */
-		ex->ee_block = orig_ex.ee_block;
-		ex->ee_len   = orig_ex.ee_len;
-		ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
-		ext4_ext_dirty(handle, inode, path + depth);
-		/* zero out the first half */
-		return allocated;
-	} else if (err)
-		goto fix_extent_len;
 out:
	return err ? err : allocated;
-
-fix_extent_len:
-	ex->ee_block = orig_ex.ee_block;
-	ex->ee_len   = orig_ex.ee_len;
-	ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
-	ext4_ext_mark_uninitialized(ex);
-	ext4_ext_dirty(handle, inode, path + depth);
-	return err;
 }
 
 /*
@@ -2606,20 +2393,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
		}
		if (create == EXT4_CREATE_UNINITIALIZED_EXT)
			goto out;
-		if (!create) {
-			/*
-			 * We have blocks reserved already. We
-			 * return allocated blocks so that delalloc
-			 * won't do block reservation for us. But
-			 * the buffer head will be unmapped so that
-			 * a read from the block returns 0s.
-			 */
-			if (allocated > max_blocks)
-				allocated = max_blocks;
-			/* mark the buffer unwritten */
-			__set_bit(BH_Unwritten, &bh_result->b_state);
+		if (!create)
			goto out2;
-		}
 
		ret = ext4_ext_convert_to_initialized(handle, inode,
								path, iblock,
@@ -2809,8 +2584,6 @@ void ext4_ext_truncate(struct inode * inode, struct page *page)
		ext4_orphan_del(handle, inode);
 
	up_write(&EXT4_I(inode)->i_data_sem);
-	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
-	ext4_mark_inode_dirty(handle, inode);
	ext4_journal_stop(handle);
 }
 
@@ -2835,28 +2608,6 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
	return needed;
 }
 
-static void ext4_falloc_update_inode(struct inode *inode,
-				int mode, loff_t new_size, int update_ctime)
-{
-	struct timespec now;
-
-	if (update_ctime) {
-		now = current_fs_time(inode->i_sb);
-		if (!timespec_equal(&inode->i_ctime, &now))
-			inode->i_ctime = now;
-	}
-	/*
-	 * Update only when preallocation was requested beyond
-	 * the file size.
-	 */
-	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
-				new_size > i_size_read(inode)) {
-		i_size_write(inode, new_size);
-		EXT4_I(inode)->i_disksize = new_size;
-	}
-
-}
-
 /*
  * preallocate space for a file. This implements ext4's fallocate inode
  * operation, which gets called from sys_fallocate system call.
@@ -2868,8 +2619,8 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
 {
	handle_t *handle;
	ext4_lblk_t block;
-	loff_t new_size;
	unsigned long max_blocks;
+	ext4_fsblk_t nblocks = 0;
	int ret = 0;
	int ret2 = 0;
	int retries = 0;
@@ -2888,12 +2639,9 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
		return -ENODEV;
 
	block = offset >> blkbits;
-	/*
-	 * We can't just convert len to max_blocks because
-	 * If blocksize = 4096 offset = 3072 and len = 2048
-	 */
	max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
-							- block;
+							 - block;
+
	/*
	 * credits to insert 1 extent into extent tree + buffers to be able to
	 * modify 1 super block, 1 block bitmap and 1 group descriptor.
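Reviewer note: the comment deleted just above hints at why max_blocks must be computed from the aligned end rather than from len alone. Worked through with the numbers the comment names (4 KiB blocks, so blkbits = 12):

	offset = 3072, len = 2048
	block      = 3072 >> 12                          = 0
	max_blocks = (ALIGN(3072 + 2048, 4096) >> 12) - 0
	           = (8192 >> 12) - 0                    = 2

Rounding len by itself would yield ALIGN(2048, 4096) >> 12 = 1 block, leaving the tail of the request (bytes 4096..5119) unallocated, because the range straddles a block boundary.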
@@ -2909,6 +2657,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
			ret = PTR_ERR(handle);
			break;
		}
+
		ret = ext4_get_blocks_wrap(handle, inode, block,
					  max_blocks, &map_bh,
					  EXT4_CREATE_UNINITIALIZED_EXT, 0);
@@ -2924,24 +2673,61 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
			ret2 = ext4_journal_stop(handle);
			break;
		}
-		if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
-						blkbits) >> blkbits))
-			new_size = offset + len;
-		else
-			new_size = (block + ret) << blkbits;
+		if (ret > 0) {
+			/* check wrap through sign-bit/zero here */
+			if ((block + ret) < 0 || (block + ret) < block) {
+				ret = -EIO;
+				ext4_mark_inode_dirty(handle, inode);
+				ret2 = ext4_journal_stop(handle);
+				break;
+			}
+			if (buffer_new(&map_bh) && ((block + ret) >
+				(EXT4_BLOCK_ALIGN(i_size_read(inode), blkbits)
+				 >> blkbits)))
+					nblocks = nblocks + ret;
+		}
+
+		/* Update ctime if new blocks get allocated */
+		if (nblocks) {
+			struct timespec now;
+
+			now = current_fs_time(inode->i_sb);
+			if (!timespec_equal(&inode->i_ctime, &now))
+				inode->i_ctime = now;
+		}
 
-		ext4_falloc_update_inode(inode, mode, new_size,
-						buffer_new(&map_bh));
		ext4_mark_inode_dirty(handle, inode);
		ret2 = ext4_journal_stop(handle);
		if (ret2)
			break;
	}
-	if (ret == -ENOSPC &&
-			ext4_should_retry_alloc(inode->i_sb, &retries)) {
-		ret = 0;
+
+	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
		goto retry;
+
+	/*
+	 * Time to update the file size.
+	 * Update only when preallocation was requested beyond the file size.
+	 */
+	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+	    (offset + len) > i_size_read(inode)) {
+		if (ret > 0) {
+			/*
+			 * if no error, we assume preallocation succeeded
+			 * completely
+			 */
+			i_size_write(inode, offset + len);
+			EXT4_I(inode)->i_disksize = i_size_read(inode);
+		} else if (ret < 0 && nblocks) {
+			/* Handle partial allocation scenario */
+			loff_t newsize;
+
+			newsize = (nblocks << blkbits) + i_size_read(inode);
+			i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits));
+			EXT4_I(inode)->i_disksize = i_size_read(inode);
+		}
	}
+
	mutex_unlock(&inode->i_mutex);
	return ret > 0 ? ret2 : ret;
 }
diff --git a/trunk/fs/ext4/file.c b/trunk/fs/ext4/file.c
index 4159be6366ab..ac35ec58db55 100644
--- a/trunk/fs/ext4/file.c
+++ b/trunk/fs/ext4/file.c
@@ -21,8 +21,8 @@
 #include
 #include
 #include
-#include "ext4.h"
-#include "ext4_jbd2.h"
+#include
+#include
 #include "xattr.h"
 #include "acl.h"
 
@@ -129,7 +129,7 @@ const struct file_operations ext4_file_operations = {
 	.write		= do_sync_write,
 	.aio_read	= generic_file_aio_read,
 	.aio_write	= ext4_file_write,
-	.unlocked_ioctl = ext4_ioctl,
+	.ioctl		= ext4_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext4_compat_ioctl,
 #endif
diff --git a/trunk/fs/ext4/fsync.c b/trunk/fs/ext4/fsync.c
index 1c8ba48d4f8d..8d50879d1c2c 100644
--- a/trunk/fs/ext4/fsync.c
+++ b/trunk/fs/ext4/fsync.c
@@ -27,8 +27,8 @@
 #include
 #include
 #include
-#include "ext4.h"
-#include "ext4_jbd2.h"
+#include
+#include
 
 /*
  * akpm: A new design for ext4_sync_file().
@@ -72,9 +72,6 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
 		goto out;
 	}
 
-	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
-		goto out;
-
 	/*
	 * The VFS has written the file data. If the inode is unaltered
	 * then we need not start a commit.
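Reviewer note: the fsync.c hunk drops a fast path for fdatasync(2). The semantics behind it, restated as a sketch (the helper frame is illustrative; the flag test is the removed line itself): fdatasync only needs to flush metadata required to read the data back, so an inode that is dirty merely for timestamps can skip the journal commit.

	static int need_commit_for_datasync(struct inode *inode, int datasync)
	{
		/* Dirty for timestamps only (I_DIRTY_SYNC, not
		 * I_DIRTY_DATASYNC): no commit is needed. */
		if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
			return 0;
		return 1;
	}

Removing the check makes fdatasync behave like a full fsync, which is correct but forfeits the optimization.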
diff --git a/trunk/fs/ext4/hash.c b/trunk/fs/ext4/hash.c
index 1d6329dbe390..1555024e3b36 100644
--- a/trunk/fs/ext4/hash.c
+++ b/trunk/fs/ext4/hash.c
@@ -11,8 +11,8 @@
 
 #include
 #include
+#include
 #include
-#include "ext4.h"
 
 #define DELTA 0x9E3779B9
 
diff --git a/trunk/fs/ext4/ialloc.c b/trunk/fs/ext4/ialloc.c
index c6efbab0c801..486e46a3918d 100644
--- a/trunk/fs/ext4/ialloc.c
+++ b/trunk/fs/ext4/ialloc.c
@@ -15,6 +15,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
@@ -23,8 +25,7 @@
 #include
 #include
 #include
-#include "ext4.h"
-#include "ext4_jbd2.h"
+
 #include "xattr.h"
 #include "acl.h"
 #include "group.h"
@@ -74,7 +75,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
	/* If checksum is bad mark all blocks and inodes use to prevent
	 * allocation, essentially implementing a per-group read-only flag. */
	if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
-		ext4_error(sb, __func__, "Checksum bad for group %lu\n",
+		ext4_error(sb, __FUNCTION__, "Checksum bad for group %lu\n",
			   block_group);
		gdp->bg_free_blocks_count = 0;
		gdp->bg_free_inodes_count = 0;
@@ -222,9 +223,11 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
 
	if (gdp) {
		spin_lock(sb_bgl_lock(sbi, block_group));
-		le16_add_cpu(&gdp->bg_free_inodes_count, 1);
+		gdp->bg_free_inodes_count = cpu_to_le16(
+			le16_to_cpu(gdp->bg_free_inodes_count) + 1);
		if (is_directory)
-			le16_add_cpu(&gdp->bg_used_dirs_count, -1);
+			gdp->bg_used_dirs_count = cpu_to_le16(
+				le16_to_cpu(gdp->bg_used_dirs_count) - 1);
		gdp->bg_checksum = ext4_group_desc_csum(sbi,
							block_group, gdp);
		spin_unlock(sb_bgl_lock(sbi, block_group));
@@ -585,7 +588,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
		ino++;
		if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
				ino > EXT4_INODES_PER_GROUP(sb)) {
-			ext4_error(sb, __func__,
+			ext4_error(sb, __FUNCTION__,
				   "reserved inode or inode > inodes count - "
				   "block_group = %lu, inode=%lu", group,
				   ino + group * EXT4_INODES_PER_GROUP(sb));
@@ -661,9 +664,11 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
		cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
	}
 
-	le16_add_cpu(&gdp->bg_free_inodes_count, -1);
+	gdp->bg_free_inodes_count =
+		cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
	if (S_ISDIR(mode)) {
-		le16_add_cpu(&gdp->bg_used_dirs_count, 1);
+		gdp->bg_used_dirs_count =
+			cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
	}
	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
	spin_unlock(sb_bgl_lock(sbi, group));
@@ -739,24 +744,23 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
	if (err)
		goto fail_free_drop;
 
+	err = ext4_mark_inode_dirty(handle, inode);
+	if (err) {
+		ext4_std_error(sb, err);
+		goto fail_free_drop;
+	}
	if (test_opt(sb, EXTENTS)) {
-		/* set extent flag only for diretory, file and normal symlink*/
-		if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
+		/* set extent flag only for directory and file */
+		if (S_ISDIR(mode) || S_ISREG(mode)) {
			EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
			ext4_ext_tree_init(handle, inode);
			err = ext4_update_incompat_feature(handle, sb,
					EXT4_FEATURE_INCOMPAT_EXTENTS);
			if (err)
-				goto fail_free_drop;
+				goto fail;
		}
	}
 
-	err = ext4_mark_inode_dirty(handle, inode);
-	if (err) {
-		ext4_std_error(sb, err);
-		goto fail_free_drop;
-	}
-
	ext4_debug("allocating inode %lu\n", inode->i_ino);
	goto really_out;
 fail:
@@ -792,7 +796,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
 
	/* Error cases - e2fsck has already cleaned up for us */
	if (ino > max_ino) {
-		ext4_warning(sb, __func__,
+		ext4_warning(sb, __FUNCTION__,
			     "bad orphan ino %lu!  e2fsck was run?", ino);
		goto error;
	}
@@ -801,7 +805,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
	bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
	bitmap_bh = read_inode_bitmap(sb, block_group);
	if (!bitmap_bh) {
-		ext4_warning(sb, __func__,
+		ext4_warning(sb, __FUNCTION__,
			     "inode bitmap error for orphan %lu", ino);
		goto error;
	}
@@ -826,7 +830,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
	err = PTR_ERR(inode);
	inode = NULL;
 bad_orphan:
-	ext4_warning(sb, __func__,
+	ext4_warning(sb, __FUNCTION__,
		     "bad orphan inode %lu!  e2fsck was run?", ino);
	printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n",
	       bit, (unsigned long long)bitmap_bh->b_blocknr,
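Reviewer note: a discipline worth calling out in the ialloc hunks above: every change to an on-disk group descriptor happens under that group's sb_bgl_lock() and is immediately followed by a checksum recompute, so counter and checksum can never be observed out of sync. Condensed into an illustrative helper (the wrapper name is ours; the calls are the ones in the diff):

	static void group_counter_update(struct ext4_sb_info *sbi,
					 ext4_group_t group,
					 struct ext4_group_desc *gdp, int delta)
	{
		spin_lock(sb_bgl_lock(sbi, group));
		gdp->bg_free_inodes_count =
			cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) + delta);
		/* keep the descriptor checksum consistent with the counters */
		gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
		spin_unlock(sb_bgl_lock(sbi, group));
	}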
diff --git a/trunk/fs/ext4/inode.c b/trunk/fs/ext4/inode.c
index 8d9707746413..8fab233cb05f 100644
--- a/trunk/fs/ext4/inode.c
+++ b/trunk/fs/ext4/inode.c
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -35,7 +36,6 @@
 #include
 #include
 #include
-#include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
 
@@ -93,7 +93,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
	BUFFER_TRACE(bh, "call ext4_journal_revoke");
	err = ext4_journal_revoke(handle, blocknr, bh);
	if (err)
-		ext4_abort(inode->i_sb, __func__,
+		ext4_abort(inode->i_sb, __FUNCTION__,
			   "error %d when attempting revoke", err);
	BUFFER_TRACE(bh, "exit");
	return err;
@@ -985,16 +985,6 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
	} else {
		retval = ext4_get_blocks_handle(handle, inode, block,
				max_blocks, bh, create, extend_disksize);
-
-		if (retval > 0 && buffer_new(bh)) {
-			/*
-			 * We allocated new blocks which will result in
-			 * i_data's format changing. Force the migrate
-			 * to fail by clearing migrate flags
-			 */
-			EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags &
							~EXT4_EXT_MIGRATE;
-		}
	}
	up_write((&EXT4_I(inode)->i_data_sem));
	return retval;
@@ -1240,7 +1230,7 @@ int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 {
	int err = jbd2_journal_dirty_data(handle, bh);
	if (err)
-		ext4_journal_abort_handle(__func__, __func__,
+		ext4_journal_abort_handle(__FUNCTION__, __FUNCTION__,
						bh, handle, err);
	return err;
 }
@@ -1311,11 +1301,10 @@ static int ext4_ordered_write_end(struct file *file,
		new_i_size = pos + copied;
		if (new_i_size > EXT4_I(inode)->i_disksize)
			EXT4_I(inode)->i_disksize = new_i_size;
-		ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
+		copied = ext4_generic_write_end(file, mapping, pos, len, copied,
							page, fsdata);
-		copied = ret2;
-		if (ret2 < 0)
-			ret = ret2;
+		if (copied < 0)
+			ret = copied;
	}
	ret2 = ext4_journal_stop(handle);
	if (!ret)
@@ -1340,11 +1329,10 @@ static int ext4_writeback_write_end(struct file *file,
	if (new_i_size > EXT4_I(inode)->i_disksize)
		EXT4_I(inode)->i_disksize = new_i_size;
 
-	ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
+	copied = ext4_generic_write_end(file, mapping, pos, len, copied,
						page, fsdata);
-	copied = ret2;
-	if (ret2 < 0)
-		ret = ret2;
+	if (copied < 0)
+		ret = copied;
 
	ret2 = ext4_journal_stop(handle);
	if (!ret)
@@ -2513,10 +2501,12 @@ void ext4_truncate(struct inode *inode)
 static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
		unsigned long ino, struct ext4_iloc *iloc)
 {
+	unsigned long desc, group_desc;
	ext4_group_t block_group;
	unsigned long offset;
	ext4_fsblk_t block;
-	struct ext4_group_desc *gdp;
+	struct buffer_head *bh;
+	struct ext4_group_desc * gdp;
 
	if (!ext4_valid_inum(sb, ino)) {
		/*
@@ -2528,10 +2518,22 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
	}
 
	block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
-	gdp = ext4_get_group_desc(sb, block_group, NULL);
-	if (!gdp)
+	if (block_group >= EXT4_SB(sb)->s_groups_count) {
+		ext4_error(sb,"ext4_get_inode_block","group >= groups count");
		return 0;
+	}
+	smp_rmb();
+	group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
+	desc = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
+	bh = EXT4_SB(sb)->s_group_desc[group_desc];
+	if (!bh) {
+		ext4_error (sb, "ext4_get_inode_block",
			    "Descriptor not loaded");
+		return 0;
+	}
 
+	gdp = (struct ext4_group_desc *)((__u8 *)bh->b_data +
+		desc * EXT4_DESC_SIZE(sb));
	/*
	 * Figure out the offset within the block group inode table
	 */
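Reviewer note: the open-coded descriptor lookup restored above splits a group number into (descriptor block, slot) with a shift and a mask. Worked through under the classic layout assumptions of a 4 KiB block size and 32-byte descriptors (EXT4_DESC_SIZE can be larger on 64-bit-feature filesystems):

	EXT4_DESC_PER_BLOCK = 4096 / 32 = 128, so DESC_PER_BLOCK_BITS = 7

	block_group = 300:
	    group_desc = 300 >> 7  = 2    (third descriptor block)
	    desc       = 300 & 127 = 44   (45th slot within that block)

ext4_get_group_desc(), which the other side of this hunk used, performs exactly this arithmetic internally; the revert simply inlines it.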
Delete" " some EAs or run e2fsck.", inode->i_ino); @@ -3414,7 +3415,7 @@ void ext4_dirty_inode(struct inode *inode) current_handle->h_transaction != handle->h_transaction) { /* This task has a transaction open against a different fs */ printk(KERN_EMERG "%s: transactions do not match!\n", - __func__); + __FUNCTION__); } else { jbd_debug(5, "marking dirty. outer handle=%p\n", current_handle); diff --git a/trunk/fs/ext4/ioctl.c b/trunk/fs/ext4/ioctl.c index 7a6c2f1faba6..25b13ede8086 100644 --- a/trunk/fs/ext4/ioctl.c +++ b/trunk/fs/ext4/ioctl.c @@ -10,17 +10,17 @@ #include #include #include +#include +#include #include #include #include #include #include -#include "ext4_jbd2.h" -#include "ext4.h" -long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, + unsigned long arg) { - struct inode *inode = filp->f_dentry->d_inode; struct ext4_inode_info *ei = EXT4_I(inode); unsigned int flags; unsigned short rsv_window_size; @@ -277,6 +277,9 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) #ifdef CONFIG_COMPAT long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { + struct inode *inode = file->f_path.dentry->d_inode; + int ret; + /* These are just misnamed, they actually get/put from/to user an int */ switch (cmd) { case EXT4_IOC32_GETFLAGS: @@ -316,6 +319,9 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) default: return -ENOIOCTLCMD; } - return ext4_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); + lock_kernel(); + ret = ext4_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); + unlock_kernel(); + return ret; } #endif diff --git a/trunk/fs/ext4/mballoc.c b/trunk/fs/ext4/mballoc.c index fbec2ef93797..9d57695de746 100644 --- a/trunk/fs/ext4/mballoc.c +++ b/trunk/fs/ext4/mballoc.c @@ -21,7 +21,21 @@ * mballoc.c contains the multiblocks allocation routines */ -#include "mballoc.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "group.h" + /* * MUSTDO: * - test ext4_ext_search_left() and ext4_ext_search_right() @@ -331,6 +345,288 @@ * */ +/* + * with AGGRESSIVE_CHECK allocator runs consistency checks over + * structures. these checks slow things down a lot + */ +#define AGGRESSIVE_CHECK__ + +/* + * with DOUBLE_CHECK defined mballoc creates persistent in-core + * bitmaps, maintains and uses them to check for double allocations + */ +#define DOUBLE_CHECK__ + +/* + */ +#define MB_DEBUG__ +#ifdef MB_DEBUG +#define mb_debug(fmt, a...) printk(fmt, ##a) +#else +#define mb_debug(fmt, a...) 
diff --git a/trunk/fs/ext4/mballoc.c b/trunk/fs/ext4/mballoc.c
index fbec2ef93797..9d57695de746 100644
--- a/trunk/fs/ext4/mballoc.c
+++ b/trunk/fs/ext4/mballoc.c
@@ -21,7 +21,21 @@
  * mballoc.c contains the multiblocks allocation routines
  */
 
-#include "mballoc.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "group.h"
+
 /*
  * MUSTDO:
  *   - test ext4_ext_search_left() and ext4_ext_search_right()
@@ -331,6 +345,288 @@
  *
  */
 
+/*
+ * with AGGRESSIVE_CHECK allocator runs consistency checks over
+ * structures. these checks slow things down a lot
+ */
+#define AGGRESSIVE_CHECK__
+
+/*
+ * with DOUBLE_CHECK defined mballoc creates persistent in-core
+ * bitmaps, maintains and uses them to check for double allocations
+ */
+#define DOUBLE_CHECK__
+
+/*
+ */
+#define MB_DEBUG__
+#ifdef MB_DEBUG
+#define mb_debug(fmt, a...)	printk(fmt, ##a)
+#else
+#define mb_debug(fmt, a...)
+#endif
+
+/*
+ * with EXT4_MB_HISTORY mballoc stores last N allocations in memory
+ * and you can monitor it in /proc/fs/ext4//mb_history
+ */
+#define EXT4_MB_HISTORY
+#define EXT4_MB_HISTORY_ALLOC		1	/* allocation */
+#define EXT4_MB_HISTORY_PREALLOC	2	/* preallocated blocks used */
+#define EXT4_MB_HISTORY_DISCARD		4	/* preallocation discarded */
+#define EXT4_MB_HISTORY_FREE		8	/* free */
+
+#define EXT4_MB_HISTORY_DEFAULT		(EXT4_MB_HISTORY_ALLOC | \
+					 EXT4_MB_HISTORY_PREALLOC)
+
+/*
+ * How long mballoc can look for a best extent (in found extents)
+ */
+#define MB_DEFAULT_MAX_TO_SCAN		200
+
+/*
+ * How long mballoc must look for a best extent
+ */
+#define MB_DEFAULT_MIN_TO_SCAN		10
+
+/*
+ * How many groups mballoc will scan looking for the best chunk
+ */
+#define MB_DEFAULT_MAX_GROUPS_TO_SCAN	5
+
+/*
+ * with 'ext4_mb_stats' allocator will collect stats that will be
+ * shown at umount. The collecting costs though!
+ */
+#define MB_DEFAULT_STATS		1
+
+/*
+ * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served
+ * by the stream allocator, which purpose is to pack requests
+ * as close each to other as possible to produce smooth I/O traffic
+ * We use locality group prealloc space for stream request.
+ * We can tune the same via /proc/fs/ext4//stream_req
+ */
+#define MB_DEFAULT_STREAM_THRESHOLD	16	/* 64K */
+
+/*
+ * for which requests use 2^N search using buddies
+ */
+#define MB_DEFAULT_ORDER2_REQS		2
+
+/*
+ * default group prealloc size 512 blocks
+ */
+#define MB_DEFAULT_GROUP_PREALLOC	512
+
+static struct kmem_cache *ext4_pspace_cachep;
+static struct kmem_cache *ext4_ac_cachep;
+
+#ifdef EXT4_BB_MAX_BLOCKS
+#undef EXT4_BB_MAX_BLOCKS
+#endif
+#define EXT4_BB_MAX_BLOCKS	30
+
+struct ext4_free_metadata {
+	ext4_group_t group;
+	unsigned short num;
+	ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS];
+	struct list_head list;
+};
+
+struct ext4_group_info {
+	unsigned long	bb_state;
+	unsigned long	bb_tid;
+	struct ext4_free_metadata *bb_md_cur;
+	unsigned short	bb_first_free;
+	unsigned short	bb_free;
+	unsigned short	bb_fragments;
+	struct list_head bb_prealloc_list;
+#ifdef DOUBLE_CHECK
+	void		*bb_bitmap;
+#endif
+	unsigned short	bb_counters[];
+};
+
+#define EXT4_GROUP_INFO_NEED_INIT_BIT	0
+#define EXT4_GROUP_INFO_LOCKED_BIT	1
+
+#define EXT4_MB_GRP_NEED_INIT(grp)	\
+	(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
+
+
+struct ext4_prealloc_space {
+	struct list_head	pa_inode_list;
+	struct list_head	pa_group_list;
+	union {
+		struct list_head pa_tmp_list;
+		struct rcu_head	pa_rcu;
+	} u;
+	spinlock_t		pa_lock;
+	atomic_t		pa_count;
+	unsigned		pa_deleted;
+	ext4_fsblk_t		pa_pstart;	/* phys. block */
+	ext4_lblk_t		pa_lstart;	/* log. block */
+	unsigned short		pa_len;		/* len of preallocated chunk */
+	unsigned short		pa_free;	/* how many blocks are free */
+	unsigned short		pa_linear;	/* consumed in one direction
+						 * strictly, for grp prealloc */
+	spinlock_t		*pa_obj_lock;
+	struct inode		*pa_inode;	/* hack, for history only */
+};
+
+
+struct ext4_free_extent {
+	ext4_lblk_t fe_logical;
+	ext4_grpblk_t fe_start;
+	ext4_group_t fe_group;
+	int fe_len;
+};
+
+/*
+ * Locality group:
+ *   we try to group all related changes together
+ *   so that writeback can flush/allocate them together as well
+ */
+struct ext4_locality_group {
+	/* for allocator */
+	struct mutex		lg_mutex;	/* to serialize allocates */
+	struct list_head	lg_prealloc_list;/* list of preallocations */
+	spinlock_t		lg_prealloc_lock;
+};
+
+struct ext4_allocation_context {
+	struct inode *ac_inode;
+	struct super_block *ac_sb;
+
+	/* original request */
+	struct ext4_free_extent ac_o_ex;
+
+	/* goal request (after normalization) */
+	struct ext4_free_extent ac_g_ex;
+
+	/* the best found extent */
+	struct ext4_free_extent ac_b_ex;
+
+	/* copy of the bext found extent taken before preallocation efforts */
+	struct ext4_free_extent ac_f_ex;
+
+	/* number of iterations done. we have to track to limit searching */
+	unsigned long ac_ex_scanned;
+	__u16 ac_groups_scanned;
+	__u16 ac_found;
+	__u16 ac_tail;
+	__u16 ac_buddy;
+	__u16 ac_flags;		/* allocation hints */
+	__u8 ac_status;
+	__u8 ac_criteria;
+	__u8 ac_repeats;
+	__u8 ac_2order;		/* if request is to allocate 2^N blocks and
+				 * N > 0, the field stores N, otherwise 0 */
+	__u8 ac_op;		/* operation, for history only */
+	struct page *ac_bitmap_page;
+	struct page *ac_buddy_page;
+	struct ext4_prealloc_space *ac_pa;
+	struct ext4_locality_group *ac_lg;
+};
+
+#define AC_STATUS_CONTINUE	1
+#define AC_STATUS_FOUND		2
+#define AC_STATUS_BREAK		3
+
+struct ext4_mb_history {
+	struct ext4_free_extent orig;	/* orig allocation */
+	struct ext4_free_extent goal;	/* goal allocation */
+	struct ext4_free_extent result;	/* result allocation */
+	unsigned pid;
+	unsigned ino;
+	__u16 found;	/* how many extents have been found */
+	__u16 groups;	/* how many groups have been scanned */
+	__u16 tail;	/* what tail broke some buddy */
+	__u16 buddy;	/* buddy the tail ^^^ broke */
+	__u16 flags;
+	__u8 cr:3;	/* which phase the result extent was found at */
+	__u8 op:4;
+	__u8 merged:1;
+};
+
+struct ext4_buddy {
+	struct page *bd_buddy_page;
+	void *bd_buddy;
+	struct page *bd_bitmap_page;
+	void *bd_bitmap;
+	struct ext4_group_info *bd_info;
+	struct super_block *bd_sb;
+	__u16 bd_blkbits;
+	ext4_group_t bd_group;
+};
+#define EXT4_MB_BITMAP(e4b)	((e4b)->bd_bitmap)
+#define EXT4_MB_BUDDY(e4b)	((e4b)->bd_buddy)
+
+#ifndef EXT4_MB_HISTORY
+static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
+{
+	return;
+}
+#else
+static void ext4_mb_store_history(struct ext4_allocation_context *ac);
+#endif
+
+#define in_range(b, first, len)	((b) >= (first) && (b) <= (first) + (len) - 1)
+
+static struct proc_dir_entry *proc_root_ext4;
+struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
+ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
+			ext4_fsblk_t goal, unsigned long *count, int *errp);
+
+static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
+					ext4_group_t group);
+static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
+static void ext4_mb_free_committed_blocks(struct super_block *);
+static void ext4_mb_return_to_preallocation(struct inode *inode,
+					struct ext4_buddy *e4b, sector_t block,
+					int count);
+static void ext4_mb_put_pa(struct ext4_allocation_context *,
+			struct super_block *, struct ext4_prealloc_space *pa);
+static int ext4_mb_init_per_dev_proc(struct super_block *sb);
+static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
+
+
+static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
+{
+	struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
+
+	bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
+}
+
+static inline void ext4_unlock_group(struct super_block *sb,
+					ext4_group_t group)
+{
+	struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
+
+	bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
+}
+
+static inline int ext4_is_group_locked(struct super_block *sb,
+					ext4_group_t group)
+{
+	struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
+
+	return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
+						&(grinfo->bb_state));
+}
+
+static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
+					struct ext4_free_extent *fex)
+{
+	ext4_fsblk_t block;
+
+	block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
+			+ fex->fe_start
+			+ le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
+	return block;
+}
+
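Reviewer note: ext4_grp_offs_to_block() above is the canonical mapping from a (group, in-group offset) pair to an absolute filesystem block. Worked through with typical 4 KiB-block values (32768 blocks per group, s_first_data_block = 0; on 1 KiB-block filesystems s_first_data_block is 1 instead):

	fe_group = 3, fe_start = 100
	    block = 3 * 32768 + 100 + 0 = 98404

The (ext4_fsblk_t) cast before the multiplication matters: group * blocks_per_group can overflow 32 bits on large filesystems, so the arithmetic must be promoted to 64 bits first.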
 static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
 {
 #if BITS_PER_LONG == 64
@@ -440,7 +736,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
			blocknr +=
			    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
 
-			ext4_error(sb, __func__, "double-free of inode"
+			ext4_error(sb, __FUNCTION__, "double-free of inode"
				   " %lu's block %llu(bit %u in group %lu)\n",
				   inode ? inode->i_ino : 0, blocknr,
				   first + i, e4b->bd_group);
@@ -602,17 +898,17 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
	list_for_each(cur, &grp->bb_prealloc_list) {
		ext4_group_t groupnr;
		struct ext4_prealloc_space *pa;
-		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
-		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
+		pa = list_entry(cur, struct ext4_prealloc_space, group_list);
+		ext4_get_group_no_and_offset(sb, pa->pstart, &groupnr, &k);
		MB_CHECK_ASSERT(groupnr == e4b->bd_group);
-		for (i = 0; i < pa->pa_len; i++)
+		for (i = 0; i < pa->len; i++)
			MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
	}
	return 0;
 }
 #undef MB_CHECK_ASSERT
 #define mb_check_buddy(e4b) __mb_check_buddy(e4b,	\
-					__FILE__, __func__, __LINE__)
+					__FILE__, __FUNCTION__, __LINE__)
 #else
 #define mb_check_buddy(e4b)
 #endif
@@ -686,7 +982,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
	grp->bb_fragments = fragments;
 
	if (free != grp->bb_free) {
-		ext4_error(sb, __func__,
+		ext4_error(sb, __FUNCTION__,
			"EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n",
			group, free, grp->bb_free);
		/*
@@ -872,9 +1168,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
	return err;
 }
 
-static noinline_for_stack int
-ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
-					struct ext4_buddy *e4b)
+static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
+		struct ext4_buddy *e4b)
 {
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct inode *inode = sbi->s_buddy_cache;
@@ -1072,7 +1367,7 @@ static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
			blocknr +=
			    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
 
-			ext4_error(sb, __func__, "double-free of inode"
+			ext4_error(sb, __FUNCTION__, "double-free of inode"
				   " %lu's block %llu(bit %u in group %lu)\n",
				   inode ? inode->i_ino : 0, blocknr, block,
				   e4b->bd_group);
@@ -1553,7 +1848,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
			 * free blocks even though group info says we
			 * we have free blocks
			 */
-			ext4_error(sb, __func__, "%d free blocks as per "
+			ext4_error(sb, __FUNCTION__, "%d free blocks as per "
					"group info. But bitmap says 0\n",
					free);
			break;
@@ -1562,7 +1857,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
		mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
		BUG_ON(ex.fe_len <= 0);
		if (free < ex.fe_len) {
-			ext4_error(sb, __func__, "%d free blocks as per "
+			ext4_error(sb, __FUNCTION__, "%d free blocks as per "
					"group info. But got %d blocks\n",
					free, ex.fe_len);
			/*
@@ -1670,8 +1965,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
	return 0;
 }
 
-static noinline_for_stack int
-ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
+static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 {
	ext4_group_t group;
	ext4_group_t i;
@@ -2171,8 +2465,7 @@ static void ext4_mb_history_init(struct super_block *sb)
	/* if we can't allocate history, then we simple won't use it */
 }
 
-static noinline_for_stack void
-ext4_mb_store_history(struct ext4_allocation_context *ac)
+static void ext4_mb_store_history(struct ext4_allocation_context *ac)
 {
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	struct ext4_mb_history h;
@@ -2272,13 +2565,13 @@ static int ext4_mb_init_backend(struct super_block *sb)
		meta_group_info[j] = kzalloc(len, GFP_KERNEL);
		if (meta_group_info[j] == NULL) {
			printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
+			i--;
			goto err_freebuddy;
		}
		desc = ext4_get_group_desc(sb, i, NULL);
		if (desc == NULL) {
			printk(KERN_ERR
				"EXT4-fs: can't read descriptor %lu\n", i);
-			i++;
			goto err_freebuddy;
		}
		memset(meta_group_info[j], 0, len);
@@ -2318,11 +2611,13 @@ static int ext4_mb_init_backend(struct super_block *sb)
	return 0;
 
 err_freebuddy:
-	while (i-- > 0)
+	while (i >= 0) {
		kfree(ext4_get_group_info(sb, i));
+		i--;
+	}
	i = num_meta_group_infos;
 err_freemeta:
-	while (i-- > 0)
+	while (--i >= 0)
		kfree(sbi->s_group_info[i]);
	iput(sbi->s_buddy_cache);
 err_freesgi:
@@ -2506,8 +2801,7 @@ int ext4_mb_release(struct super_block *sb)
	return 0;
 }
 
-static noinline_for_stack void
-ext4_mb_free_committed_blocks(struct super_block *sb)
+static void ext4_mb_free_committed_blocks(struct super_block *sb)
 {
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int err;
@@ -2727,8 +3021,7 @@ void exit_ext4_mballoc(void)
  * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps
  * Returns 0 if success or error code
  */
-static noinline_for_stack int
-ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
+static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
				handle_t *handle)
 {
	struct buffer_head *bitmap_bh = NULL;
@@ -2777,7 +3070,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
	    in_range(block, ext4_inode_table(sb, gdp),
		     EXT4_SB(sb)->s_itb_per_group)) {
 
-		ext4_error(sb, __func__,
+		ext4_error(sb, __FUNCTION__,
			   "Allocating block in system zone - block = %llu",
			   block);
	}
@@ -2801,7 +3094,9 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
			ac->ac_b_ex.fe_group, gdp));
	}
-	le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
+	gdp->bg_free_blocks_count =
+		cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)
+				- ac->ac_b_ex.fe_len);
	gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
	spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
	percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
@@ -2835,7 +3130,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
		ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe;
	else
		ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
-	mb_debug("#%u: goal %u blocks for locality group\n",
+	mb_debug("#%u: goal %lu blocks for locality group\n",
		current->pid, ac->ac_g_ex.fe_len);
 }
 
@@ -2843,16 +3138,15 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
  * Normalization means making request better in terms of
  * size and alignment
  */
-static noinline_for_stack void
-ext4_mb_normalize_request(struct ext4_allocation_context *ac,
+static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
				struct ext4_allocation_request *ar)
 {
	int bsbits, max;
	ext4_lblk_t end;
+	struct list_head *cur;
	loff_t size, orig_size, start_off;
	ext4_lblk_t start, orig_start;
	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
-	struct ext4_prealloc_space *pa;
 
	/* do normalize only data requests, metadata requests
	   do not need preallocation */
@@ -2938,9 +3232,12 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 
	/* check we don't cross already preallocated blocks */
	rcu_read_lock();
-	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
+	list_for_each_rcu(cur, &ei->i_prealloc_list) {
+		struct ext4_prealloc_space *pa;
		unsigned long pa_end;
 
+		pa = list_entry(cur, struct ext4_prealloc_space,
				pa_inode_list);
+
		if (pa->pa_deleted)
			continue;
		spin_lock(&pa->pa_lock);
@@ -2982,8 +3279,10 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 
	/* XXX: extra loop to check we really don't overlap preallocations */
	rcu_read_lock();
-	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
+	list_for_each_rcu(cur, &ei->i_prealloc_list) {
+		struct ext4_prealloc_space *pa;
		unsigned long pa_end;
+		pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
		spin_lock(&pa->pa_lock);
		if (pa->pa_deleted == 0) {
			pa_end = pa->pa_lstart + pa->pa_len;
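Reviewer note: these hunks swap the typed RCU list iterator for the older raw one. The two idioms side by side, reusing the structures above (the loop bodies are illustrative placeholders):

	struct list_head *cur;
	struct ext4_prealloc_space *pa;

	rcu_read_lock();
	/* Typed walk: the iterator hands back the container directly. */
	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
		/* pa is ready to use here */
	}
	/* Raw walk, as restored by this patch: convert each node by hand. */
	list_for_each_rcu(cur, &ei->i_prealloc_list) {
		pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
		/* pa is usable only after the list_entry() conversion */
	}
	rcu_read_unlock();

Both forms are RCU-safe; the typed form merely folds the list_entry() step into the macro, which is why the revert has to add the local cur and the explicit conversion back.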
ext4_prealloc_space, pa_inode_list); spin_lock(&pa->pa_lock); if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) { atomic_inc(&pa->pa_count); @@ -3270,8 +3571,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac, /* * creates new preallocated space for given inode */ -static noinline_for_stack int -ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) +static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) { struct super_block *sb = ac->ac_sb; struct ext4_prealloc_space *pa; @@ -3358,8 +3658,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) /* * creates new preallocated space for locality group inodes belongs to */ -static noinline_for_stack int -ext4_mb_new_group_pa(struct ext4_allocation_context *ac) +static int ext4_mb_new_group_pa(struct ext4_allocation_context *ac) { struct super_block *sb = ac->ac_sb; struct ext4_locality_group *lg; @@ -3432,11 +3731,11 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac) * the caller MUST hold group/inode locks. * TODO: optimize the case when there are no in-core structures yet */ -static noinline_for_stack int -ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, - struct ext4_prealloc_space *pa, - struct ext4_allocation_context *ac) +static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, + struct buffer_head *bitmap_bh, + struct ext4_prealloc_space *pa) { + struct ext4_allocation_context *ac; struct super_block *sb = e4b->bd_sb; struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned long end; @@ -3452,6 +3751,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, BUG_ON(group != e4b->bd_group && pa->pa_len != 0); end = bit + pa->pa_len; + ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); + if (ac) { ac->ac_sb = sb; ac->ac_inode = pa->pa_inode; @@ -3488,7 +3789,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, pa, (unsigned long) pa->pa_lstart, (unsigned long) pa->pa_pstart, (unsigned long) pa->pa_len); - ext4_error(sb, __func__, "free %u, pa_free %u\n", + ext4_error(sb, __FUNCTION__, "free %u, pa_free %u\n", free, pa->pa_free); /* * pa is already deleted so we use the value obtained @@ -3496,19 +3797,22 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, */ } atomic_add(free, &sbi->s_mb_discarded); + if (ac) + kmem_cache_free(ext4_ac_cachep, ac); return err; } -static noinline_for_stack int -ext4_mb_release_group_pa(struct ext4_buddy *e4b, - struct ext4_prealloc_space *pa, - struct ext4_allocation_context *ac) +static int ext4_mb_release_group_pa(struct ext4_buddy *e4b, + struct ext4_prealloc_space *pa) { + struct ext4_allocation_context *ac; struct super_block *sb = e4b->bd_sb; ext4_group_t group; ext4_grpblk_t bit; + ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); + if (ac) ac->ac_op = EXT4_MB_HISTORY_DISCARD; @@ -3526,6 +3830,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, ac->ac_b_ex.fe_len = pa->pa_len; ac->ac_b_ex.fe_logical = 0; ext4_mb_store_history(ac); + kmem_cache_free(ext4_ac_cachep, ac); } return 0; @@ -3540,14 +3845,12 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, * - how many do we discard * 1) how many requested */ -static noinline_for_stack int -ext4_mb_discard_group_preallocations(struct super_block *sb, +static int ext4_mb_discard_group_preallocations(struct super_block *sb, ext4_group_t group, int needed) { struct ext4_group_info *grp = ext4_get_group_info(sb, group); struct buffer_head *bitmap_bh = NULL; struct 
ext4_prealloc_space *pa, *tmp; - struct ext4_allocation_context *ac; struct list_head list; struct ext4_buddy e4b; int err; @@ -3575,7 +3878,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, grp = ext4_get_group_info(sb, group); INIT_LIST_HEAD(&list); - ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); repeat: ext4_lock_group(sb, group); list_for_each_entry_safe(pa, tmp, @@ -3630,9 +3932,9 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, spin_unlock(pa->pa_obj_lock); if (pa->pa_linear) - ext4_mb_release_group_pa(&e4b, pa, ac); + ext4_mb_release_group_pa(&e4b, pa); else - ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); + ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); list_del(&pa->u.pa_tmp_list); call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); @@ -3640,8 +3942,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, out: ext4_unlock_group(sb, group); - if (ac) - kmem_cache_free(ext4_ac_cachep, ac); ext4_mb_release_desc(&e4b); put_bh(bitmap_bh); return free; @@ -3662,7 +3962,6 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode) struct super_block *sb = inode->i_sb; struct buffer_head *bitmap_bh = NULL; struct ext4_prealloc_space *pa, *tmp; - struct ext4_allocation_context *ac; ext4_group_t group = 0; struct list_head list; struct ext4_buddy e4b; @@ -3677,7 +3976,6 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode) INIT_LIST_HEAD(&list); - ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); repeat: /* first, collect all pa's in the inode */ spin_lock(&ei->i_prealloc_lock); @@ -3742,7 +4040,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode) ext4_lock_group(sb, group); list_del(&pa->pa_group_list); - ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); + ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); ext4_unlock_group(sb, group); ext4_mb_release_desc(&e4b); @@ -3751,8 +4049,6 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode) list_del(&pa->u.pa_tmp_list); call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); } - if (ac) - kmem_cache_free(ext4_ac_cachep, ac); } /* @@ -3812,7 +4108,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) printk(KERN_ERR "PA:%lu:%d:%u \n", i, start, pa->pa_len); } - ext4_unlock_group(sb, i); + ext4_lock_group(sb, i); if (grp->bb_free == 0) continue; @@ -3871,8 +4167,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) mutex_lock(&ac->ac_lg->lg_mutex); } -static noinline_for_stack int -ext4_mb_initialize_context(struct ext4_allocation_context *ac, +static int ext4_mb_initialize_context(struct ext4_allocation_context *ac, struct ext4_allocation_request *ar) { struct super_block *sb = ar->inode->i_sb; @@ -4103,8 +4398,7 @@ static void ext4_mb_poll_new_transaction(struct super_block *sb, ext4_mb_free_committed_blocks(sb); } -static noinline_for_stack int -ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, +static int ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, ext4_group_t group, ext4_grpblk_t block, int count) { struct ext4_group_info *db = e4b->bd_info; @@ -4195,7 +4489,7 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, if (block < le32_to_cpu(es->s_first_data_block) || block + count < block || block + count > ext4_blocks_count(es)) { - ext4_error(sb, __func__, + ext4_error(sb, __FUNCTION__, "Freeing blocks not in datazone - " "block = %lu, count = %lu", block, count); goto error_return; @@ -4236,7 +4530,7 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, in_range(block + 
count - 1, ext4_inode_table(sb, gdp), EXT4_SB(sb)->s_itb_per_group)) { - ext4_error(sb, __func__, + ext4_error(sb, __FUNCTION__, "Freeing blocks in system zone - " "Block = %lu, count = %lu", block, count); } @@ -4294,7 +4588,8 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, } spin_lock(sb_bgl_lock(sbi, block_group)); - le16_add_cpu(&gdp->bg_free_blocks_count, count); + gdp->bg_free_blocks_count = + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_add(&sbi->s_freeblocks_counter, count); diff --git a/trunk/fs/ext4/mballoc.h b/trunk/fs/ext4/mballoc.h deleted file mode 100644 index bfe6add46bcf..000000000000 --- a/trunk/fs/ext4/mballoc.h +++ /dev/null @@ -1,304 +0,0 @@ -/* - * fs/ext4/mballoc.h - * - * Written by: Alex Tomas - * - */ -#ifndef _EXT4_MBALLOC_H -#define _EXT4_MBALLOC_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "ext4_jbd2.h" -#include "ext4.h" -#include "group.h" - -/* - * with AGGRESSIVE_CHECK allocator runs consistency checks over - * structures. these checks slow things down a lot - */ -#define AGGRESSIVE_CHECK__ - -/* - * with DOUBLE_CHECK defined mballoc creates persistent in-core - * bitmaps, maintains and uses them to check for double allocations - */ -#define DOUBLE_CHECK__ - -/* - */ -#define MB_DEBUG__ -#ifdef MB_DEBUG -#define mb_debug(fmt, a...) printk(fmt, ##a) -#else -#define mb_debug(fmt, a...) -#endif - -/* - * with EXT4_MB_HISTORY mballoc stores last N allocations in memory - * and you can monitor it in /proc/fs/ext4//mb_history - */ -#define EXT4_MB_HISTORY -#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */ -#define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */ -#define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */ -#define EXT4_MB_HISTORY_FREE 8 /* free */ - -#define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \ - EXT4_MB_HISTORY_PREALLOC) - -/* - * How long mballoc can look for a best extent (in found extents) - */ -#define MB_DEFAULT_MAX_TO_SCAN 200 - -/* - * How long mballoc must look for a best extent - */ -#define MB_DEFAULT_MIN_TO_SCAN 10 - -/* - * How many groups mballoc will scan looking for the best chunk - */ -#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5 - -/* - * with 'ext4_mb_stats' allocator will collect stats that will be - * shown at umount. The collecting costs though! - */ -#define MB_DEFAULT_STATS 1 - -/* - * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served - * by the stream allocator, which purpose is to pack requests - * as close each to other as possible to produce smooth I/O traffic - * We use locality group prealloc space for stream request. 
- * We can tune the same via /proc/fs/ext4//stream_req - */ -#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */ - -/* - * for which requests use 2^N search using buddies - */ -#define MB_DEFAULT_ORDER2_REQS 2 - -/* - * default group prealloc size 512 blocks - */ -#define MB_DEFAULT_GROUP_PREALLOC 512 - -static struct kmem_cache *ext4_pspace_cachep; -static struct kmem_cache *ext4_ac_cachep; - -#ifdef EXT4_BB_MAX_BLOCKS -#undef EXT4_BB_MAX_BLOCKS -#endif -#define EXT4_BB_MAX_BLOCKS 30 - -struct ext4_free_metadata { - ext4_group_t group; - unsigned short num; - ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS]; - struct list_head list; -}; - -struct ext4_group_info { - unsigned long bb_state; - unsigned long bb_tid; - struct ext4_free_metadata *bb_md_cur; - unsigned short bb_first_free; - unsigned short bb_free; - unsigned short bb_fragments; - struct list_head bb_prealloc_list; -#ifdef DOUBLE_CHECK - void *bb_bitmap; -#endif - unsigned short bb_counters[]; -}; - -#define EXT4_GROUP_INFO_NEED_INIT_BIT 0 -#define EXT4_GROUP_INFO_LOCKED_BIT 1 - -#define EXT4_MB_GRP_NEED_INIT(grp) \ - (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) - - -struct ext4_prealloc_space { - struct list_head pa_inode_list; - struct list_head pa_group_list; - union { - struct list_head pa_tmp_list; - struct rcu_head pa_rcu; - } u; - spinlock_t pa_lock; - atomic_t pa_count; - unsigned pa_deleted; - ext4_fsblk_t pa_pstart; /* phys. block */ - ext4_lblk_t pa_lstart; /* log. block */ - unsigned short pa_len; /* len of preallocated chunk */ - unsigned short pa_free; /* how many blocks are free */ - unsigned short pa_linear; /* consumed in one direction - * strictly, for grp prealloc */ - spinlock_t *pa_obj_lock; - struct inode *pa_inode; /* hack, for history only */ -}; - - -struct ext4_free_extent { - ext4_lblk_t fe_logical; - ext4_grpblk_t fe_start; - ext4_group_t fe_group; - int fe_len; -}; - -/* - * Locality group: - * we try to group all related changes together - * so that writeback can flush/allocate them together as well - */ -struct ext4_locality_group { - /* for allocator */ - struct mutex lg_mutex; /* to serialize allocates */ - struct list_head lg_prealloc_list;/* list of preallocations */ - spinlock_t lg_prealloc_lock; -}; - -struct ext4_allocation_context { - struct inode *ac_inode; - struct super_block *ac_sb; - - /* original request */ - struct ext4_free_extent ac_o_ex; - - /* goal request (after normalization) */ - struct ext4_free_extent ac_g_ex; - - /* the best found extent */ - struct ext4_free_extent ac_b_ex; - - /* copy of the bext found extent taken before preallocation efforts */ - struct ext4_free_extent ac_f_ex; - - /* number of iterations done. 
we have to track to limit searching */ - unsigned long ac_ex_scanned; - __u16 ac_groups_scanned; - __u16 ac_found; - __u16 ac_tail; - __u16 ac_buddy; - __u16 ac_flags; /* allocation hints */ - __u8 ac_status; - __u8 ac_criteria; - __u8 ac_repeats; - __u8 ac_2order; /* if request is to allocate 2^N blocks and - * N > 0, the field stores N, otherwise 0 */ - __u8 ac_op; /* operation, for history only */ - struct page *ac_bitmap_page; - struct page *ac_buddy_page; - struct ext4_prealloc_space *ac_pa; - struct ext4_locality_group *ac_lg; -}; - -#define AC_STATUS_CONTINUE 1 -#define AC_STATUS_FOUND 2 -#define AC_STATUS_BREAK 3 - -struct ext4_mb_history { - struct ext4_free_extent orig; /* orig allocation */ - struct ext4_free_extent goal; /* goal allocation */ - struct ext4_free_extent result; /* result allocation */ - unsigned pid; - unsigned ino; - __u16 found; /* how many extents have been found */ - __u16 groups; /* how many groups have been scanned */ - __u16 tail; /* what tail broke some buddy */ - __u16 buddy; /* buddy the tail ^^^ broke */ - __u16 flags; - __u8 cr:3; /* which phase the result extent was found at */ - __u8 op:4; - __u8 merged:1; -}; - -struct ext4_buddy { - struct page *bd_buddy_page; - void *bd_buddy; - struct page *bd_bitmap_page; - void *bd_bitmap; - struct ext4_group_info *bd_info; - struct super_block *bd_sb; - __u16 bd_blkbits; - ext4_group_t bd_group; -}; -#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) -#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) - -#ifndef EXT4_MB_HISTORY -static inline void ext4_mb_store_history(struct ext4_allocation_context *ac) -{ - return; -} -#else -static void ext4_mb_store_history(struct ext4_allocation_context *ac); -#endif - -#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) - -static struct proc_dir_entry *proc_root_ext4; -struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); - -static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, - ext4_group_t group); -static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *); -static void ext4_mb_free_committed_blocks(struct super_block *); -static void ext4_mb_return_to_preallocation(struct inode *inode, - struct ext4_buddy *e4b, sector_t block, - int count); -static void ext4_mb_put_pa(struct ext4_allocation_context *, - struct super_block *, struct ext4_prealloc_space *pa); -static int ext4_mb_init_per_dev_proc(struct super_block *sb); -static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); - - -static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) -{ - struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); - - bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); -} - -static inline void ext4_unlock_group(struct super_block *sb, - ext4_group_t group) -{ - struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); - - bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); -} - -static inline int ext4_is_group_locked(struct super_block *sb, - ext4_group_t group) -{ - struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); - - return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT, - &(grinfo->bb_state)); -} - -static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, - struct ext4_free_extent *fex) -{ - ext4_fsblk_t block; - - block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb) - + fex->fe_start - + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); - return block; -} -#endif diff --git 
a/trunk/fs/ext4/migrate.c b/trunk/fs/ext4/migrate.c index b9e077ba07e9..5c1e27de7755 100644 --- a/trunk/fs/ext4/migrate.c +++ b/trunk/fs/ext4/migrate.c @@ -13,8 +13,8 @@ */ #include -#include "ext4_jbd2.h" -#include "ext4_extents.h" +#include +#include /* * The contiguous blocks details which can be @@ -327,7 +327,7 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data) } static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, - struct inode *tmp_inode) + struct inode *tmp_inode) { int retval; __le32 i_data[3]; @@ -339,7 +339,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, * i_data field of the original inode */ retval = ext4_journal_extend(handle, 1); - if (retval) { + if (retval != 0) { retval = ext4_journal_restart(handle, 1); if (retval) goto err_out; @@ -350,18 +350,6 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, i_data[2] = ei->i_data[EXT4_TIND_BLOCK]; down_write(&EXT4_I(inode)->i_data_sem); - /* - * if EXT4_EXT_MIGRATE is cleared a block allocation - * happened after we started the migrate. We need to - * fail the migrate - */ - if (!(EXT4_I(inode)->i_flags & EXT4_EXT_MIGRATE)) { - retval = -EAGAIN; - up_write(&EXT4_I(inode)->i_data_sem); - goto err_out; - } else - EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags & - ~EXT4_EXT_MIGRATE; /* * We have the extent map build with the tmp inode. * Now copy the i_data across @@ -520,17 +508,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, * switch the inode format to prevent read. */ mutex_lock(&(inode->i_mutex)); - /* - * Even though we take i_mutex we can still cause block allocation - * via mmap write to holes. If we have allocated new blocks we fail - * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag. - * The flag is updated with i_data_sem held to prevent racing with - * block allocation. - */ - down_read((&EXT4_I(inode)->i_data_sem)); - EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags | EXT4_EXT_MIGRATE; - up_read((&EXT4_I(inode)->i_data_sem)); - handle = ext4_journal_start(inode, 1); ei = EXT4_I(inode); @@ -582,15 +559,9 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, * tmp_inode */ free_ext_block(handle, tmp_inode); - else { - retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode); - if (retval) - /* - * if we fail to swap inode data free the extent - * details of the tmp inode - */ - free_ext_block(handle, tmp_inode); - } + else + retval = ext4_ext_swap_inode_data(handle, inode, + tmp_inode); /* We mark the tmp_inode dirty via ext4_ext_tree_init. 
*/ if (ext4_journal_extend(handle, 1) != 0) diff --git a/trunk/fs/ext4/namei.c b/trunk/fs/ext4/namei.c index ab16beaa830d..28aa2ed4297e 100644 --- a/trunk/fs/ext4/namei.c +++ b/trunk/fs/ext4/namei.c @@ -28,14 +28,14 @@ #include #include #include +#include +#include #include #include #include #include #include #include -#include "ext4.h" -#include "ext4_jbd2.h" #include "namei.h" #include "xattr.h" @@ -57,15 +57,10 @@ static struct buffer_head *ext4_append(handle_t *handle, *block = inode->i_size >> inode->i_sb->s_blocksize_bits; - bh = ext4_bread(handle, inode, *block, 1, err); - if (bh) { + if ((bh = ext4_bread(handle, inode, *block, 1, err))) { inode->i_size += inode->i_sb->s_blocksize; EXT4_I(inode)->i_disksize = inode->i_size; - *err = ext4_journal_get_write_access(handle, bh); - if (*err) { - brelse(bh); - bh = NULL; - } + ext4_journal_get_write_access(handle,bh); } return bh; } @@ -353,7 +348,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, if (root->info.hash_version != DX_HASH_TEA && root->info.hash_version != DX_HASH_HALF_MD4 && root->info.hash_version != DX_HASH_LEGACY) { - ext4_warning(dir->i_sb, __func__, + ext4_warning(dir->i_sb, __FUNCTION__, "Unrecognised inode hash code %d", root->info.hash_version); brelse(bh); @@ -367,7 +362,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, hash = hinfo->hash; if (root->info.unused_flags & 1) { - ext4_warning(dir->i_sb, __func__, + ext4_warning(dir->i_sb, __FUNCTION__, "Unimplemented inode hash flags: %#06x", root->info.unused_flags); brelse(bh); @@ -376,7 +371,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, } if ((indirect = root->info.indirect_levels) > 1) { - ext4_warning(dir->i_sb, __func__, + ext4_warning(dir->i_sb, __FUNCTION__, "Unimplemented inode hash depth: %#06x", root->info.indirect_levels); brelse(bh); @@ -389,7 +384,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, if (dx_get_limit(entries) != dx_root_limit(dir, root->info.info_length)) { - ext4_warning(dir->i_sb, __func__, + ext4_warning(dir->i_sb, __FUNCTION__, "dx entry: limit != root limit"); brelse(bh); *err = ERR_BAD_DX_DIR; @@ -401,7 +396,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, { count = dx_get_count(entries); if (!count || count > dx_get_limit(entries)) { - ext4_warning(dir->i_sb, __func__, + ext4_warning(dir->i_sb, __FUNCTION__, "dx entry: no count or count > limit"); brelse(bh); *err = ERR_BAD_DX_DIR; @@ -446,7 +441,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, goto fail2; at = entries = ((struct dx_node *) bh->b_data)->entries; if (dx_get_limit(entries) != dx_node_limit (dir)) { - ext4_warning(dir->i_sb, __func__, + ext4_warning(dir->i_sb, __FUNCTION__, "dx entry: limit != node limit"); brelse(bh); *err = ERR_BAD_DX_DIR; @@ -462,7 +457,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, } fail: if (*err == ERR_BAD_DX_DIR) - ext4_warning(dir->i_sb, __func__, + ext4_warning(dir->i_sb, __FUNCTION__, "Corrupt dir inode %ld, running e2fsck is " "recommended.", dir->i_ino); return NULL; @@ -919,7 +914,7 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry, wait_on_buffer(bh); if (!buffer_uptodate(bh)) { /* read error, skip block & hope for the best */ - ext4_error(sb, __func__, "reading directory #%lu " + ext4_error(sb, __FUNCTION__, "reading directory #%lu " "offset %lu", dir->i_ino, (unsigned long)block); brelse(bh); @@ -1012,7 +1007,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, retval = ext4_htree_next_block(dir, hash, frame, frames, NULL); if (retval < 0) { - 
ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "error reading index page in directory #%lu", dir->i_ino); *err = retval; @@ -1537,7 +1532,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, if (levels && (dx_get_count(frames->entries) == dx_get_limit(frames->entries))) { - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "Directory index full!"); err = -ENOSPC; goto cleanup; @@ -1865,11 +1860,11 @@ static int empty_dir (struct inode * inode) if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || !(bh = ext4_bread (NULL, inode, 0, 0, &err))) { if (err) - ext4_error(inode->i_sb, __func__, + ext4_error(inode->i_sb, __FUNCTION__, "error %d reading directory #%lu offset 0", err, inode->i_ino); else - ext4_warning(inode->i_sb, __func__, + ext4_warning(inode->i_sb, __FUNCTION__, "bad directory (dir #%lu) - no data block", inode->i_ino); return 1; @@ -1898,7 +1893,7 @@ static int empty_dir (struct inode * inode) offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); if (!bh) { if (err) - ext4_error(sb, __func__, + ext4_error(sb, __FUNCTION__, "error %d reading directory" " #%lu offset %lu", err, inode->i_ino, offset); @@ -2222,8 +2217,6 @@ static int ext4_symlink (struct inode * dir, goto out_stop; } } else { - /* clear the extent format for fast symlink */ - EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL; inode->i_op = &ext4_fast_symlink_inode_operations; memcpy((char*)&EXT4_I(inode)->i_data,symname,l); inode->i_size = l-1; @@ -2354,9 +2347,6 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, EXT4_FEATURE_INCOMPAT_FILETYPE)) new_de->file_type = old_de->file_type; new_dir->i_version++; - new_dir->i_ctime = new_dir->i_mtime = - ext4_current_time(new_dir); - ext4_mark_inode_dirty(handle, new_dir); BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata"); ext4_journal_dirty_metadata(handle, new_bh); brelse(new_bh); diff --git a/trunk/fs/ext4/resize.c b/trunk/fs/ext4/resize.c index 9f086a6a472b..e29efa0f9d62 100644 --- a/trunk/fs/ext4/resize.c +++ b/trunk/fs/ext4/resize.c @@ -11,10 +11,11 @@ #define EXT4FS_DEBUG +#include + #include #include -#include "ext4_jbd2.h" #include "group.h" #define outside(b, first, last) ((b) < (first) || (b) >= (last)) @@ -49,63 +50,63 @@ static int verify_group_input(struct super_block *sb, ext4_get_group_no_and_offset(sb, start, NULL, &offset); if (group != sbi->s_groups_count) - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "Cannot add at group %u (only %lu groups)", input->group, sbi->s_groups_count); else if (offset != 0) - ext4_warning(sb, __func__, "Last group not full"); + ext4_warning(sb, __FUNCTION__, "Last group not full"); else if (input->reserved_blocks > input->blocks_count / 5) - ext4_warning(sb, __func__, "Reserved blocks too high (%u)", + ext4_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)", input->reserved_blocks); else if (free_blocks_count < 0) - ext4_warning(sb, __func__, "Bad blocks count %u", + ext4_warning(sb, __FUNCTION__, "Bad blocks count %u", input->blocks_count); else if (!(bh = sb_bread(sb, end - 1))) - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "Cannot read last block (%llu)", end - 1); else if (outside(input->block_bitmap, start, end)) - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "Block bitmap not in group (block %llu)", (unsigned long long)input->block_bitmap); else if (outside(input->inode_bitmap, start, end)) - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "Inode bitmap not in group (block %llu)", 
(unsigned long long)input->inode_bitmap); else if (outside(input->inode_table, start, end) || outside(itend - 1, start, end)) - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "Inode table not in group (blocks %llu-%llu)", (unsigned long long)input->inode_table, itend - 1); else if (input->inode_bitmap == input->block_bitmap) - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "Block bitmap same as inode bitmap (%llu)", (unsigned long long)input->block_bitmap); else if (inside(input->block_bitmap, input->inode_table, itend)) - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "Block bitmap (%llu) in inode table (%llu-%llu)", (unsigned long long)input->block_bitmap, (unsigned long long)input->inode_table, itend - 1); else if (inside(input->inode_bitmap, input->inode_table, itend)) - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "Inode bitmap (%llu) in inode table (%llu-%llu)", (unsigned long long)input->inode_bitmap, (unsigned long long)input->inode_table, itend - 1); else if (inside(input->block_bitmap, start, metaend)) - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "Block bitmap (%llu) in GDT table" " (%llu-%llu)", (unsigned long long)input->block_bitmap, start, metaend - 1); else if (inside(input->inode_bitmap, start, metaend)) - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "Inode bitmap (%llu) in GDT table" " (%llu-%llu)", (unsigned long long)input->inode_bitmap, start, metaend - 1); else if (inside(input->inode_table, start, metaend) || inside(itend - 1, start, metaend)) - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "Inode table (%llu-%llu) overlaps" "GDT table (%llu-%llu)", (unsigned long long)input->inode_table, @@ -367,7 +368,7 @@ static int verify_reserved_gdb(struct super_block *sb, while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) { if (le32_to_cpu(*p++) != grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){ - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "reserved GDT %llu" " missing grp %d (%llu)", blk, grp, @@ -423,7 +424,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, */ if (EXT4_SB(sb)->s_sbh->b_blocknr != le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "won't resize using backup superblock at %llu", (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); return -EPERM; @@ -447,7 +448,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, data = (__le32 *)dind->b_data; if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "new group %u GDT block %llu not reserved", input->group, gdblock); err = -EINVAL; @@ -468,10 +469,10 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, goto exit_dindj; n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), - GFP_NOFS); + GFP_KERNEL); if (!n_group_desc) { err = -ENOMEM; - ext4_warning(sb, __func__, + ext4_warning (sb, __FUNCTION__, "not enough memory for %lu groups", gdb_num + 1); goto exit_inode; } @@ -501,7 +502,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, EXT4_SB(sb)->s_gdb_count++; kfree(o_group_desc); - le16_add_cpu(&es->s_reserved_gdt_blocks, -1); + es->s_reserved_gdt_blocks = + cpu_to_le16(le16_to_cpu(es->s_reserved_gdt_blocks) - 1); ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); return 0; @@ -551,7 +553,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, int res, i; int 
err; - primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS); + primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_KERNEL); if (!primary) return -ENOMEM; @@ -569,7 +571,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, /* Get each reserved primary GDT block and verify it holds backups */ for (res = 0; res < reserved_gdb; res++, blk++) { if (le32_to_cpu(*data) != blk) { - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "reserved block %llu" " not at offset %ld", blk, @@ -713,7 +715,7 @@ static void update_backups(struct super_block *sb, */ exit_err: if (err) { - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "can't update backup for group %lu (err %d), " "forcing fsck on next reboot", group, err); sbi->s_mount_state &= ~EXT4_VALID_FS; @@ -753,33 +755,33 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "Can't resize non-sparse filesystem further"); return -EPERM; } if (ext4_blocks_count(es) + input->blocks_count < ext4_blocks_count(es)) { - ext4_warning(sb, __func__, "blocks_count overflow\n"); + ext4_warning(sb, __FUNCTION__, "blocks_count overflow\n"); return -EINVAL; } if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) < le32_to_cpu(es->s_inodes_count)) { - ext4_warning(sb, __func__, "inodes_count overflow\n"); + ext4_warning(sb, __FUNCTION__, "inodes_count overflow\n"); return -EINVAL; } if (reserved_gdb || gdb_off == 0) { if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE)){ - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "No reserved GDT blocks, can't resize"); return -EPERM; } inode = ext4_iget(sb, EXT4_RESIZE_INO); if (IS_ERR(inode)) { - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "Error opening resize inode"); return PTR_ERR(inode); } @@ -808,7 +810,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) lock_super(sb); if (input->group != sbi->s_groups_count) { - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "multiple resizers run on filesystem!"); err = -EBUSY; goto exit_journal; @@ -875,7 +877,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) */ ext4_blocks_count_set(es, ext4_blocks_count(es) + input->blocks_count); - le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb)); + es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) + + EXT4_INODES_PER_GROUP(sb)); /* * We need to protect s_groups_count against other CPUs seeing @@ -974,13 +977,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, " too large to resize to %llu blocks safely\n", sb->s_id, n_blocks_count); if (sizeof(sector_t) < 8) - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "CONFIG_LBD not enabled\n"); return -EINVAL; } if (n_blocks_count < o_blocks_count) { - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "can't shrink FS - resize aborted"); return -EBUSY; } @@ -989,7 +992,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last); if (last == 0) { - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "need to use ext2online to resize further"); return -EPERM; } @@ -997,7 +1000,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, add = 
EXT4_BLOCKS_PER_GROUP(sb) - last; if (o_blocks_count + add < o_blocks_count) { - ext4_warning(sb, __func__, "blocks_count overflow"); + ext4_warning(sb, __FUNCTION__, "blocks_count overflow"); return -EINVAL; } @@ -1005,7 +1008,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, add = n_blocks_count - o_blocks_count; if (o_blocks_count + add < n_blocks_count) - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "will only finish group (%llu" " blocks, %u new)", o_blocks_count + add, add); @@ -1013,7 +1016,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, /* See if the device is actually as big as what was requested */ bh = sb_bread(sb, o_blocks_count + add -1); if (!bh) { - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "can't read last block, resize aborted"); return -ENOSPC; } @@ -1025,13 +1028,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, handle = ext4_journal_start_sb(sb, 3); if (IS_ERR(handle)) { err = PTR_ERR(handle); - ext4_warning(sb, __func__, "error %d on journal start", err); + ext4_warning(sb, __FUNCTION__, "error %d on journal start",err); goto exit_put; } lock_super(sb); if (o_blocks_count != ext4_blocks_count(es)) { - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "multiple resizers run on filesystem!"); unlock_super(sb); ext4_journal_stop(handle); @@ -1041,7 +1044,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, if ((err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh))) { - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "error %d on journal write access", err); unlock_super(sb); ext4_journal_stop(handle); diff --git a/trunk/fs/ext4/super.c b/trunk/fs/ext4/super.c index 52dd0679a4e2..c81a8e759bad 100644 --- a/trunk/fs/ext4/super.c +++ b/trunk/fs/ext4/super.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include #include @@ -36,10 +38,9 @@ #include #include #include + #include -#include "ext4.h" -#include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" #include "namei.h" @@ -134,7 +135,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) * take the FS itself readonly cleanly. */ journal = EXT4_SB(sb)->s_journal; if (is_journal_aborted(journal)) { - ext4_abort(sb, __func__, + ext4_abort(sb, __FUNCTION__, "Detected aborted journal"); return ERR_PTR(-EROFS); } @@ -354,7 +355,7 @@ void ext4_update_dynamic_rev(struct super_block *sb) if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) return; - ext4_warning(sb, __func__, + ext4_warning(sb, __FUNCTION__, "updating to rev %d because of new feature flag, " "running e2fsck is recommended", EXT4_DYNAMIC_REV); @@ -944,8 +945,8 @@ static match_table_t tokens = { {Opt_mballoc, "mballoc"}, {Opt_nomballoc, "nomballoc"}, {Opt_stripe, "stripe=%u"}, - {Opt_resize, "resize"}, {Opt_err, NULL}, + {Opt_resize, "resize"}, }; static ext4_fsblk_t get_sb_block(void **data) @@ -1387,11 +1388,11 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, * a plain journaled filesystem we can keep it set as * valid forever! 
:) */ - es->s_state &= cpu_to_le16(~EXT4_VALID_FS); + es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT4_VALID_FS); #endif if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); - le16_add_cpu(&es->s_mnt_count, 1); + es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); es->s_mtime = cpu_to_le32(get_seconds()); ext4_update_dynamic_rev(sb); EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); @@ -1484,33 +1485,36 @@ static int ext4_check_descriptors(struct super_block *sb) block_bitmap = ext4_block_bitmap(sb, gdp); if (block_bitmap < first_block || block_bitmap > last_block) { - printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " - "Block bitmap for group %lu not in group " - "(block %llu)!", i, block_bitmap); + ext4_error (sb, "ext4_check_descriptors", + "Block bitmap for group %lu" + " not in group (block %llu)!", + i, block_bitmap); return 0; } inode_bitmap = ext4_inode_bitmap(sb, gdp); if (inode_bitmap < first_block || inode_bitmap > last_block) { - printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " - "Inode bitmap for group %lu not in group " - "(block %llu)!", i, inode_bitmap); + ext4_error (sb, "ext4_check_descriptors", + "Inode bitmap for group %lu" + " not in group (block %llu)!", + i, inode_bitmap); return 0; } inode_table = ext4_inode_table(sb, gdp); if (inode_table < first_block || inode_table + sbi->s_itb_per_group - 1 > last_block) { - printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " - "Inode table for group %lu not in group " - "(block %llu)!", i, inode_table); + ext4_error (sb, "ext4_check_descriptors", + "Inode table for group %lu" + " not in group (block %llu)!", + i, inode_table); return 0; } if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { - printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " - "Checksum for group %lu failed (%u!=%u)\n", - i, le16_to_cpu(ext4_group_desc_csum(sbi, i, - gdp)), le16_to_cpu(gdp->bg_checksum)); + ext4_error(sb, __FUNCTION__, + "Checksum for group %lu failed (%u!=%u)\n", + i, le16_to_cpu(ext4_group_desc_csum(sbi, i, + gdp)), le16_to_cpu(gdp->bg_checksum)); return 0; } if (!flexbg_flag) @@ -1590,8 +1594,8 @@ static void ext4_orphan_cleanup (struct super_block * sb, while (es->s_last_orphan) { struct inode *inode; - inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); - if (IS_ERR(inode)) { + if (!(inode = + ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) { es->s_last_orphan = 0; break; } @@ -1601,7 +1605,7 @@ static void ext4_orphan_cleanup (struct super_block * sb, if (inode->i_nlink) { printk(KERN_DEBUG "%s: truncating inode %lu to %Ld bytes\n", - __func__, inode->i_ino, inode->i_size); + __FUNCTION__, inode->i_ino, inode->i_size); jbd_debug(2, "truncating inode %lu to %Ld bytes\n", inode->i_ino, inode->i_size); ext4_truncate(inode); @@ -1609,7 +1613,7 @@ static void ext4_orphan_cleanup (struct super_block * sb, } else { printk(KERN_DEBUG "%s: deleting unreferenced inode %lu\n", - __func__, inode->i_ino); + __FUNCTION__, inode->i_ino); jbd_debug(2, "deleting unreferenced inode %lu\n", inode->i_ino); nr_orphans++; @@ -2695,9 +2699,9 @@ static void ext4_clear_journal_err(struct super_block * sb, char nbuf[16]; errstr = ext4_decode_error(sb, j_errno, nbuf); - ext4_warning(sb, __func__, "Filesystem error recorded " + ext4_warning(sb, __FUNCTION__, "Filesystem error recorded " "from previous mount: %s", errstr); - ext4_warning(sb, __func__, "Marking fs in need of " + ext4_warning(sb, __FUNCTION__, "Marking fs in need of " "filesystem check."); 
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; @@ -2824,7 +2828,7 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data) } if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) - ext4_abort(sb, __func__, "Abort forced by user"); + ext4_abort(sb, __FUNCTION__, "Abort forced by user"); sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); @@ -3036,14 +3040,8 @@ static int ext4_dquot_drop(struct inode *inode) /* We may delete quota structure so we need to reserve enough blocks */ handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb)); - if (IS_ERR(handle)) { - /* - * We call dquot_drop() anyway to at least release references - * to quota structures so that umount does not hang. - */ - dquot_drop(inode); + if (IS_ERR(handle)) return PTR_ERR(handle); - } ret = dquot_drop(inode); err = ext4_journal_stop(handle); if (!ret) diff --git a/trunk/fs/ext4/symlink.c b/trunk/fs/ext4/symlink.c index e9178643dc01..e6f9da4287c4 100644 --- a/trunk/fs/ext4/symlink.c +++ b/trunk/fs/ext4/symlink.c @@ -19,8 +19,8 @@ #include #include +#include #include -#include "ext4.h" #include "xattr.h" static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd) diff --git a/trunk/fs/ext4/xattr.c b/trunk/fs/ext4/xattr.c index 3fbc2c6c3d0e..e9054c1c7d93 100644 --- a/trunk/fs/ext4/xattr.c +++ b/trunk/fs/ext4/xattr.c @@ -53,11 +53,11 @@ #include #include #include +#include +#include #include #include #include -#include "ext4_jbd2.h" -#include "ext4.h" #include "xattr.h" #include "acl.h" @@ -92,8 +92,6 @@ static struct buffer_head *ext4_xattr_cache_find(struct inode *, struct mb_cache_entry **); static void ext4_xattr_rehash(struct ext4_xattr_header *, struct ext4_xattr_entry *); -static int ext4_xattr_list(struct inode *inode, char *buffer, - size_t buffer_size); static struct mb_cache *ext4_xattr_cache; @@ -227,7 +225,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); if (ext4_xattr_check_block(bh)) { -bad_block: ext4_error(inode->i_sb, __func__, +bad_block: ext4_error(inode->i_sb, __FUNCTION__, "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); error = -EIO; @@ -369,7 +367,7 @@ ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); if (ext4_xattr_check_block(bh)) { - ext4_error(inode->i_sb, __func__, + ext4_error(inode->i_sb, __FUNCTION__, "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); error = -EIO; @@ -422,7 +420,7 @@ ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) * Returns a negative error number on failure, or the number of bytes * used / required on success. 
*/ -static int +int ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) { int i_error, b_error; @@ -486,7 +484,8 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, get_bh(bh); ext4_forget(handle, 1, inode, bh, bh->b_blocknr); } else { - le32_add_cpu(&BHDR(bh)->h_refcount, -1); + BHDR(bh)->h_refcount = cpu_to_le32( + le32_to_cpu(BHDR(bh)->h_refcount) - 1); error = ext4_journal_dirty_metadata(handle, bh); if (IS_SYNC(inode)) handle->h_sync = 1; @@ -661,7 +660,7 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i, atomic_read(&(bs->bh->b_count)), le32_to_cpu(BHDR(bs->bh)->h_refcount)); if (ext4_xattr_check_block(bs->bh)) { - ext4_error(sb, __func__, + ext4_error(sb, __FUNCTION__, "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); error = -EIO; @@ -739,7 +738,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, ce = NULL; } ea_bdebug(bs->bh, "cloning"); - s->base = kmalloc(bs->bh->b_size, GFP_NOFS); + s->base = kmalloc(bs->bh->b_size, GFP_KERNEL); error = -ENOMEM; if (s->base == NULL) goto cleanup; @@ -751,7 +750,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, } } else { /* Allocate a buffer where we construct the new block. */ - s->base = kzalloc(sb->s_blocksize, GFP_NOFS); + s->base = kzalloc(sb->s_blocksize, GFP_KERNEL); /* assert(header == s->base) */ error = -ENOMEM; if (s->base == NULL) @@ -790,7 +789,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, if (error) goto cleanup_dquot; lock_buffer(new_bh); - le32_add_cpu(&BHDR(new_bh)->h_refcount, 1); + BHDR(new_bh)->h_refcount = cpu_to_le32(1 + + le32_to_cpu(BHDR(new_bh)->h_refcount)); ea_bdebug(new_bh, "reusing; refcount now=%d", le32_to_cpu(BHDR(new_bh)->h_refcount)); unlock_buffer(new_bh); @@ -808,8 +808,10 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, get_bh(new_bh); } else { /* We need to allocate a new block */ - ext4_fsblk_t goal = ext4_group_first_block_no(sb, - EXT4_I(inode)->i_block_group); + ext4_fsblk_t goal = le32_to_cpu( + EXT4_SB(sb)->s_es->s_first_data_block) + + (ext4_fsblk_t)EXT4_I(inode)->i_block_group * + EXT4_BLOCKS_PER_GROUP(sb); ext4_fsblk_t block = ext4_new_block(handle, inode, goal, &error); if (error) @@ -861,7 +863,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, goto cleanup; bad_block: - ext4_error(inode->i_sb, __func__, + ext4_error(inode->i_sb, __FUNCTION__, "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); goto cleanup; @@ -1164,7 +1166,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, if (!bh) goto cleanup; if (ext4_xattr_check_block(bh)) { - ext4_error(inode->i_sb, __func__, + ext4_error(inode->i_sb, __FUNCTION__, "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); error = -EIO; @@ -1339,14 +1341,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode) goto cleanup; bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); if (!bh) { - ext4_error(inode->i_sb, __func__, + ext4_error(inode->i_sb, __FUNCTION__, "inode %lu: block %llu read error", inode->i_ino, EXT4_I(inode)->i_file_acl); goto cleanup; } if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || BHDR(bh)->h_blocks != cpu_to_le32(1)) { - ext4_error(inode->i_sb, __func__, + ext4_error(inode->i_sb, __FUNCTION__, "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); goto cleanup; @@ -1473,7 +1475,7 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header, } bh = sb_bread(inode->i_sb, 
ce->e_block); if (!bh) { - ext4_error(inode->i_sb, __func__, + ext4_error(inode->i_sb, __FUNCTION__, "inode %lu: block %lu read error", inode->i_ino, (unsigned long) ce->e_block); } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= diff --git a/trunk/fs/ext4/xattr.h b/trunk/fs/ext4/xattr.h index 5992fe979bb9..d7f5d6a12651 100644 --- a/trunk/fs/ext4/xattr.h +++ b/trunk/fs/ext4/xattr.h @@ -74,6 +74,7 @@ extern struct xattr_handler ext4_xattr_security_handler; extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); +extern int ext4_xattr_list(struct inode *, char *, size_t); extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int); extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); @@ -97,6 +98,12 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name, return -EOPNOTSUPP; } +static inline int +ext4_xattr_list(struct inode *inode, void *buffer, size_t size) +{ + return -EOPNOTSUPP; +} + static inline int ext4_xattr_set(struct inode *inode, int name_index, const char *name, const void *value, size_t size, int flags) diff --git a/trunk/fs/ext4/xattr_security.c b/trunk/fs/ext4/xattr_security.c index ca5f89fc6cae..f17eaf2321b9 100644 --- a/trunk/fs/ext4/xattr_security.c +++ b/trunk/fs/ext4/xattr_security.c @@ -6,9 +6,9 @@ #include #include #include +#include +#include #include -#include "ext4_jbd2.h" -#include "ext4.h" #include "xattr.h" static size_t diff --git a/trunk/fs/ext4/xattr_trusted.c b/trunk/fs/ext4/xattr_trusted.c index fff33382cadc..e0f05acdafec 100644 --- a/trunk/fs/ext4/xattr_trusted.c +++ b/trunk/fs/ext4/xattr_trusted.c @@ -9,8 +9,8 @@ #include #include #include -#include "ext4_jbd2.h" -#include "ext4.h" +#include +#include #include "xattr.h" #define XATTR_TRUSTED_PREFIX "trusted." diff --git a/trunk/fs/ext4/xattr_user.c b/trunk/fs/ext4/xattr_user.c index 67be723fcc4e..7ed3d8ebf096 100644 --- a/trunk/fs/ext4/xattr_user.c +++ b/trunk/fs/ext4/xattr_user.c @@ -8,8 +8,8 @@ #include #include #include -#include "ext4_jbd2.h" -#include "ext4.h" +#include +#include #include "xattr.h" #define XATTR_USER_PREFIX "user." diff --git a/trunk/fs/jbd2/commit.c b/trunk/fs/jbd2/commit.c index e0139786f717..a8173081f831 100644 --- a/trunk/fs/jbd2/commit.c +++ b/trunk/fs/jbd2/commit.c @@ -519,6 +519,22 @@ void jbd2_journal_commit_transaction(journal_t *journal) jbd_debug (3, "JBD: commit phase 2\n"); + /* + * First, drop modified flag: all accesses to the buffers + * will be tracked for a new trasaction only -bzzz + */ + spin_lock(&journal->j_list_lock); + if (commit_transaction->t_buffers) { + new_jh = jh = commit_transaction->t_buffers->b_tnext; + do { + J_ASSERT_JH(new_jh, new_jh->b_modified == 1 || + new_jh->b_modified == 0); + new_jh->b_modified = 0; + new_jh = new_jh->b_tnext; + } while (new_jh != jh); + } + spin_unlock(&journal->j_list_lock); + /* * Now start flushing things to disk, in the order they appear * on the transaction lists. Data blocks go first. 
@@ -568,9 +584,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits; stats.u.run.rs_blocks_logged = 0; - J_ASSERT(commit_transaction->t_nr_buffers <= - commit_transaction->t_outstanding_credits); - descriptor = NULL; bufs = 0; while (commit_transaction->t_buffers) { diff --git a/trunk/fs/jbd2/journal.c b/trunk/fs/jbd2/journal.c index 53632e3e8457..eb7eb6c27bcb 100644 --- a/trunk/fs/jbd2/journal.c +++ b/trunk/fs/jbd2/journal.c @@ -534,7 +534,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) if (!tid_geq(journal->j_commit_request, tid)) { printk(KERN_EMERG "%s: error: j_commit_request=%d, tid=%d\n", - __func__, journal->j_commit_request, tid); + __FUNCTION__, journal->j_commit_request, tid); } spin_unlock(&journal->j_state_lock); #endif @@ -599,7 +599,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, printk(KERN_ALERT "%s: journal block not found " "at offset %lu on %s\n", - __func__, + __FUNCTION__, blocknr, bdevname(journal->j_dev, b)); err = -EIO; @@ -997,14 +997,13 @@ static journal_t * journal_init_common (void) */ /** - * journal_t * jbd2_journal_init_dev() - creates and initialises a journal structure + * journal_t * jbd2_journal_init_dev() - creates an initialises a journal structure * @bdev: Block device on which to create the journal * @fs_dev: Device which hold journalled filesystem for this journal. * @start: Block nr Start of journal. * @len: Length of the journal in blocks. * @blocksize: blocksize of journalling device - * - * Returns: a newly created journal_t * + * @returns: a newly created journal_t * * * jbd2_journal_init_dev creates a journal which maps a fixed contiguous * range of blocks on an arbitrary block device. @@ -1028,7 +1027,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); if (!journal->j_wbuf) { printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", - __func__); + __FUNCTION__); kfree(journal); journal = NULL; goto out; @@ -1084,7 +1083,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); if (!journal->j_wbuf) { printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", - __func__); + __FUNCTION__); kfree(journal); return NULL; } @@ -1093,7 +1092,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) /* If that failed, give up */ if (err) { printk(KERN_ERR "%s: Cannnot locate journal superblock\n", - __func__); + __FUNCTION__); kfree(journal); return NULL; } @@ -1179,7 +1178,7 @@ int jbd2_journal_create(journal_t *journal) */ printk(KERN_EMERG "%s: creation of journal on external device!\n", - __func__); + __FUNCTION__); BUG(); } @@ -1977,10 +1976,9 @@ static int journal_init_jbd2_journal_head_cache(void) static void jbd2_journal_destroy_jbd2_journal_head_cache(void) { - if (jbd2_journal_head_cache) { - kmem_cache_destroy(jbd2_journal_head_cache); - jbd2_journal_head_cache = NULL; - } + J_ASSERT(jbd2_journal_head_cache != NULL); + kmem_cache_destroy(jbd2_journal_head_cache); + jbd2_journal_head_cache = NULL; } /* @@ -1999,7 +1997,7 @@ static struct journal_head *journal_alloc_journal_head(void) jbd_debug(1, "out of memory for journal_head\n"); if (time_after(jiffies, last_warning + 5*HZ)) { printk(KERN_NOTICE "ENOMEM in %s, retrying.\n", - __func__); + __FUNCTION__); last_warning = jiffies; } while (!ret) { @@ -2136,13 +2134,13 @@ static void 
__journal_remove_journal_head(struct buffer_head *bh) if (jh->b_frozen_data) { printk(KERN_WARNING "%s: freeing " "b_frozen_data\n", - __func__); + __FUNCTION__); jbd2_free(jh->b_frozen_data, bh->b_size); } if (jh->b_committed_data) { printk(KERN_WARNING "%s: freeing " "b_committed_data\n", - __func__); + __FUNCTION__); jbd2_free(jh->b_committed_data, bh->b_size); } bh->b_private = NULL; @@ -2307,12 +2305,10 @@ static int __init journal_init(void) BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024); ret = journal_init_caches(); - if (ret == 0) { - jbd2_create_debugfs_entry(); - jbd2_create_jbd_stats_proc_entry(); - } else { + if (ret != 0) jbd2_journal_destroy_caches(); - } + jbd2_create_debugfs_entry(); + jbd2_create_jbd_stats_proc_entry(); return ret; } diff --git a/trunk/fs/jbd2/revoke.c b/trunk/fs/jbd2/revoke.c index 257ff2625765..2e1453a5e998 100644 --- a/trunk/fs/jbd2/revoke.c +++ b/trunk/fs/jbd2/revoke.c @@ -139,7 +139,7 @@ static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr, oom: if (!journal_oom_retry) return -ENOMEM; - jbd_debug(1, "ENOMEM in %s, retrying\n", __func__); + jbd_debug(1, "ENOMEM in %s, retrying\n", __FUNCTION__); yield(); goto repeat; } @@ -167,121 +167,138 @@ static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal, return NULL; } -void jbd2_journal_destroy_revoke_caches(void) -{ - if (jbd2_revoke_record_cache) { - kmem_cache_destroy(jbd2_revoke_record_cache); - jbd2_revoke_record_cache = NULL; - } - if (jbd2_revoke_table_cache) { - kmem_cache_destroy(jbd2_revoke_table_cache); - jbd2_revoke_table_cache = NULL; - } -} - int __init jbd2_journal_init_revoke_caches(void) { - J_ASSERT(!jbd2_revoke_record_cache); - J_ASSERT(!jbd2_revoke_table_cache); - jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record", sizeof(struct jbd2_revoke_record_s), 0, SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, NULL); if (!jbd2_revoke_record_cache) - goto record_cache_failure; + return -ENOMEM; jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table", sizeof(struct jbd2_revoke_table_s), 0, SLAB_TEMPORARY, NULL); - if (!jbd2_revoke_table_cache) - goto table_cache_failure; - return 0; -table_cache_failure: - jbd2_journal_destroy_revoke_caches(); -record_cache_failure: + if (!jbd2_revoke_table_cache) { + kmem_cache_destroy(jbd2_revoke_record_cache); + jbd2_revoke_record_cache = NULL; return -ENOMEM; + } + return 0; } -static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size) +void jbd2_journal_destroy_revoke_caches(void) { - int shift = 0; - int tmp = hash_size; - struct jbd2_revoke_table_s *table; + kmem_cache_destroy(jbd2_revoke_record_cache); + jbd2_revoke_record_cache = NULL; + kmem_cache_destroy(jbd2_revoke_table_cache); + jbd2_revoke_table_cache = NULL; +} - table = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); - if (!table) - goto out; +/* Initialise the revoke table for a given journal to a given size. 
*/ +int jbd2_journal_init_revoke(journal_t *journal, int hash_size) +{ + int shift, tmp; + + J_ASSERT (journal->j_revoke_table[0] == NULL); + + shift = 0; + tmp = hash_size; while((tmp >>= 1UL) != 0UL) shift++; - table->hash_size = hash_size; - table->hash_shift = shift; - table->hash_table = + journal->j_revoke_table[0] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); + if (!journal->j_revoke_table[0]) + return -ENOMEM; + journal->j_revoke = journal->j_revoke_table[0]; + + /* Check that the hash_size is a power of two */ + J_ASSERT(is_power_of_2(hash_size)); + + journal->j_revoke->hash_size = hash_size; + + journal->j_revoke->hash_shift = shift; + + journal->j_revoke->hash_table = kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); - if (!table->hash_table) { - kmem_cache_free(jbd2_revoke_table_cache, table); - table = NULL; - goto out; + if (!journal->j_revoke->hash_table) { + kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); + journal->j_revoke = NULL; + return -ENOMEM; } for (tmp = 0; tmp < hash_size; tmp++) - INIT_LIST_HEAD(&table->hash_table[tmp]); + INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); -out: - return table; -} - -static void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table) -{ - int i; - struct list_head *hash_list; - - for (i = 0; i < table->hash_size; i++) { - hash_list = &table->hash_table[i]; - J_ASSERT(list_empty(hash_list)); + journal->j_revoke_table[1] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); + if (!journal->j_revoke_table[1]) { + kfree(journal->j_revoke_table[0]->hash_table); + kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); + return -ENOMEM; } - kfree(table->hash_table); - kmem_cache_free(jbd2_revoke_table_cache, table); -} + journal->j_revoke = journal->j_revoke_table[1]; -/* Initialise the revoke table for a given journal to a given size. */ -int jbd2_journal_init_revoke(journal_t *journal, int hash_size) -{ - J_ASSERT(journal->j_revoke_table[0] == NULL); + /* Check that the hash_size is a power of two */ J_ASSERT(is_power_of_2(hash_size)); - journal->j_revoke_table[0] = jbd2_journal_init_revoke_table(hash_size); - if (!journal->j_revoke_table[0]) - goto fail0; + journal->j_revoke->hash_size = hash_size; - journal->j_revoke_table[1] = jbd2_journal_init_revoke_table(hash_size); - if (!journal->j_revoke_table[1]) - goto fail1; + journal->j_revoke->hash_shift = shift; - journal->j_revoke = journal->j_revoke_table[1]; + journal->j_revoke->hash_table = + kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); + if (!journal->j_revoke->hash_table) { + kfree(journal->j_revoke_table[0]->hash_table); + kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); + kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[1]); + journal->j_revoke = NULL; + return -ENOMEM; + } + + for (tmp = 0; tmp < hash_size; tmp++) + INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); spin_lock_init(&journal->j_revoke_lock); return 0; - -fail1: - jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]); -fail0: - return -ENOMEM; } -/* Destroy a journal's revoke table. The table must already be empty! */ +/* Destoy a journal's revoke table. The table must already be empty! 
*/ + void jbd2_journal_destroy_revoke(journal_t *journal) { + struct jbd2_revoke_table_s *table; + struct list_head *hash_list; + int i; + + table = journal->j_revoke_table[0]; + if (!table) + return; + + for (i = 0; i < table->hash_size; i++) { + hash_list = &table->hash_table[i]; + J_ASSERT (list_empty(hash_list)); + } + + kfree(table->hash_table); + kmem_cache_free(jbd2_revoke_table_cache, table); + journal->j_revoke = NULL; + + table = journal->j_revoke_table[1]; + if (!table) + return; + + for (i = 0; i < table->hash_size; i++) { + hash_list = &table->hash_table[i]; + J_ASSERT (list_empty(hash_list)); + } + + kfree(table->hash_table); + kmem_cache_free(jbd2_revoke_table_cache, table); journal->j_revoke = NULL; - if (journal->j_revoke_table[0]) - jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]); - if (journal->j_revoke_table[1]) - jbd2_journal_destroy_revoke_table(journal->j_revoke_table[1]); } diff --git a/trunk/fs/jbd2/transaction.c b/trunk/fs/jbd2/transaction.c index d6e006e67804..b9b0b6f899b9 100644 --- a/trunk/fs/jbd2/transaction.c +++ b/trunk/fs/jbd2/transaction.c @@ -617,12 +617,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, jh->b_next_transaction == transaction) goto done; - /* - * this is the first time this transaction is touching this buffer, - * reset the modified flag - */ - jh->b_modified = 0; - /* * If there is already a copy-out version of this buffer, then we don't * need to make another one @@ -696,7 +690,7 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, if (!frozen_buffer) { printk(KERN_EMERG "%s: OOM for frozen_buffer\n", - __func__); + __FUNCTION__); JBUFFER_TRACE(jh, "oom!"); error = -ENOMEM; jbd_lock_bh_state(bh); @@ -835,16 +829,9 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) if (jh->b_transaction == NULL) { jh->b_transaction = transaction; - - /* first access by this transaction */ - jh->b_modified = 0; - JBUFFER_TRACE(jh, "file as BJ_Reserved"); __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); } else if (jh->b_transaction == journal->j_committing_transaction) { - /* first access by this transaction */ - jh->b_modified = 0; - JBUFFER_TRACE(jh, "set next transaction"); jh->b_next_transaction = transaction; } @@ -914,7 +901,7 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh) committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); if (!committed_data) { printk(KERN_EMERG "%s: No memory for committed data\n", - __func__); + __FUNCTION__); err = -ENOMEM; goto out; } @@ -1243,7 +1230,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) struct journal_head *jh; int drop_reserve = 0; int err = 0; - int was_modified = 0; BUFFER_TRACE(bh, "entry"); @@ -1262,9 +1248,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) goto not_jbd; } - /* keep track of wether or not this transaction modified us */ - was_modified = jh->b_modified; - /* * The buffer's going from the transaction, we must drop * all references -bzzz @@ -1282,12 +1265,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) JBUFFER_TRACE(jh, "belongs to current transaction: unfile"); - /* - * we only want to drop a reference if this transaction - * modified the buffer - */ - if (was_modified) - drop_reserve = 1; + drop_reserve = 1; /* * We are no longer going to journal this buffer.
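The jbd2_journal_forget() hunks above go back to handing a buffer credit to the handle unconditionally (drop_reserve = 1); the removed code returned the credit only when this transaction had actually modified the buffer, via the was_modified/b_modified tracking deleted earlier in the file. A toy model of that reservation accounting, assuming a bare integer credit counter (demo_* names are illustrative):

#include <assert.h>

struct demo_handle {
        int buffer_credits;     /* journal space reserved for this handle */
};

/* Journaling a buffer consumes one credit from the reservation... */
static void demo_get_write_access(struct demo_handle *h)
{
        assert(h->buffer_credits > 0);
        h->buffer_credits--;
}

/* ...and forgetting the buffer returns it (the drop_reserve = 1 path). */
static void demo_forget(struct demo_handle *h)
{
        h->buffer_credits++;
}

int main(void)
{
        struct demo_handle h = { .buffer_credits = 2 };

        demo_get_write_access(&h);
        demo_forget(&h);
        assert(h.buffer_credits == 2);
        return 0;
}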
@@ -1327,13 +1305,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) if (jh->b_next_transaction) { J_ASSERT(jh->b_next_transaction == transaction); jh->b_next_transaction = NULL; - - /* - * only drop a reference if this transaction modified - * the buffer - */ - if (was_modified) - drop_reserve = 1; + drop_reserve = 1; } } @@ -1462,8 +1434,7 @@ int jbd2_journal_stop(handle_t *handle) return err; } -/** - * int jbd2_journal_force_commit() - force any uncommitted transactions +/**int jbd2_journal_force_commit() - force any uncommitted transactions * @journal: journal to force * * For synchronous operations: force any uncommitted transactions @@ -2106,7 +2077,7 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) jh->b_transaction = jh->b_next_transaction; jh->b_next_transaction = NULL; __jbd2_journal_file_buffer(jh, jh->b_transaction, - jh->b_modified ? BJ_Metadata : BJ_Reserved); + was_dirty ? BJ_Metadata : BJ_Reserved); J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); if (was_dirty) diff --git a/trunk/fs/xfs/Kconfig b/trunk/fs/xfs/Kconfig index 3f53dd101f99..524021ff5436 100644 --- a/trunk/fs/xfs/Kconfig +++ b/trunk/fs/xfs/Kconfig @@ -64,16 +64,3 @@ config XFS_RT See the xfs man page in section 5 for additional information. If unsure, say N. - -config XFS_DEBUG - bool "XFS Debugging support (EXPERIMENTAL)" - depends on XFS_FS && EXPERIMENTAL - help - Say Y here to get an XFS build with many debugging features, - including ASSERT checks, function wrappers around macros, - and extra sanity-checking functions in various code paths. - - Note that the resulting code will be HUGE and SLOW, and probably - not useful unless you are debugging a particular problem. - - Say N unless you are an XFS developer, or you play one on TV. 
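The mrlock.h rework that follows reinstates a multi-reader lock built from an rw_semaphore plus an mr_writer flag, so a single mrunlock() can tell whether it is releasing a read or a write hold. A rough user-space analogue with POSIX rwlocks (compile with -lpthread; the _demo names are illustrative, and the flag can be tested without atomics only because the lock itself serialises writers):

#include <pthread.h>

typedef struct {
        pthread_rwlock_t mr_lock;
        int mr_writer;          /* set only while the write side is held */
} mrlock_demo_t;

static void mraccess_demo(mrlock_demo_t *mrp)
{
        pthread_rwlock_rdlock(&mrp->mr_lock);
}

static void mrupdate_demo(mrlock_demo_t *mrp)
{
        pthread_rwlock_wrlock(&mrp->mr_lock);
        mrp->mr_writer = 1;
}

/* One unlock entry point: the flag says which side is being released. */
static void mrunlock_demo(mrlock_demo_t *mrp)
{
        if (mrp->mr_writer)
                mrp->mr_writer = 0;     /* clear before dropping the lock */
        pthread_rwlock_unlock(&mrp->mr_lock);
}

int main(void)
{
        mrlock_demo_t mr = { PTHREAD_RWLOCK_INITIALIZER, 0 };

        mrupdate_demo(&mr);
        mrunlock_demo(&mr);     /* write hold released */
        mraccess_demo(&mr);
        mrunlock_demo(&mr);     /* read hold released */
        return 0;
}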
diff --git a/trunk/fs/xfs/linux-2.6/mrlock.h b/trunk/fs/xfs/linux-2.6/mrlock.h index ff6a19873e5c..c110bb002665 100644 --- a/trunk/fs/xfs/linux-2.6/mrlock.h +++ b/trunk/fs/xfs/linux-2.6/mrlock.h @@ -20,24 +20,29 @@ #include +enum { MR_NONE, MR_ACCESS, MR_UPDATE }; + typedef struct { struct rw_semaphore mr_lock; -#ifdef DEBUG int mr_writer; -#endif } mrlock_t; -#ifdef DEBUG #define mrinit(mrp, name) \ do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0) -#else -#define mrinit(mrp, name) \ - do { init_rwsem(&(mrp)->mr_lock); } while (0) -#endif - #define mrlock_init(mrp, t,n,s) mrinit(mrp, n) #define mrfree(mrp) do { } while (0) +static inline void mraccess(mrlock_t *mrp) +{ + down_read(&mrp->mr_lock); +} + +static inline void mrupdate(mrlock_t *mrp) +{ + down_write(&mrp->mr_lock); + mrp->mr_writer = 1; +} + static inline void mraccess_nested(mrlock_t *mrp, int subclass) { down_read_nested(&mrp->mr_lock, subclass); @@ -46,11 +51,10 @@ static inline void mraccess_nested(mrlock_t *mrp, int subclass) static inline void mrupdate_nested(mrlock_t *mrp, int subclass) { down_write_nested(&mrp->mr_lock, subclass); -#ifdef DEBUG mrp->mr_writer = 1; -#endif } + static inline int mrtryaccess(mrlock_t *mrp) { return down_read_trylock(&mrp->mr_lock); @@ -60,31 +64,39 @@ static inline int mrtryupdate(mrlock_t *mrp) { if (!down_write_trylock(&mrp->mr_lock)) return 0; -#ifdef DEBUG mrp->mr_writer = 1; -#endif return 1; } -static inline void mrunlock_excl(mrlock_t *mrp) +static inline void mrunlock(mrlock_t *mrp) { -#ifdef DEBUG - mrp->mr_writer = 0; -#endif - up_write(&mrp->mr_lock); -} - -static inline void mrunlock_shared(mrlock_t *mrp) -{ - up_read(&mrp->mr_lock); + if (mrp->mr_writer) { + mrp->mr_writer = 0; + up_write(&mrp->mr_lock); + } else { + up_read(&mrp->mr_lock); + } } static inline void mrdemote(mrlock_t *mrp) { -#ifdef DEBUG mrp->mr_writer = 0; -#endif downgrade_write(&mrp->mr_lock); } +#ifdef DEBUG +/* + * Debug-only routine, without some platform-specific asm code, we can + * now only answer requests regarding whether we hold the lock for write + * (reader state is outside our visibility, we only track writer state). + * Note: means !ismrlocked would give false positives, so don't do that. 
+ */ +static inline int ismrlocked(mrlock_t *mrp, int type) +{ + if (mrp && type == MR_UPDATE) + return mrp->mr_writer; + return 1; +} +#endif + #endif /* __XFS_SUPPORT_MRLOCK_H__ */ diff --git a/trunk/fs/xfs/linux-2.6/xfs_buf.c b/trunk/fs/xfs/linux-2.6/xfs_buf.c index 5105015a75ad..52f6846101d5 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_buf.c +++ b/trunk/fs/xfs/linux-2.6/xfs_buf.c @@ -886,7 +886,7 @@ int xfs_buf_lock_value( xfs_buf_t *bp) { - return bp->b_sema.count; + return atomic_read(&bp->b_sema.count); } #endif diff --git a/trunk/fs/xfs/linux-2.6/xfs_export.c b/trunk/fs/xfs/linux-2.6/xfs_export.c index c672b3238b14..265f0168ab76 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_export.c +++ b/trunk/fs/xfs/linux-2.6/xfs_export.c @@ -133,7 +133,7 @@ xfs_nfs_get_inode( if (!ip) return ERR_PTR(-EIO); - if (ip->i_d.di_gen != generation) { + if (!ip->i_d.di_mode || ip->i_d.di_gen != generation) { xfs_iput_new(ip, XFS_ILOCK_SHARED); return ERR_PTR(-ENOENT); } diff --git a/trunk/fs/xfs/linux-2.6/xfs_file.c b/trunk/fs/xfs/linux-2.6/xfs_file.c index 65e78c13d4ae..05905246434d 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_file.c +++ b/trunk/fs/xfs/linux-2.6/xfs_file.c @@ -43,6 +43,9 @@ #include static struct vm_operations_struct xfs_file_vm_ops; +#ifdef CONFIG_XFS_DMAPI +static struct vm_operations_struct xfs_dmapi_file_vm_ops; +#endif STATIC_INLINE ssize_t __xfs_file_read( @@ -199,6 +202,22 @@ xfs_file_fsync( (xfs_off_t)0, (xfs_off_t)-1); } +#ifdef CONFIG_XFS_DMAPI +STATIC int +xfs_vm_fault( + struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + bhv_vnode_t *vp = vn_from_inode(inode); + + ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI); + if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), vma, 0)) + return VM_FAULT_SIGBUS; + return filemap_fault(vma, vmf); +} +#endif /* CONFIG_XFS_DMAPI */ + /* * Unfortunately we can't just use the clean and simple readdir implementation * below, because nfs might call back into ->lookup from the filldir callback @@ -367,6 +386,11 @@ xfs_file_mmap( vma->vm_ops = &xfs_file_vm_ops; vma->vm_flags |= VM_CAN_NONLINEAR; +#ifdef CONFIG_XFS_DMAPI + if (XFS_M(filp->f_path.dentry->d_inode->i_sb)->m_flags & XFS_MOUNT_DMAPI) + vma->vm_ops = &xfs_dmapi_file_vm_ops; +#endif /* CONFIG_XFS_DMAPI */ + file_accessed(filp); return 0; } @@ -413,6 +437,47 @@ xfs_file_ioctl_invis( return error; } +#ifdef CONFIG_XFS_DMAPI +#ifdef HAVE_VMOP_MPROTECT +STATIC int +xfs_vm_mprotect( + struct vm_area_struct *vma, + unsigned int newflags) +{ + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct xfs_mount *mp = XFS_M(inode->i_sb); + int error = 0; + + if (mp->m_flags & XFS_MOUNT_DMAPI) { + if ((vma->vm_flags & VM_MAYSHARE) && + (newflags & VM_WRITE) && !(vma->vm_flags & VM_WRITE)) + error = XFS_SEND_MMAP(mp, vma, VM_WRITE); + } + return error; +} +#endif /* HAVE_VMOP_MPROTECT */ +#endif /* CONFIG_XFS_DMAPI */ + +#ifdef HAVE_FOP_OPEN_EXEC +/* If the user is attempting to execute a file that is offline then + * we have to trigger a DMAPI READ event before the file is marked as busy + * otherwise the invisible I/O will not be able to write to the file to bring + * it back online. 
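In the xfs_file_mmap() hunk above, the default vm_operations table is installed first and a DMAPI-aware one is swapped in when the mount carries XFS_MOUNT_DMAPI. A small sketch of that flag-driven ops-table selection (vm_ops_demo, MOUNT_DMAPI and the fault handlers are hypothetical stand-ins for the kernel structures):

#include <stdio.h>

struct vm_ops_demo {
        const char *name;
        int (*fault)(void);
};

static int plain_fault(void)
{
        return 0;               /* filemap_fault() in the original */
}

static int dmapi_fault(void)
{
        /* the DMAPI variant would raise a managed-region event first */
        return 0;
}

static const struct vm_ops_demo plain_ops = { "plain", plain_fault };
static const struct vm_ops_demo dmapi_ops = { "dmapi", dmapi_fault };

#define MOUNT_DMAPI 0x1

/* Install the default table, then override it if the feature is on. */
static const struct vm_ops_demo *pick_vm_ops(unsigned int mount_flags)
{
        const struct vm_ops_demo *ops = &plain_ops;

        if (mount_flags & MOUNT_DMAPI)
                ops = &dmapi_ops;
        return ops;
}

int main(void)
{
        printf("selected: %s\n", pick_vm_ops(MOUNT_DMAPI)->name);
        return 0;
}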
+ */ +STATIC int +xfs_file_open_exec( + struct inode *inode) +{ + struct xfs_mount *mp = XFS_M(inode->i_sb); + struct xfs_inode *ip = XFS_I(inode); + + if (unlikely(mp->m_flags & XFS_MOUNT_DMAPI) && + DM_EVENT_ENABLED(ip, DM_EVENT_READ)) + return -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL); + return 0; +} +#endif /* HAVE_FOP_OPEN_EXEC */ + /* * mmap()d file has taken write protection fault and is being made * writable. We can set the page state up correctly for a writable @@ -481,3 +546,13 @@ static struct vm_operations_struct xfs_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = xfs_vm_page_mkwrite, }; + +#ifdef CONFIG_XFS_DMAPI +static struct vm_operations_struct xfs_dmapi_file_vm_ops = { + .fault = xfs_vm_fault, + .page_mkwrite = xfs_vm_page_mkwrite, +#ifdef HAVE_VMOP_MPROTECT + .mprotect = xfs_vm_mprotect, +#endif +}; +#endif /* CONFIG_XFS_DMAPI */ diff --git a/trunk/fs/xfs/linux-2.6/xfs_ioctl.c b/trunk/fs/xfs/linux-2.6/xfs_ioctl.c index a42ba9d71156..4ddb86b73c6b 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/trunk/fs/xfs/linux-2.6/xfs_ioctl.c @@ -238,7 +238,7 @@ xfs_vget_fsop_handlereq( return error; if (ip == NULL) return XFS_ERROR(EIO); - if (ip->i_d.di_gen != igen) { + if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) { xfs_iput_new(ip, XFS_ILOCK_SHARED); return XFS_ERROR(ENOENT); } @@ -505,14 +505,14 @@ xfs_attrmulti_attr_get( { char *kbuf; int error = EFAULT; - + if (*len > XATTR_SIZE_MAX) return EINVAL; kbuf = kmalloc(*len, GFP_KERNEL); if (!kbuf) return ENOMEM; - error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags); + error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags, NULL); if (error) goto out_kfree; @@ -546,7 +546,7 @@ xfs_attrmulti_attr_set( if (copy_from_user(kbuf, ubuf, len)) goto out_kfree; - + error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); out_kfree: diff --git a/trunk/fs/xfs/linux-2.6/xfs_iops.c b/trunk/fs/xfs/linux-2.6/xfs_iops.c index 2bf287ef5489..a1237dad6430 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_iops.c +++ b/trunk/fs/xfs/linux-2.6/xfs_iops.c @@ -511,8 +511,7 @@ xfs_vn_rename( xfs_dentry_to_name(&nname, ndentry); error = xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), - XFS_I(ndir), &nname, new_inode ? - XFS_I(new_inode) : NULL); + XFS_I(ndir), &nname); if (likely(!error)) { if (new_inode) xfs_validate_fields(new_inode); diff --git a/trunk/fs/xfs/linux-2.6/xfs_linux.h b/trunk/fs/xfs/linux-2.6/xfs_linux.h index 1bc9f600365f..e5143323e71f 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_linux.h +++ b/trunk/fs/xfs/linux-2.6/xfs_linux.h @@ -99,6 +99,7 @@ /* * Feature macros (disable/enable) */ +#define HAVE_SPLICE /* a splice(2) exists in 2.6, but not in 2.4 */ #ifdef CONFIG_SMP #define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ #else diff --git a/trunk/fs/xfs/linux-2.6/xfs_lrw.c b/trunk/fs/xfs/linux-2.6/xfs_lrw.c index 5e3b57516ec7..1ebd8004469c 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_lrw.c +++ b/trunk/fs/xfs/linux-2.6/xfs_lrw.c @@ -394,7 +394,7 @@ xfs_zero_last_block( int error = 0; xfs_bmbt_irec_t imap; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0); zero_offset = XFS_B_FSB_OFFSET(mp, isize); if (zero_offset == 0) { @@ -425,14 +425,14 @@ xfs_zero_last_block( * out sync. We need to drop the ilock while we do this so we * don't deadlock when the buffer cache calls back to us. 
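The xfs_zero_last_block() logic continuing below zeroes from the old EOF to the end of its block, but clamps the span so it never overlaps the write that is about to land. A standalone sketch of the arithmetic, assuming a fixed block size so that XFS_B_FSB_OFFSET() reduces to a modulo (zero_len_for is an illustrative name):

#include <assert.h>
#include <stdint.h>

static uint64_t zero_len_for(uint64_t isize, uint64_t offset,
                             uint64_t blocksize)
{
        uint64_t zero_offset = isize % blocksize;
        uint64_t zero_len;

        if (zero_offset == 0)
                return 0;       /* old EOF already block aligned */
        zero_len = blocksize - zero_offset;
        if (isize + zero_len > offset)
                zero_len = offset - isize;  /* stop at the new write */
        return zero_len;
}

int main(void)
{
        /* EOF at 5000, write at 9000, 4k blocks: zero out to byte 8191 */
        assert(zero_len_for(5000, 9000, 4096) == 3192);
        /* write begins inside the EOF block: only 100 bytes to zero */
        assert(zero_len_for(5000, 5100, 4096) == 100);
        return 0;
}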
*/ - xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_iunlock(ip, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD); zero_len = mp->m_sb.sb_blocksize - zero_offset; if (isize + zero_len > offset) zero_len = offset - isize; error = xfs_iozero(ip, isize, zero_len); - xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); ASSERT(error >= 0); return error; } @@ -465,7 +465,8 @@ xfs_zero_eof( int error = 0; xfs_bmbt_irec_t imap; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); + ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE)); ASSERT(offset > isize); /* @@ -474,7 +475,8 @@ xfs_zero_eof( */ error = xfs_zero_last_block(ip, offset, isize); if (error) { - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); + ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE)); return error; } @@ -505,7 +507,8 @@ xfs_zero_eof( error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb, 0, NULL, 0, &imap, &nimaps, NULL, NULL); if (error) { - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); + ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE)); return error; } ASSERT(nimaps > 0); @@ -529,7 +532,7 @@ xfs_zero_eof( * Drop the inode lock while we're doing the I/O. * We'll still have the iolock to protect us. */ - xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_iunlock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); zero_off = XFS_FSB_TO_B(mp, start_zero_fsb); zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount); @@ -545,13 +548,13 @@ xfs_zero_eof( start_zero_fsb = imap.br_startoff + imap.br_blockcount; ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); - xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); } return 0; out_lock: - xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); ASSERT(error >= 0); return error; } diff --git a/trunk/fs/xfs/linux-2.6/xfs_lrw.h b/trunk/fs/xfs/linux-2.6/xfs_lrw.h index e6be37dbd0e9..e1d498b4ba7a 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_lrw.h +++ b/trunk/fs/xfs/linux-2.6/xfs_lrw.h @@ -50,6 +50,7 @@ struct xfs_iomap; #define XFS_INVAL_CACHED 18 #define XFS_DIORD_ENTER 19 #define XFS_DIOWR_ENTER 20 +#define XFS_SENDFILE_ENTER 21 #define XFS_WRITEPAGE_ENTER 22 #define XFS_RELEASEPAGE_ENTER 23 #define XFS_INVALIDPAGE_ENTER 24 diff --git a/trunk/fs/xfs/linux-2.6/xfs_super.c b/trunk/fs/xfs/linux-2.6/xfs_super.c index 742b2c7852c1..865eb708aa95 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_super.c +++ b/trunk/fs/xfs/linux-2.6/xfs_super.c @@ -1181,7 +1181,7 @@ xfs_fs_statfs( statp->f_fsid.val[0] = (u32)id; statp->f_fsid.val[1] = (u32)(id >> 32); - xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); + xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT); spin_lock(&mp->m_sb_lock); statp->f_bsize = sbp->sb_blocksize; diff --git a/trunk/fs/xfs/linux-2.6/xfs_vnode.h b/trunk/fs/xfs/linux-2.6/xfs_vnode.h index 9d73cb5c0fc7..8b4d63ce8694 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_vnode.h +++ b/trunk/fs/xfs/linux-2.6/xfs_vnode.h @@ -25,6 +25,12 @@ struct attrlist_cursor_kern; typedef struct inode bhv_vnode_t; +#define VN_ISLNK(vp) S_ISLNK((vp)->i_mode) +#define VN_ISREG(vp) S_ISREG((vp)->i_mode) +#define VN_ISDIR(vp) S_ISDIR((vp)->i_mode) +#define VN_ISCHR(vp) S_ISCHR((vp)->i_mode) +#define VN_ISBLK(vp) S_ISBLK((vp)->i_mode) + /* * Vnode to Linux inode mapping. */ @@ -145,6 +151,24 @@ typedef struct bhv_vattr { XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\ XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT) +/* + * Modes. 
+ */ +#define VSUID S_ISUID /* set user id on execution */ +#define VSGID S_ISGID /* set group id on execution */ +#define VSVTX S_ISVTX /* save swapped text even after use */ +#define VREAD S_IRUSR /* read, write, execute permissions */ +#define VWRITE S_IWUSR +#define VEXEC S_IXUSR + +#define MODEMASK S_IALLUGO /* mode bits plus permission bits */ + +/* + * Check whether mandatory file locking is enabled. + */ +#define MANDLOCK(vp, mode) \ + (VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID) + extern void vn_init(void); extern int vn_revalidate(bhv_vnode_t *); diff --git a/trunk/fs/xfs/quota/xfs_dquot.c b/trunk/fs/xfs/quota/xfs_dquot.c index 85df3288efd5..631ebb31b295 100644 --- a/trunk/fs/xfs/quota/xfs_dquot.c +++ b/trunk/fs/xfs/quota/xfs_dquot.c @@ -933,7 +933,7 @@ xfs_qm_dqget( type == XFS_DQ_PROJ || type == XFS_DQ_GROUP); if (ip) { - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); if (type == XFS_DQ_USER) ASSERT(ip->i_udquot == NULL); else @@ -1088,7 +1088,7 @@ xfs_qm_dqget( xfs_qm_mplist_unlock(mp); XFS_DQ_HASH_UNLOCK(h); dqret: - ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT((ip == NULL) || XFS_ISLOCKED_INODE_EXCL(ip)); xfs_dqtrace_entry(dqp, "DQGET DONE"); *O_dqpp = dqp; return (0); diff --git a/trunk/fs/xfs/quota/xfs_qm.c b/trunk/fs/xfs/quota/xfs_qm.c index d31cce1165c5..40ea56409561 100644 --- a/trunk/fs/xfs/quota/xfs_qm.c +++ b/trunk/fs/xfs/quota/xfs_qm.c @@ -670,7 +670,7 @@ xfs_qm_dqattach_one( xfs_dquot_t *dqp; int error; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); error = 0; /* * See if we already have it in the inode itself. IO_idqpp is @@ -874,7 +874,7 @@ xfs_qm_dqattach( return 0; ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 || - xfs_isilocked(ip, XFS_ILOCK_EXCL)); + XFS_ISLOCKED_INODE_EXCL(ip)); if (! (flags & XFS_QMOPT_ILOCKED)) xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -888,8 +888,7 @@ xfs_qm_dqattach( goto done; nquotas++; } - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); if (XFS_IS_OQUOTA_ON(mp)) { error = XFS_IS_GQUOTA_ON(mp) ? xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, @@ -914,7 +913,7 @@ xfs_qm_dqattach( * This WON'T, in general, result in a thrash. */ if (nquotas == 2) { - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); ASSERT(ip->i_udquot); ASSERT(ip->i_gdquot); @@ -957,7 +956,7 @@ xfs_qm_dqattach( #ifdef QUOTADEBUG else - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); #endif return error; } @@ -1292,7 +1291,7 @@ xfs_qm_dqget_noattach( xfs_mount_t *mp; xfs_dquot_t *udqp, *gdqp; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); mp = ip->i_mount; udqp = NULL; gdqp = NULL; @@ -1393,7 +1392,7 @@ xfs_qm_qino_alloc( * Keep an extra reference to this quota inode. This inode is * locked exclusively and joined to the transaction already. */ - ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL)); + ASSERT(XFS_ISLOCKED_INODE_EXCL(*ip)); VN_HOLD(XFS_ITOV((*ip))); /* @@ -1738,6 +1737,12 @@ xfs_qm_dqusage_adjust( return error; } + if (ip->i_d.di_mode == 0) { + xfs_iput_new(ip, XFS_ILOCK_EXCL); + *res = BULKSTAT_RV_NOTHING; + return XFS_ERROR(ENOENT); + } + /* * Obtain the locked dquots. In case of an error (eg. allocation * fails for ENOSPC), we return the negative of the error number @@ -2558,7 +2563,7 @@ xfs_qm_vop_chown( uint bfield = XFS_IS_REALTIME_INODE(ip) ? 
XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); /* old dquot */ @@ -2602,7 +2607,7 @@ xfs_qm_vop_chown_reserve( uint delblks, blkflags, prjflags = 0; xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); + ASSERT(XFS_ISLOCKED_INODE(ip)); mp = ip->i_mount; ASSERT(XFS_IS_QUOTA_RUNNING(mp)); @@ -2712,7 +2717,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode( if (!XFS_IS_QUOTA_ON(tp->t_mountp)) return; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp)); if (udqp) { diff --git a/trunk/fs/xfs/quota/xfs_qm_syscalls.c b/trunk/fs/xfs/quota/xfs_qm_syscalls.c index 768a3b27d2b6..8342823dbdc3 100644 --- a/trunk/fs/xfs/quota/xfs_qm_syscalls.c +++ b/trunk/fs/xfs/quota/xfs_qm_syscalls.c @@ -1366,6 +1366,12 @@ xfs_qm_internalqcheck_adjust( return (error); } + if (ip->i_d.di_mode == 0) { + xfs_iput_new(ip, lock_flags); + *res = BULKSTAT_RV_NOTHING; + return XFS_ERROR(ENOENT); + } + /* * This inode can have blocks after eof which can get released * when we send it to inactive. Since we don't check the dquot diff --git a/trunk/fs/xfs/quota/xfs_quota_priv.h b/trunk/fs/xfs/quota/xfs_quota_priv.h index 5e4a40b1c565..a8b85e2be9d5 100644 --- a/trunk/fs/xfs/quota/xfs_quota_priv.h +++ b/trunk/fs/xfs/quota/xfs_quota_priv.h @@ -27,6 +27,11 @@ /* Number of dquots that fit in to a dquot block */ #define XFS_QM_DQPERBLK(mp) ((mp)->m_quotainfo->qi_dqperchunk) +#define XFS_ISLOCKED_INODE(ip) (ismrlocked(&(ip)->i_lock, \ + MR_UPDATE | MR_ACCESS) != 0) +#define XFS_ISLOCKED_INODE_EXCL(ip) (ismrlocked(&(ip)->i_lock, \ + MR_UPDATE) != 0) + #define XFS_DQ_IS_ADDEDTO_TRX(t, d) ((d)->q_transp == (t)) #define XFS_QI_MPLRECLAIMS(mp) ((mp)->m_quotainfo->qi_dqreclaims) diff --git a/trunk/fs/xfs/quota/xfs_trans_dquot.c b/trunk/fs/xfs/quota/xfs_trans_dquot.c index 99611381e740..f441f836ca8b 100644 --- a/trunk/fs/xfs/quota/xfs_trans_dquot.c +++ b/trunk/fs/xfs/quota/xfs_trans_dquot.c @@ -834,7 +834,7 @@ xfs_trans_reserve_quota_nblks( ASSERT(ip->i_ino != mp->m_sb.sb_uquotino); ASSERT(ip->i_ino != mp->m_sb.sb_gquotino); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == XFS_TRANS_DQ_RES_RTBLKS || diff --git a/trunk/fs/xfs/xfs.h b/trunk/fs/xfs/xfs.h index 540e4c989825..765aaf65e2d3 100644 --- a/trunk/fs/xfs/xfs.h +++ b/trunk/fs/xfs/xfs.h @@ -22,7 +22,7 @@ #define STATIC #define DEBUG 1 #define XFS_BUF_LOCK_TRACKING 1 -/* #define QUOTADEBUG 1 */ +#define QUOTADEBUG 1 #endif #ifdef CONFIG_XFS_TRACE diff --git a/trunk/fs/xfs/xfs_acl.c b/trunk/fs/xfs/xfs_acl.c index ebee3a4f703a..8e130b9720ae 100644 --- a/trunk/fs/xfs/xfs_acl.c +++ b/trunk/fs/xfs/xfs_acl.c @@ -72,7 +72,7 @@ xfs_acl_vhasacl_default( { int error; - if (!S_ISDIR(vp->i_mode)) + if (!VN_ISDIR(vp)) return 0; xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error); return (error == 0); @@ -238,8 +238,15 @@ xfs_acl_vget( error = EINVAL; goto out; } - if (kind == _ACL_TYPE_ACCESS) - xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, xfs_acl); + if (kind == _ACL_TYPE_ACCESS) { + bhv_vattr_t va; + + va.va_mask = XFS_AT_MODE; + error = xfs_getattr(xfs_vtoi(vp), &va, 0); + if (error) + goto out; + xfs_acl_sync_mode(va.va_mode, xfs_acl); + } error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, 
size); } out: @@ -334,15 +341,14 @@ xfs_acl_iaccess( { xfs_acl_t *acl; int rval; - struct xfs_name acl_name = {SGI_ACL_FILE, SGI_ACL_FILE_SIZE}; if (!(_ACL_ALLOC(acl))) return -1; /* If the file has no ACL return -1. */ rval = sizeof(xfs_acl_t); - if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval, - ATTR_ROOT | ATTR_KERNACCESS)) { + if (xfs_attr_fetch(ip, SGI_ACL_FILE, SGI_ACL_FILE_SIZE, + (char *)acl, &rval, ATTR_ROOT | ATTR_KERNACCESS, cr)) { _ACL_FREE(acl); return -1; } @@ -367,15 +373,23 @@ xfs_acl_allow_set( bhv_vnode_t *vp, int kind) { + xfs_inode_t *ip = xfs_vtoi(vp); + bhv_vattr_t va; + int error; + if (vp->i_flags & (S_IMMUTABLE|S_APPEND)) return EPERM; - if (kind == _ACL_TYPE_DEFAULT && !S_ISDIR(vp->i_mode)) + if (kind == _ACL_TYPE_DEFAULT && !VN_ISDIR(vp)) return ENOTDIR; if (vp->i_sb->s_flags & MS_RDONLY) return EROFS; - if (xfs_vtoi(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER)) + va.va_mask = XFS_AT_UID; + error = xfs_getattr(ip, &va, 0); + if (error) + return error; + if (va.va_uid != current->fsuid && !capable(CAP_FOWNER)) return EPERM; - return 0; + return error; } /* @@ -580,7 +594,7 @@ xfs_acl_get_attr( *error = xfs_attr_get(xfs_vtoi(vp), kind == _ACL_TYPE_ACCESS ? SGI_ACL_FILE : SGI_ACL_DEFAULT, - (char *)aclp, &len, flags); + (char *)aclp, &len, flags, sys_cred); if (*error || (flags & ATTR_KERNOVAL)) return; xfs_acl_get_endian(aclp); @@ -629,6 +643,7 @@ xfs_acl_vtoacl( xfs_acl_t *access_acl, xfs_acl_t *default_acl) { + bhv_vattr_t va; int error = 0; if (access_acl) { @@ -637,10 +652,16 @@ xfs_acl_vtoacl( * be obtained for some reason, invalidate the access ACL. */ xfs_acl_get_attr(vp, access_acl, _ACL_TYPE_ACCESS, 0, &error); + if (!error) { + /* Got the ACL, need the mode... */ + va.va_mask = XFS_AT_MODE; + error = xfs_getattr(xfs_vtoi(vp), &va, 0); + } + if (error) access_acl->acl_cnt = XFS_ACL_NOT_PRESENT; else /* We have a good ACL and the file mode, synchronize. */ - xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, access_acl); + xfs_acl_sync_mode(va.va_mode, access_acl); } if (default_acl) { @@ -698,7 +719,7 @@ xfs_acl_inherit( * If the new file is a directory, its default ACL is a copy of * the containing directory's default ACL. */ - if (S_ISDIR(vp->i_mode)) + if (VN_ISDIR(vp)) xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error); if (!error && !basicperms) xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error); @@ -723,7 +744,7 @@ xfs_acl_setmode( bhv_vattr_t va; xfs_acl_entry_t *ap; xfs_acl_entry_t *gap = NULL; - int i, nomask = 1; + int i, error, nomask = 1; *basicperms = 1; @@ -735,7 +756,11 @@ xfs_acl_setmode( * mode. The m:: bits take precedence over the g:: bits. 
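xfs_acl_setmode(), continuing below, clears the rwx bits and rebuilds them from the ACL entries, with the mask entry (m::) overriding the group entry (g::) when one is present. A simplified user-space sketch of that folding (the demo_* tags and layout are illustrative, not the on-disk XFS ACL format):

#include <assert.h>
#include <sys/stat.h>

enum { DEMO_USER_OBJ, DEMO_GROUP_OBJ, DEMO_MASK, DEMO_OTHER_OBJ };

struct demo_acl_entry {
        int ae_tag;
        mode_t ae_perm;         /* rwx bits, 0..7 */
};

static mode_t demo_acl_to_mode(mode_t mode,
                               const struct demo_acl_entry *e, int n)
{
        mode_t group_perm = 0, mask_perm = 0;
        int have_mask = 0, i;

        mode &= ~(S_IRWXU | S_IRWXG | S_IRWXO);
        for (i = 0; i < n; i++) {
                switch (e[i].ae_tag) {
                case DEMO_USER_OBJ:
                        mode |= e[i].ae_perm << 6;
                        break;
                case DEMO_GROUP_OBJ:
                        group_perm = e[i].ae_perm;
                        break;
                case DEMO_MASK:
                        mask_perm = e[i].ae_perm;
                        have_mask = 1;
                        break;
                case DEMO_OTHER_OBJ:
                        mode |= e[i].ae_perm;
                        break;
                }
        }
        /* m:: takes precedence over g:: for the group bits */
        mode |= (have_mask ? mask_perm : group_perm) << 3;
        return mode;
}

int main(void)
{
        struct demo_acl_entry acl[] = {
                { DEMO_USER_OBJ, 7 },   /* u::rwx */
                { DEMO_GROUP_OBJ, 5 },  /* g::r-x */
                { DEMO_MASK, 4 },       /* m::r-- overrides g:: */
                { DEMO_OTHER_OBJ, 0 },
        };

        assert(demo_acl_to_mode(0, acl, 4) == 0740);
        return 0;
}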
*/ va.va_mask = XFS_AT_MODE; - va.va_mode = xfs_vtoi(vp)->i_d.di_mode; + error = xfs_getattr(xfs_vtoi(vp), &va, 0); + if (error) + return error; + + va.va_mask = XFS_AT_MODE; va.va_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO); ap = acl->acl_entry; for (i = 0; i < acl->acl_cnt; ++i) { diff --git a/trunk/fs/xfs/xfs_attr.c b/trunk/fs/xfs/xfs_attr.c index df151a859186..36d781ee5fcc 100644 --- a/trunk/fs/xfs/xfs_attr.c +++ b/trunk/fs/xfs/xfs_attr.c @@ -101,28 +101,14 @@ STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args); ktrace_t *xfs_attr_trace_buf; #endif -STATIC int -xfs_attr_name_to_xname( - struct xfs_name *xname, - const char *aname) -{ - if (!aname) - return EINVAL; - xname->name = aname; - xname->len = strlen(aname); - if (xname->len >= MAXNAMELEN) - return EFAULT; /* match IRIX behaviour */ - - return 0; -} /*======================================================================== * Overall external interface routines. *========================================================================*/ int -xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name, - char *value, int *valuelenp, int flags) +xfs_attr_fetch(xfs_inode_t *ip, const char *name, int namelen, + char *value, int *valuelenp, int flags, struct cred *cred) { xfs_da_args_t args; int error; @@ -136,8 +122,8 @@ xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name, * Fill in the arg structure for this request. */ memset((char *)&args, 0, sizeof(args)); - args.name = name->name; - args.namelen = name->len; + args.name = name; + args.namelen = namelen; args.value = value; args.valuelen = *valuelenp; args.flags = flags; @@ -176,29 +162,31 @@ xfs_attr_get( const char *name, char *value, int *valuelenp, - int flags) + int flags, + cred_t *cred) { - int error; - struct xfs_name xname; + int error, namelen; XFS_STATS_INC(xs_attr_get); + if (!name) + return(EINVAL); + namelen = strlen(name); + if (namelen >= MAXNAMELEN) + return(EFAULT); /* match IRIX behaviour */ + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return(EIO); - error = xfs_attr_name_to_xname(&xname, name); - if (error) - return error; - xfs_ilock(ip, XFS_ILOCK_SHARED); - error = xfs_attr_fetch(ip, &xname, value, valuelenp, flags); + error = xfs_attr_fetch(ip, name, namelen, value, valuelenp, flags, cred); xfs_iunlock(ip, XFS_ILOCK_SHARED); return(error); } -STATIC int -xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, - char *value, int valuelen, int flags) +int +xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen, + char *value, int valuelen, int flags) { xfs_da_args_t args; xfs_fsblock_t firstblock; @@ -221,7 +209,7 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, */ if (XFS_IFORK_Q(dp) == 0) { int sf_size = sizeof(xfs_attr_sf_hdr_t) + - XFS_ATTR_SF_ENTSIZE_BYNAME(name->len, valuelen); + XFS_ATTR_SF_ENTSIZE_BYNAME(namelen, valuelen); if ((error = xfs_bmap_add_attrfork(dp, sf_size, rsvd))) return(error); @@ -231,8 +219,8 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, * Fill in the arg structure for this request. */ memset((char *)&args, 0, sizeof(args)); - args.name = name->name; - args.namelen = name->len; + args.name = name; + args.namelen = namelen; args.value = value; args.valuelen = valuelen; args.flags = flags; @@ -248,7 +236,7 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, * Determine space new attribute will use, and if it would be * "local" or "remote" (note: local != inline). 
*/ - size = xfs_attr_leaf_newentsize(name->len, valuelen, + size = xfs_attr_leaf_newentsize(namelen, valuelen, mp->m_sb.sb_blocksize, &local); nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); @@ -441,27 +429,26 @@ xfs_attr_set( int valuelen, int flags) { - int error; - struct xfs_name xname; + int namelen; + + namelen = strlen(name); + if (namelen >= MAXNAMELEN) + return EFAULT; /* match IRIX behaviour */ XFS_STATS_INC(xs_attr_set); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return (EIO); - error = xfs_attr_name_to_xname(&xname, name); - if (error) - return error; - - return xfs_attr_set_int(dp, &xname, value, valuelen, flags); + return xfs_attr_set_int(dp, name, namelen, value, valuelen, flags); } /* * Generic handler routine to remove a name from an attribute list. * Transitions attribute list from Btree to shortform as necessary. */ -STATIC int -xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) +int +xfs_attr_remove_int(xfs_inode_t *dp, const char *name, int namelen, int flags) { xfs_da_args_t args; xfs_fsblock_t firstblock; @@ -473,8 +460,8 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) * Fill in the arg structure for this request. */ memset((char *)&args, 0, sizeof(args)); - args.name = name->name; - args.namelen = name->len; + args.name = name; + args.namelen = namelen; args.flags = flags; args.hashval = xfs_da_hashname(args.name, args.namelen); args.dp = dp; @@ -588,18 +575,17 @@ xfs_attr_remove( const char *name, int flags) { - int error; - struct xfs_name xname; + int namelen; + + namelen = strlen(name); + if (namelen >= MAXNAMELEN) + return EFAULT; /* match IRIX behaviour */ XFS_STATS_INC(xs_attr_remove); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return (EIO); - error = xfs_attr_name_to_xname(&xname, name); - if (error) - return error; - xfs_ilock(dp, XFS_ILOCK_SHARED); if (XFS_IFORK_Q(dp) == 0 || (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && @@ -609,10 +595,10 @@ xfs_attr_remove( } xfs_iunlock(dp, XFS_ILOCK_SHARED); - return xfs_attr_remove_int(dp, &xname, flags); + return xfs_attr_remove_int(dp, name, namelen, flags); } -STATIC int +int /* error */ xfs_attr_list_int(xfs_attr_list_context_t *context) { int error; @@ -2536,7 +2522,8 @@ attr_generic_get( { int error, asize = size; - error = xfs_attr_get(xfs_vtoi(vp), name, data, &asize, xflags); + error = xfs_attr_get(xfs_vtoi(vp), name, data, + &asize, xflags, NULL); if (!error) return asize; return -error; diff --git a/trunk/fs/xfs/xfs_attr.h b/trunk/fs/xfs/xfs_attr.h index 6cfc9384fe35..786eba3121c4 100644 --- a/trunk/fs/xfs/xfs_attr.h +++ b/trunk/fs/xfs/xfs_attr.h @@ -158,10 +158,14 @@ struct xfs_da_args; /* * Overall external interface routines. 
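The attribute entry points reworked above (xfs_attr_get, xfs_attr_set, xfs_attr_remove) each open-code the same name validation that the removed xfs_attr_name_to_xname() helper had centralised: a missing name is EINVAL and an over-long one is EFAULT, matching IRIX. A sketch of that check (DEMO_MAXNAMELEN stands in for the kernel's MAXNAMELEN):

#include <assert.h>
#include <errno.h>
#include <string.h>

#define DEMO_MAXNAMELEN 256

static int demo_validate_attr_name(const char *name, int *namelen)
{
        if (!name)
                return EINVAL;
        *namelen = strlen(name);
        if (*namelen >= DEMO_MAXNAMELEN)
                return EFAULT;  /* match IRIX behaviour */
        return 0;
}

int main(void)
{
        int len;

        assert(demo_validate_attr_name("user.comment", &len) == 0 && len == 12);
        assert(demo_validate_attr_name(NULL, &len) == EINVAL);
        return 0;
}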
*/ +int xfs_attr_set_int(struct xfs_inode *, const char *, int, char *, int, int); +int xfs_attr_remove_int(struct xfs_inode *, const char *, int, int); +int xfs_attr_list_int(struct xfs_attr_list_context *); int xfs_attr_inactive(struct xfs_inode *dp); int xfs_attr_shortform_getvalue(struct xfs_da_args *); -int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int); +int xfs_attr_fetch(struct xfs_inode *, const char *, int, + char *, int *, int, struct cred *); int xfs_attr_rmtval_get(struct xfs_da_args *args); #endif /* __XFS_ATTR_H__ */ diff --git a/trunk/fs/xfs/xfs_bmap.c b/trunk/fs/xfs/xfs_bmap.c index 53c259f5a5af..eb198c01c35d 100644 --- a/trunk/fs/xfs/xfs_bmap.c +++ b/trunk/fs/xfs/xfs_bmap.c @@ -4074,6 +4074,7 @@ xfs_bmap_add_attrfork( error2: xfs_bmap_cancel(&flist); error1: + ASSERT(ismrlocked(&ip->i_lock,MR_UPDATE)); xfs_iunlock(ip, XFS_ILOCK_EXCL); error0: xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); diff --git a/trunk/fs/xfs/xfs_dfrag.c b/trunk/fs/xfs/xfs_dfrag.c index 5f3647cb9885..3f53fad356a3 100644 --- a/trunk/fs/xfs/xfs_dfrag.c +++ b/trunk/fs/xfs/xfs_dfrag.c @@ -162,7 +162,7 @@ xfs_swap_extents( ips[1] = ip; } - xfs_lock_inodes(ips, 2, lock_flags); + xfs_lock_inodes(ips, 2, 0, lock_flags); locked = 1; /* Verify that both files have the same format */ @@ -265,7 +265,7 @@ xfs_swap_extents( locked = 0; goto error0; } - xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL); + xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); /* * Count the number of extended attribute blocks diff --git a/trunk/fs/xfs/xfs_fsops.c b/trunk/fs/xfs/xfs_fsops.c index 381ebda4f7bc..d3a0f538d6a6 100644 --- a/trunk/fs/xfs/xfs_fsops.c +++ b/trunk/fs/xfs/xfs_fsops.c @@ -462,7 +462,7 @@ xfs_fs_counts( xfs_mount_t *mp, xfs_fsop_counts_t *cnt) { - xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); + xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT); spin_lock(&mp->m_sb_lock); cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); cnt->freertx = mp->m_sb.sb_frextents; @@ -524,7 +524,7 @@ xfs_reserve_blocks( */ retry: spin_lock(&mp->m_sb_lock); - xfs_icsb_sync_counters_locked(mp, 0); + xfs_icsb_sync_counters_flags(mp, XFS_ICSB_SB_LOCKED); /* * If our previous reservation was larger than the current value, @@ -552,8 +552,11 @@ xfs_reserve_blocks( mp->m_resblks += free; mp->m_resblks_avail += free; fdblks_delta = -free; + mp->m_sb.sb_fdblocks = XFS_ALLOC_SET_ASIDE(mp); } else { fdblks_delta = -delta; + mp->m_sb.sb_fdblocks = + lcounter + XFS_ALLOC_SET_ASIDE(mp); mp->m_resblks = request; mp->m_resblks_avail += delta; } @@ -584,6 +587,7 @@ xfs_reserve_blocks( if (error == ENOSPC) goto retry; } + return 0; } diff --git a/trunk/fs/xfs/xfs_ialloc.c b/trunk/fs/xfs/xfs_ialloc.c index aad8c5da38af..a64dfbd565a5 100644 --- a/trunk/fs/xfs/xfs_ialloc.c +++ b/trunk/fs/xfs/xfs_ialloc.c @@ -147,7 +147,6 @@ xfs_ialloc_ag_alloc( int version; /* inode version number to use */ int isaligned = 0; /* inode allocation at stripe unit */ /* boundary */ - unsigned int gen; args.tp = tp; args.mp = tp->t_mountp; @@ -291,14 +290,6 @@ xfs_ialloc_ag_alloc( else version = XFS_DINODE_VERSION_1; - /* - * Seed the new inode cluster with a random generation number. This - * prevents short-term reuse of generation numbers if a chunk is - * freed and then immediately reallocated. We use random numbers - * rather than a linear progression to prevent the next generation - * number from being easily guessable. - */ - gen = random32(); for (j = 0; j < nbufs; j++) { /* * Get the block. 
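Several hunks in this patch lean on inode generation numbers: the export and handle lookups compare the caller's generation against di_gen, and the xfs_ialloc.c change above drops the random seeding of newly allocated clusters. A toy illustration of why the (inode number, generation) pair catches stale handles once an inode slot is reused (demo_* names are illustrative):

#include <assert.h>
#include <stdint.h>

struct demo_inode {
        uint64_t ino;
        uint32_t gen;   /* bumped whenever the slot is reallocated */
};

/* A file handle captures (ino, gen); both must still match. */
static int demo_handle_valid(const struct demo_inode *ip,
                             uint64_t h_ino, uint32_t h_gen)
{
        return ip->ino == h_ino && ip->gen == h_gen;
}

int main(void)
{
        struct demo_inode ip = { .ino = 42, .gen = 7 };

        assert(demo_handle_valid(&ip, 42, 7));
        ip.gen++;       /* inode freed and handed to a new file */
        assert(!demo_handle_valid(&ip, 42, 7));         /* ESTALE */
        return 0;
}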
@@ -318,7 +309,6 @@ xfs_ialloc_ag_alloc( free = XFS_MAKE_IPTR(args.mp, fbuf, i); free->di_core.di_magic = cpu_to_be16(XFS_DINODE_MAGIC); free->di_core.di_version = version; - free->di_core.di_gen = cpu_to_be32(gen); free->di_next_unlinked = cpu_to_be32(NULLAGINO); xfs_ialloc_log_di(tp, fbuf, i, XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED); diff --git a/trunk/fs/xfs/xfs_iget.c b/trunk/fs/xfs/xfs_iget.c index b07604b94d9f..e657c5128460 100644 --- a/trunk/fs/xfs/xfs_iget.c +++ b/trunk/fs/xfs/xfs_iget.c @@ -593,9 +593,8 @@ xfs_iunlock_map_shared( * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL */ void -xfs_ilock( - xfs_inode_t *ip, - uint lock_flags) +xfs_ilock(xfs_inode_t *ip, + uint lock_flags) { /* * You can't set both SHARED and EXCL for the same lock, @@ -608,16 +607,16 @@ xfs_ilock( (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); - if (lock_flags & XFS_IOLOCK_EXCL) + if (lock_flags & XFS_IOLOCK_EXCL) { mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); - else if (lock_flags & XFS_IOLOCK_SHARED) + } else if (lock_flags & XFS_IOLOCK_SHARED) { mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); - - if (lock_flags & XFS_ILOCK_EXCL) + } + if (lock_flags & XFS_ILOCK_EXCL) { mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); - else if (lock_flags & XFS_ILOCK_SHARED) + } else if (lock_flags & XFS_ILOCK_SHARED) { mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); - + } xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address); } @@ -632,12 +631,15 @@ xfs_ilock( * lock_flags -- this parameter indicates the inode's locks to be * to be locked. See the comment for xfs_ilock() for a list * of valid values. + * */ int -xfs_ilock_nowait( - xfs_inode_t *ip, - uint lock_flags) +xfs_ilock_nowait(xfs_inode_t *ip, + uint lock_flags) { + int iolocked; + int ilocked; + /* * You can't set both SHARED and EXCL for the same lock, * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, @@ -649,30 +651,37 @@ xfs_ilock_nowait( (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); + iolocked = 0; if (lock_flags & XFS_IOLOCK_EXCL) { - if (!mrtryupdate(&ip->i_iolock)) - goto out; + iolocked = mrtryupdate(&ip->i_iolock); + if (!iolocked) { + return 0; + } } else if (lock_flags & XFS_IOLOCK_SHARED) { - if (!mrtryaccess(&ip->i_iolock)) - goto out; + iolocked = mrtryaccess(&ip->i_iolock); + if (!iolocked) { + return 0; + } } if (lock_flags & XFS_ILOCK_EXCL) { - if (!mrtryupdate(&ip->i_lock)) - goto out_undo_iolock; + ilocked = mrtryupdate(&ip->i_lock); + if (!ilocked) { + if (iolocked) { + mrunlock(&ip->i_iolock); + } + return 0; + } } else if (lock_flags & XFS_ILOCK_SHARED) { - if (!mrtryaccess(&ip->i_lock)) - goto out_undo_iolock; + ilocked = mrtryaccess(&ip->i_lock); + if (!ilocked) { + if (iolocked) { + mrunlock(&ip->i_iolock); + } + return 0; + } } xfs_ilock_trace(ip, 2, lock_flags, (inst_t *)__return_address); return 1; - - out_undo_iolock: - if (lock_flags & XFS_IOLOCK_EXCL) - mrunlock_excl(&ip->i_iolock); - else if (lock_flags & XFS_IOLOCK_SHARED) - mrunlock_shared(&ip->i_iolock); - out: - return 0; } /* @@ -688,9 +697,8 @@ xfs_ilock_nowait( * */ void -xfs_iunlock( - xfs_inode_t *ip, - uint lock_flags) +xfs_iunlock(xfs_inode_t *ip, + uint lock_flags) { /* * You can't set both SHARED and EXCL for the same lock, @@ -705,25 +713,31 @@ xfs_iunlock( XFS_LOCK_DEP_MASK)) == 0); ASSERT(lock_flags != 0); - if (lock_flags & XFS_IOLOCK_EXCL) - mrunlock_excl(&ip->i_iolock); - else if (lock_flags & 
XFS_IOLOCK_SHARED) - mrunlock_shared(&ip->i_iolock); + if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) { + ASSERT(!(lock_flags & XFS_IOLOCK_SHARED) || + (ismrlocked(&ip->i_iolock, MR_ACCESS))); + ASSERT(!(lock_flags & XFS_IOLOCK_EXCL) || + (ismrlocked(&ip->i_iolock, MR_UPDATE))); + mrunlock(&ip->i_iolock); + } - if (lock_flags & XFS_ILOCK_EXCL) - mrunlock_excl(&ip->i_lock); - else if (lock_flags & XFS_ILOCK_SHARED) - mrunlock_shared(&ip->i_lock); + if (lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) { + ASSERT(!(lock_flags & XFS_ILOCK_SHARED) || + (ismrlocked(&ip->i_lock, MR_ACCESS))); + ASSERT(!(lock_flags & XFS_ILOCK_EXCL) || + (ismrlocked(&ip->i_lock, MR_UPDATE))); + mrunlock(&ip->i_lock); - if ((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) && - !(lock_flags & XFS_IUNLOCK_NONOTIFY) && ip->i_itemp) { /* * Let the AIL know that this item has been unlocked in case * it is in the AIL and anyone is waiting on it. Don't do * this if the caller has asked us not to. */ - xfs_trans_unlocked_item(ip->i_mount, - (xfs_log_item_t*)(ip->i_itemp)); + if (!(lock_flags & XFS_IUNLOCK_NONOTIFY) && + ip->i_itemp != NULL) { + xfs_trans_unlocked_item(ip->i_mount, + (xfs_log_item_t*)(ip->i_itemp)); + } } xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address); } @@ -733,47 +747,21 @@ xfs_iunlock( * if it is being demoted. */ void -xfs_ilock_demote( - xfs_inode_t *ip, - uint lock_flags) +xfs_ilock_demote(xfs_inode_t *ip, + uint lock_flags) { ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)); ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); - if (lock_flags & XFS_ILOCK_EXCL) + if (lock_flags & XFS_ILOCK_EXCL) { + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); mrdemote(&ip->i_lock); - if (lock_flags & XFS_IOLOCK_EXCL) - mrdemote(&ip->i_iolock); -} - -#ifdef DEBUG -/* - * Debug-only routine, without additional rw_semaphore APIs, we can - * now only answer requests regarding whether we hold the lock for write - * (reader state is outside our visibility, we only track writer state). - * - * Note: this means !xfs_isilocked would give false positives, so don't do that. 
- */ -int -xfs_isilocked( - xfs_inode_t *ip, - uint lock_flags) -{ - if ((lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) == - XFS_ILOCK_EXCL) { - if (!ip->i_lock.mr_writer) - return 0; } - - if ((lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) == - XFS_IOLOCK_EXCL) { - if (!ip->i_iolock.mr_writer) - return 0; + if (lock_flags & XFS_IOLOCK_EXCL) { + ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE)); + mrdemote(&ip->i_iolock); } - - return 1; } -#endif /* * The following three routines simply manage the i_flock diff --git a/trunk/fs/xfs/xfs_inode.c b/trunk/fs/xfs/xfs_inode.c index cf0bb9c1d621..ca12acb90394 100644 --- a/trunk/fs/xfs/xfs_inode.c +++ b/trunk/fs/xfs/xfs_inode.c @@ -1291,7 +1291,7 @@ xfs_file_last_byte( xfs_fileoff_t size_last_block; int error; - ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)); + ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE | MR_ACCESS)); mp = ip->i_mount; /* @@ -1402,7 +1402,7 @@ xfs_itruncate_start( bhv_vnode_t *vp; int error = 0; - ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); + ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0); ASSERT((new_size == 0) || (new_size <= ip->i_size)); ASSERT((flags == XFS_ITRUNC_DEFINITE) || (flags == XFS_ITRUNC_MAYBE)); @@ -1528,7 +1528,8 @@ xfs_itruncate_finish( xfs_bmap_free_t free_list; int error; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); + ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0); + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0); ASSERT((new_size == 0) || (new_size <= ip->i_size)); ASSERT(*tp != NULL); ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); @@ -1779,7 +1780,8 @@ xfs_igrow_start( xfs_fsize_t new_size, cred_t *credp) { - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); + ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); + ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0); ASSERT(new_size > ip->i_size); /* @@ -1807,7 +1809,8 @@ xfs_igrow_finish( xfs_fsize_t new_size, int change_flag) { - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); + ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); + ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0); ASSERT(ip->i_transp == tp); ASSERT(new_size > ip->i_size); @@ -2284,7 +2287,7 @@ xfs_ifree( xfs_dinode_t *dip; xfs_buf_t *ibp; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); ASSERT(ip->i_transp == tp); ASSERT(ip->i_d.di_nlink == 0); ASSERT(ip->i_d.di_nextents == 0); @@ -2743,7 +2746,7 @@ void xfs_ipin( xfs_inode_t *ip) { - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); atomic_inc(&ip->i_pincount); } @@ -2776,7 +2779,7 @@ __xfs_iunpin_wait( { xfs_inode_log_item_t *iip = ip->i_itemp; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS)); if (atomic_read(&ip->i_pincount) == 0) return; @@ -2826,7 +2829,7 @@ xfs_iextents_copy( xfs_fsblock_t start_block; ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); ASSERT(ifp->if_bytes > 0); nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); @@ -3129,7 +3132,7 @@ xfs_iflush( XFS_STATS_INC(xs_iflush_count); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); ASSERT(issemalocked(&(ip->i_flock))); ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || ip->i_d.di_nextents > ip->i_df.if_ext_max); @@ -3294,7 +3297,7 @@ xfs_iflush_int( int first; #endif - ASSERT(xfs_isilocked(ip, 
XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); ASSERT(issemalocked(&(ip->i_flock))); ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || ip->i_d.di_nextents > ip->i_df.if_ext_max); diff --git a/trunk/fs/xfs/xfs_inode.h b/trunk/fs/xfs/xfs_inode.h index 0a999fee4f03..93c37697a72c 100644 --- a/trunk/fs/xfs/xfs_inode.h +++ b/trunk/fs/xfs/xfs_inode.h @@ -386,9 +386,20 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) #define XFS_ILOCK_EXCL (1<<2) #define XFS_ILOCK_SHARED (1<<3) #define XFS_IUNLOCK_NONOTIFY (1<<4) +/* #define XFS_IOLOCK_NESTED (1<<5) */ +#define XFS_EXTENT_TOKEN_RD (1<<6) +#define XFS_SIZE_TOKEN_RD (1<<7) +#define XFS_EXTSIZE_RD (XFS_EXTENT_TOKEN_RD|XFS_SIZE_TOKEN_RD) +#define XFS_WILLLEND (1<<8) /* Always acquire tokens for lending */ +#define XFS_EXTENT_TOKEN_WR (XFS_EXTENT_TOKEN_RD | XFS_WILLLEND) +#define XFS_SIZE_TOKEN_WR (XFS_SIZE_TOKEN_RD | XFS_WILLLEND) +#define XFS_EXTSIZE_WR (XFS_EXTSIZE_RD | XFS_WILLLEND) +/* TODO:XFS_SIZE_TOKEN_WANT (1<<9) */ #define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ - | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED) + | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \ + | XFS_EXTENT_TOKEN_RD | XFS_SIZE_TOKEN_RD \ + | XFS_WILLLEND) /* * Flags for lockdep annotations. @@ -472,7 +483,6 @@ void xfs_ilock(xfs_inode_t *, uint); int xfs_ilock_nowait(xfs_inode_t *, uint); void xfs_iunlock(xfs_inode_t *, uint); void xfs_ilock_demote(xfs_inode_t *, uint); -int xfs_isilocked(xfs_inode_t *, uint); void xfs_iflock(xfs_inode_t *); int xfs_iflock_nowait(xfs_inode_t *); uint xfs_ilock_map_shared(xfs_inode_t *); @@ -524,7 +534,7 @@ int xfs_iflush(xfs_inode_t *, uint); void xfs_iflush_all(struct xfs_mount *); void xfs_ichgtime(xfs_inode_t *, int); xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); -void xfs_lock_inodes(xfs_inode_t **, int, uint); +void xfs_lock_inodes(xfs_inode_t **, int, int, uint); void xfs_synchronize_atime(xfs_inode_t *); void xfs_mark_inode_dirty_sync(xfs_inode_t *); diff --git a/trunk/fs/xfs/xfs_inode_item.c b/trunk/fs/xfs/xfs_inode_item.c index 167b33f15772..93b5db453ea2 100644 --- a/trunk/fs/xfs/xfs_inode_item.c +++ b/trunk/fs/xfs/xfs_inode_item.c @@ -547,7 +547,7 @@ STATIC void xfs_inode_item_pin( xfs_inode_log_item_t *iip) { - ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); + ASSERT(ismrlocked(&(iip->ili_inode->i_lock), MR_UPDATE)); xfs_ipin(iip->ili_inode); } @@ -664,13 +664,13 @@ xfs_inode_item_unlock( ASSERT(iip != NULL); ASSERT(iip->ili_inode->i_itemp != NULL); - ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); + ASSERT(ismrlocked(&(iip->ili_inode->i_lock), MR_UPDATE)); ASSERT((!(iip->ili_inode->i_itemp->ili_flags & XFS_ILI_IOLOCKED_EXCL)) || - xfs_isilocked(iip->ili_inode, XFS_IOLOCK_EXCL)); + ismrlocked(&(iip->ili_inode->i_iolock), MR_UPDATE)); ASSERT((!(iip->ili_inode->i_itemp->ili_flags & XFS_ILI_IOLOCKED_SHARED)) || - xfs_isilocked(iip->ili_inode, XFS_IOLOCK_SHARED)); + ismrlocked(&(iip->ili_inode->i_iolock), MR_ACCESS)); /* * Clear the transaction pointer in the inode. 
*/ @@ -769,7 +769,7 @@ xfs_inode_item_pushbuf( ip = iip->ili_inode; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); + ASSERT(ismrlocked(&(ip->i_lock), MR_ACCESS)); /* * The ili_pushbuf_flag keeps others from @@ -857,7 +857,7 @@ xfs_inode_item_push( ip = iip->ili_inode; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); + ASSERT(ismrlocked(&(ip->i_lock), MR_ACCESS)); ASSERT(issemalocked(&(ip->i_flock))); /* * Since we were able to lock the inode's flush lock and diff --git a/trunk/fs/xfs/xfs_iomap.c b/trunk/fs/xfs/xfs_iomap.c index 7edcde691d1a..fb3cf1191419 100644 --- a/trunk/fs/xfs/xfs_iomap.c +++ b/trunk/fs/xfs/xfs_iomap.c @@ -196,14 +196,14 @@ xfs_iomap( break; case BMAPI_WRITE: xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, ip, offset, count); - lockmode = XFS_ILOCK_EXCL; + lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR; if (flags & BMAPI_IGNSTATE) bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE; xfs_ilock(ip, lockmode); break; case BMAPI_ALLOCATE: xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, ip, offset, count); - lockmode = XFS_ILOCK_SHARED; + lockmode = XFS_ILOCK_SHARED|XFS_EXTSIZE_RD; bmapi_flags = XFS_BMAPI_ENTIRE; /* Attempt non-blocking lock */ @@ -523,7 +523,8 @@ xfs_iomap_write_direct( goto error_out; } - if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) { + if (unlikely(!imap.br_startblock && + !(XFS_IS_REALTIME_INODE(ip)))) { error = xfs_cmn_err_fsblock_zero(ip, &imap); goto error_out; } @@ -623,7 +624,7 @@ xfs_iomap_write_delay( int prealloc, fsynced = 0; int error; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0); /* * Make sure that the dquots are there. This doesn't hold @@ -685,7 +686,8 @@ xfs_iomap_write_delay( goto retry; } - if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip))) + if (unlikely(!imap[0].br_startblock && + !(XFS_IS_REALTIME_INODE(ip)))) return xfs_cmn_err_fsblock_zero(ip, &imap[0]); *ret_imap = imap[0]; @@ -836,9 +838,9 @@ xfs_iomap_write_allocate( * See if we were able to allocate an extent that * covers at least part of the callers request */ - if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) + if (unlikely(!imap.br_startblock && + XFS_IS_REALTIME_INODE(ip))) return xfs_cmn_err_fsblock_zero(ip, &imap); - if ((offset_fsb >= imap.br_startoff) && (offset_fsb < (imap.br_startoff + imap.br_blockcount))) { @@ -932,7 +934,8 @@ xfs_iomap_write_unwritten( if (error) return XFS_ERROR(error); - if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) + if (unlikely(!imap.br_startblock && + !(XFS_IS_REALTIME_INODE(ip)))) return xfs_cmn_err_fsblock_zero(ip, &imap); if ((numblks_fsb = imap.br_blockcount) == 0) { diff --git a/trunk/fs/xfs/xfs_itable.c b/trunk/fs/xfs/xfs_itable.c index 419de15aeb43..eb85bdedad0c 100644 --- a/trunk/fs/xfs/xfs_itable.c +++ b/trunk/fs/xfs/xfs_itable.c @@ -71,6 +71,11 @@ xfs_bulkstat_one_iget( ASSERT(ip != NULL); ASSERT(ip->i_blkno != (xfs_daddr_t)0); + if (ip->i_d.di_mode == 0) { + *stat = BULKSTAT_RV_NOTHING; + error = XFS_ERROR(ENOENT); + goto out_iput; + } vp = XFS_ITOV(ip); dic = &ip->i_d; @@ -119,6 +124,7 @@ xfs_bulkstat_one_iget( break; } + out_iput: xfs_iput(ip, XFS_ILOCK_SHARED); return error; } diff --git a/trunk/fs/xfs/xfs_mount.c b/trunk/fs/xfs/xfs_mount.c index da3988453b71..2fec452afbcc 100644 --- a/trunk/fs/xfs/xfs_mount.c +++ b/trunk/fs/xfs/xfs_mount.c @@ -54,9 +54,8 @@ STATIC void xfs_unmountfs_wait(xfs_mount_t *); #ifdef HAVE_PERCPU_SB STATIC void xfs_icsb_destroy_counters(xfs_mount_t *); STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, - int); 
-STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t, - int); + int, int); +STATIC void xfs_icsb_sync_counters(xfs_mount_t *); STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t, int64_t, int); STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); @@ -64,8 +63,8 @@ STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); #else #define xfs_icsb_destroy_counters(mp) do { } while (0) -#define xfs_icsb_balance_counter(mp, a, b) do { } while (0) -#define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0) +#define xfs_icsb_balance_counter(mp, a, b, c) do { } while (0) +#define xfs_icsb_sync_counters(mp) do { } while (0) #define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0) #endif @@ -1401,7 +1400,7 @@ xfs_log_sbcount( if (!xfs_fs_writable(mp)) return 0; - xfs_icsb_sync_counters(mp, 0); + xfs_icsb_sync_counters(mp); /* * we don't need to do this if we are updating the superblock @@ -2027,9 +2026,9 @@ xfs_icsb_cpu_notify( case CPU_ONLINE: case CPU_ONLINE_FROZEN: xfs_icsb_lock(mp); - xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0); - xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0); - xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0); + xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0); + xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0); + xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0); xfs_icsb_unlock(mp); break; case CPU_DEAD: @@ -2049,9 +2048,12 @@ xfs_icsb_cpu_notify( memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); - xfs_icsb_balance_counter_locked(mp, XFS_SBS_ICOUNT, 0); - xfs_icsb_balance_counter_locked(mp, XFS_SBS_IFREE, 0); - xfs_icsb_balance_counter_locked(mp, XFS_SBS_FDBLOCKS, 0); + xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, + XFS_ICSB_SB_LOCKED, 0); + xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, + XFS_ICSB_SB_LOCKED, 0); + xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, + XFS_ICSB_SB_LOCKED, 0); spin_unlock(&mp->m_sb_lock); xfs_icsb_unlock(mp); break; @@ -2103,9 +2105,9 @@ xfs_icsb_reinit_counters( * initial balance kicks us off correctly */ mp->m_icsb_counters = -1; - xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0); - xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0); - xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0); + xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0); + xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0); + xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0); xfs_icsb_unlock(mp); } @@ -2221,7 +2223,7 @@ xfs_icsb_disable_counter( if (!test_and_set_bit(field, &mp->m_icsb_counters)) { /* drain back to superblock */ - xfs_icsb_count(mp, &cnt, XFS_ICSB_LAZY_COUNT); + xfs_icsb_count(mp, &cnt, XFS_ICSB_SB_LOCKED|XFS_ICSB_LAZY_COUNT); switch(field) { case XFS_SBS_ICOUNT: mp->m_sb.sb_icount = cnt.icsb_icount; @@ -2276,33 +2278,38 @@ xfs_icsb_enable_counter( } void -xfs_icsb_sync_counters_locked( +xfs_icsb_sync_counters_flags( xfs_mount_t *mp, int flags) { xfs_icsb_cnts_t cnt; + /* Pass 1: lock all counters */ + if ((flags & XFS_ICSB_SB_LOCKED) == 0) + spin_lock(&mp->m_sb_lock); + xfs_icsb_count(mp, &cnt, flags); + /* Step 3: update mp->m_sb fields */ if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT)) mp->m_sb.sb_icount = cnt.icsb_icount; if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE)) mp->m_sb.sb_ifree = cnt.icsb_ifree; if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS)) mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks; + + if ((flags & XFS_ICSB_SB_LOCKED) == 0) + spin_unlock(&mp->m_sb_lock); } /* * Accurate update of per-cpu counters to incore superblock */ -void +STATIC void xfs_icsb_sync_counters( 
- xfs_mount_t *mp, - int flags) + xfs_mount_t *mp) { - spin_lock(&mp->m_sb_lock); - xfs_icsb_sync_counters_locked(mp, flags); - spin_unlock(&mp->m_sb_lock); + xfs_icsb_sync_counters_flags(mp, 0); } /* @@ -2325,15 +2332,19 @@ xfs_icsb_sync_counters( #define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \ (uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp)) STATIC void -xfs_icsb_balance_counter_locked( +xfs_icsb_balance_counter( xfs_mount_t *mp, xfs_sb_field_t field, + int flags, int min_per_cpu) { uint64_t count, resid; int weight = num_online_cpus(); uint64_t min = (uint64_t)min_per_cpu; + if (!(flags & XFS_ICSB_SB_LOCKED)) + spin_lock(&mp->m_sb_lock); + /* disable counter and sync counter */ xfs_icsb_disable_counter(mp, field); @@ -2343,19 +2354,19 @@ xfs_icsb_balance_counter_locked( count = mp->m_sb.sb_icount; resid = do_div(count, weight); if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE)) - return; + goto out; break; case XFS_SBS_IFREE: count = mp->m_sb.sb_ifree; resid = do_div(count, weight); if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE)) - return; + goto out; break; case XFS_SBS_FDBLOCKS: count = mp->m_sb.sb_fdblocks; resid = do_div(count, weight); if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp))) - return; + goto out; break; default: BUG(); @@ -2364,17 +2375,9 @@ xfs_icsb_balance_counter_locked( } xfs_icsb_enable_counter(mp, field, count, resid); -} - -STATIC void -xfs_icsb_balance_counter( - xfs_mount_t *mp, - xfs_sb_field_t fields, - int min_per_cpu) -{ - spin_lock(&mp->m_sb_lock); - xfs_icsb_balance_counter_locked(mp, fields, min_per_cpu); - spin_unlock(&mp->m_sb_lock); +out: + if (!(flags & XFS_ICSB_SB_LOCKED)) + spin_unlock(&mp->m_sb_lock); } STATIC int @@ -2481,7 +2484,7 @@ xfs_icsb_modify_counters( * we are done. */ if (ret != ENOSPC) - xfs_icsb_balance_counter(mp, field, 0); + xfs_icsb_balance_counter(mp, field, 0, 0); xfs_icsb_unlock(mp); return ret; @@ -2505,7 +2508,7 @@ xfs_icsb_modify_counters( * will either succeed through the fast path or slow path without * another balance operation being required. 
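xfs_icsb_balance_counter(), reshaped in the hunks above, spreads a global superblock counter evenly over the online CPUs before re-enabling it, and the division remainder (resid, what do_div() leaves behind) is carried along separately so nothing is lost to rounding. A fixed-width user-space sketch of the split (DEMO_NCPUS and demo_balance are illustrative; the kernel uses do_div() and num_online_cpus()):

#include <assert.h>
#include <stdint.h>

#define DEMO_NCPUS 4

static void demo_balance(uint64_t total,
                         uint64_t per_cpu[DEMO_NCPUS], uint64_t *resid)
{
        uint64_t share = total / DEMO_NCPUS;
        int i;

        *resid = total % DEMO_NCPUS;    /* remainder, as do_div() returns */
        for (i = 0; i < DEMO_NCPUS; i++)
                per_cpu[i] = share;
}

int main(void)
{
        uint64_t cpus[DEMO_NCPUS], resid;

        demo_balance(1003, cpus, &resid);
        assert(cpus[0] == 250 && cpus[3] == 250 && resid == 3);
        return 0;
}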
*/ - xfs_icsb_balance_counter(mp, field, delta); + xfs_icsb_balance_counter(mp, field, 0, delta); xfs_icsb_unlock(mp); goto again; } diff --git a/trunk/fs/xfs/xfs_mount.h b/trunk/fs/xfs/xfs_mount.h index 63e0693a358a..1ed575110ff0 100644 --- a/trunk/fs/xfs/xfs_mount.h +++ b/trunk/fs/xfs/xfs_mount.h @@ -206,18 +206,17 @@ typedef struct xfs_icsb_cnts { #define XFS_ICSB_FLAG_LOCK (1 << 0) /* counter lock bit */ +#define XFS_ICSB_SB_LOCKED (1 << 0) /* sb already locked */ #define XFS_ICSB_LAZY_COUNT (1 << 1) /* accuracy not needed */ extern int xfs_icsb_init_counters(struct xfs_mount *); extern void xfs_icsb_reinit_counters(struct xfs_mount *); -extern void xfs_icsb_sync_counters(struct xfs_mount *, int); -extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int); +extern void xfs_icsb_sync_counters_flags(struct xfs_mount *, int); #else #define xfs_icsb_init_counters(mp) (0) #define xfs_icsb_reinit_counters(mp) do { } while (0) -#define xfs_icsb_sync_counters(mp, flags) do { } while (0) -#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0) +#define xfs_icsb_sync_counters_flags(mp, flags) do { } while (0) #endif typedef struct xfs_ail { diff --git a/trunk/fs/xfs/xfs_rename.c b/trunk/fs/xfs/xfs_rename.c index d8063e1ad298..ee371890d85d 100644 --- a/trunk/fs/xfs/xfs_rename.c +++ b/trunk/fs/xfs/xfs_rename.c @@ -55,32 +55,85 @@ xfs_rename_unlock4( xfs_iunlock(i_tab[0], lock_mode); for (i = 1; i < 4; i++) { - if (i_tab[i] == NULL) + if (i_tab[i] == NULL) { break; - + } /* * Watch out for duplicate entries in the table. */ - if (i_tab[i] != i_tab[i-1]) + if (i_tab[i] != i_tab[i-1]) { xfs_iunlock(i_tab[i], lock_mode); + } } } +#ifdef DEBUG +int xfs_rename_skip, xfs_rename_nskip; +#endif + /* - * Enter all inodes for a rename transaction into a sorted array. + * The following routine will acquire the locks required for a rename + * operation. The code understands the semantics of renames and will + * validate that name1 exists under dp1 & that name2 may or may not + * exist under dp2. + * + * We are renaming dp1/name1 to dp2/name2. + * + * Return ENOENT if dp1 does not exist, other lookup errors, or 0 for success. */ -STATIC void -xfs_sort_for_rename( +STATIC int +xfs_lock_for_rename( xfs_inode_t *dp1, /* in: old (source) directory inode */ xfs_inode_t *dp2, /* in: new (target) directory inode */ xfs_inode_t *ip1, /* in: inode of old entry */ - xfs_inode_t *ip2, /* in: inode of new entry, if it + struct xfs_name *name2, /* in: new entry name */ + xfs_inode_t **ipp2, /* out: inode of new entry, if it already exists, NULL otherwise. */ xfs_inode_t **i_tab,/* out: array of inode returned, sorted */ int *num_inodes) /* out: number of inodes in array */ { + xfs_inode_t *ip2 = NULL; xfs_inode_t *temp; + xfs_ino_t inum1, inum2; + int error; int i, j; + uint lock_mode; + int diff_dirs = (dp1 != dp2); + + /* + * First, find out the current inums of the entries so that we + * can determine the initial locking order. We'll have to + * sanity check stuff after all the locks have been acquired + * to see if we still have the right inodes, directories, etc. + */ + lock_mode = xfs_ilock_map_shared(dp1); + IHOLD(ip1); + xfs_itrace_ref(ip1); + + inum1 = ip1->i_ino; + + /* + * Unlock dp1 and lock dp2 if they are different. + */ + if (diff_dirs) { + xfs_iunlock_map_shared(dp1, lock_mode); + lock_mode = xfs_ilock_map_shared(dp2); + } + + error = xfs_dir_lookup_int(dp2, lock_mode, name2, &inum2, &ip2); + if (error == ENOENT) { /* target does not need to exist. 
*/ + inum2 = 0; + } else if (error) { + /* + * If dp2 and dp1 are the same, the next line unlocks dp1. + * Got it? + */ + xfs_iunlock_map_shared(dp2, lock_mode); + IRELE (ip1); + return error; + } else { + xfs_itrace_ref(ip2); + } /* * i_tab contains a list of pointers to inodes. We initialize @@ -92,20 +145,21 @@ xfs_sort_for_rename( i_tab[0] = dp1; i_tab[1] = dp2; i_tab[2] = ip1; - if (ip2) { - *num_inodes = 4; - i_tab[3] = ip2; - } else { + if (inum2 == 0) { *num_inodes = 3; i_tab[3] = NULL; + } else { + *num_inodes = 4; + i_tab[3] = ip2; } + *ipp2 = i_tab[3]; /* * Sort the elements via bubble sort. (Remember, there are at * most 4 elements to sort, so this is adequate.) */ - for (i = 0; i < *num_inodes; i++) { - for (j = 1; j < *num_inodes; j++) { + for (i=0; i < *num_inodes; i++) { + for (j=1; j < *num_inodes; j++) { if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) { temp = i_tab[j]; i_tab[j] = i_tab[j-1]; @@ -113,6 +167,30 @@ xfs_sort_for_rename( } } } + + /* + * We have dp2 locked. If it isn't first, unlock it. + * If it is first, tell xfs_lock_inodes so it can skip it + * when locking. If dp1 == dp2, xfs_lock_inodes will skip both + * since they are equal. xfs_lock_inodes needs all these inodes + * so that it can unlock and retry if there might be a dead-lock + * potential with the log. + */ + + if (i_tab[0] == dp2 && lock_mode == XFS_ILOCK_SHARED) { +#ifdef DEBUG + xfs_rename_skip++; +#endif + xfs_lock_inodes(i_tab, *num_inodes, 1, XFS_ILOCK_SHARED); + } else { +#ifdef DEBUG + xfs_rename_nskip++; +#endif + xfs_iunlock_map_shared(dp2, lock_mode); + xfs_lock_inodes(i_tab, *num_inodes, 0, XFS_ILOCK_SHARED); + } + + return 0; } /* @@ -124,10 +202,10 @@ xfs_rename( struct xfs_name *src_name, xfs_inode_t *src_ip, xfs_inode_t *target_dp, - struct xfs_name *target_name, - xfs_inode_t *target_ip) + struct xfs_name *target_name) { - xfs_trans_t *tp = NULL; + xfs_trans_t *tp; + xfs_inode_t *target_ip; xfs_mount_t *mp = src_dp->i_mount; int new_parent; /* moving to a new dir */ int src_is_directory; /* src_name is a directory */ @@ -137,7 +215,9 @@ xfs_rename( int cancel_flags; int committed; xfs_inode_t *inodes[4]; + int target_ip_dropped = 0; /* dropped target_ip link? */ int spaceres; + int target_link_zero = 0; int num_inodes; xfs_itrace_entry(src_dp); @@ -150,27 +230,64 @@ xfs_rename( target_dp, DM_RIGHT_NULL, src_name->name, target_name->name, 0, 0, 0); - if (error) + if (error) { return error; + } } /* Return through std_return after this point. */ - new_parent = (src_dp != target_dp); - src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR); + /* + * Lock all the participating inodes. Depending upon whether + * the target_name exists in the target directory, and + * whether the target directory is the same as the source + * directory, we can lock from 2 to 4 inodes. + * xfs_lock_for_rename() will return ENOENT if src_name + * does not exist in the source directory. + */ + tp = NULL; + error = xfs_lock_for_rename(src_dp, target_dp, src_ip, target_name, + &target_ip, inodes, &num_inodes); + if (error) { + /* + * We have nothing locked, no inode references, and + * no transaction, so just get out.
+ */ + goto std_return; + } + + ASSERT(src_ip != NULL); - if (src_is_directory) { + if ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR) { /* * Check for link count overflow on target_dp */ - if (target_ip == NULL && new_parent && + if (target_ip == NULL && (src_dp != target_dp) && target_dp->i_d.di_nlink >= XFS_MAXLINK) { error = XFS_ERROR(EMLINK); - goto std_return; + xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); + goto rele_return; } } - xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, - inodes, &num_inodes); + /* + * If we are using project inheritance, we only allow renames + * into our tree when the project IDs are the same; else the + * tree quota mechanism would be circumvented. + */ + if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && + (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) { + error = XFS_ERROR(EXDEV); + xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); + goto rele_return; + } + + new_parent = (src_dp != target_dp); + src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR); + + /* + * Drop the locks on our inodes so that we can start the transaction. + */ + xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); XFS_BMAP_INIT(&free_list, &first_block); tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); @@ -185,7 +302,7 @@ xfs_rename( } if (error) { xfs_trans_cancel(tp, 0); - goto std_return; + goto rele_return; } /* @@ -193,29 +310,13 @@ xfs_rename( */ if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) { xfs_trans_cancel(tp, cancel_flags); - goto std_return; + goto rele_return; } /* - * Lock all the participating inodes. Depending upon whether - * the target_name exists in the target directory, and - * whether the target directory is the same as the source - * directory, we can lock from 2 to 4 inodes. - */ - xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL); - - /* - * If we are using project inheritance, we only allow renames - * into our tree when the project IDs are the same; else the - * tree quota mechanism would be circumvented. + * Reacquire the inode locks we dropped above. */ - if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && - (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) { - error = XFS_ERROR(EXDEV); - xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); - xfs_trans_cancel(tp, cancel_flags); - goto std_return; - } + xfs_lock_inodes(inodes, num_inodes, 0, XFS_ILOCK_EXCL); /* * Join all the inodes to the transaction. 
From this point on, @@ -227,17 +328,17 @@ xfs_rename( */ IHOLD(src_dp); xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); - if (new_parent) { IHOLD(target_dp); xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); } - - IHOLD(src_ip); - xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); - - if (target_ip) { - IHOLD(target_ip); + if ((src_ip != src_dp) && (src_ip != target_dp)) { + xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); + } + if ((target_ip != NULL) && + (target_ip != src_ip) && + (target_ip != src_dp) && + (target_ip != target_dp)) { xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); } @@ -311,6 +412,7 @@ xfs_rename( error = xfs_droplink(tp, target_ip); if (error) goto abort_return; + target_ip_dropped = 1; if (src_is_directory) { /* @@ -320,6 +422,10 @@ xfs_rename( if (error) goto abort_return; } + + /* Do this test while we still hold the locks */ + target_link_zero = (target_ip)->i_d.di_nlink==0; + } /* target_ip != NULL */ /* @@ -385,6 +491,15 @@ xfs_rename( xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); } + /* + * If there was a target inode, take an extra reference on + * it here so that it doesn't go to xfs_inactive() from + * within the commit. + */ + if (target_ip != NULL) { + IHOLD(target_ip); + } + /* * If this is a synchronous mount, make sure that the * rename transaction goes to disk before returning to @@ -394,11 +509,30 @@ xfs_rename( xfs_trans_set_sync(tp); } + /* + * Take refs. for vop_link_removed calls below. No need to worry + * about directory refs. because the caller holds them. + * + * Do holds before the xfs_bmap_finish since it might rele them down + * to zero. + */ + + if (target_ip_dropped) + IHOLD(target_ip); + IHOLD(src_ip); + error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) { xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); + if (target_ip != NULL) { + IRELE(target_ip); + } + if (target_ip_dropped) { + IRELE(target_ip); + } + IRELE(src_ip); goto std_return; } @@ -407,6 +541,15 @@ xfs_rename( * the vnode references. */ error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + if (target_ip != NULL) + IRELE(target_ip); + /* + * Let interposed file systems know about removed links. 
+ */ + if (target_ip_dropped) + IRELE(target_ip); + + IRELE(src_ip); /* Fall through to std_return with error = 0 or errno from * xfs_trans_commit */ @@ -428,4 +571,11 @@ xfs_rename( xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, cancel_flags); goto std_return; + + rele_return: + IRELE(src_ip); + if (target_ip != NULL) { + IRELE(target_ip); + } + goto std_return; } diff --git a/trunk/fs/xfs/xfs_trans_inode.c b/trunk/fs/xfs/xfs_trans_inode.c index 4c70bf5e9985..b8db1d5cde5a 100644 --- a/trunk/fs/xfs/xfs_trans_inode.c +++ b/trunk/fs/xfs/xfs_trans_inode.c @@ -111,13 +111,13 @@ xfs_trans_iget( */ ASSERT(ip->i_itemp != NULL); ASSERT(lock_flags & XFS_ILOCK_EXCL); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) || - xfs_isilocked(ip, XFS_IOLOCK_EXCL)); + ismrlocked(&ip->i_iolock, MR_UPDATE)); ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) || (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_EXCL)); ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) || - xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)); + ismrlocked(&ip->i_iolock, (MR_UPDATE | MR_ACCESS))); ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) || (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_ANY)); @@ -185,7 +185,7 @@ xfs_trans_ijoin( xfs_inode_log_item_t *iip; ASSERT(ip->i_transp == NULL); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); ASSERT(lock_flags & XFS_ILOCK_EXCL); if (ip->i_itemp == NULL) xfs_inode_item_init(ip, ip->i_mount); @@ -232,7 +232,7 @@ xfs_trans_ihold( { ASSERT(ip->i_transp == tp); ASSERT(ip->i_itemp != NULL); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); ip->i_itemp->ili_flags |= XFS_ILI_HOLD; } @@ -257,7 +257,7 @@ xfs_trans_log_inode( ASSERT(ip->i_transp == tp); ASSERT(ip->i_itemp != NULL); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(ip->i_itemp)); ASSERT(lidp != NULL); diff --git a/trunk/fs/xfs/xfs_utils.c b/trunk/fs/xfs/xfs_utils.c index 98e5f110ba5f..2b8dc7e40772 100644 --- a/trunk/fs/xfs/xfs_utils.c +++ b/trunk/fs/xfs/xfs_utils.c @@ -41,6 +41,49 @@ #include "xfs_utils.h" +int +xfs_dir_lookup_int( + xfs_inode_t *dp, + uint lock_mode, + struct xfs_name *name, + xfs_ino_t *inum, + xfs_inode_t **ipp) +{ + int error; + + xfs_itrace_entry(dp); + + error = xfs_dir_lookup(NULL, dp, name, inum); + if (!error) { + /* + * Unlock the directory. We do this because we can't + * hold the directory lock while doing the vn_get() + * in xfs_iget(). Doing so could cause us to hold + * a lock while waiting for the inode to finish + * being inactive while it's waiting for a log + * reservation in the inactive routine. + */ + xfs_iunlock(dp, lock_mode); + error = xfs_iget(dp->i_mount, NULL, *inum, 0, 0, ipp, 0); + xfs_ilock(dp, lock_mode); + + if (error) { + *ipp = NULL; + } else if ((*ipp)->i_d.di_mode == 0) { + /* + * The inode has been freed. Something is + * wrong so just get out of here. + */ + xfs_iunlock(dp, lock_mode); + xfs_iput_new(*ipp, 0); + *ipp = NULL; + xfs_ilock(dp, lock_mode); + error = XFS_ERROR(ENOENT); + } + } + return error; +} + /* * Allocates a new inode from disk and returns a pointer to the * incore copy.
This routine will internally commit the current @@ -267,7 +310,7 @@ xfs_bump_ino_vers2( { xfs_mount_t *mp; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE)); ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1); ip->i_d.di_version = XFS_DINODE_VERSION_2; diff --git a/trunk/fs/xfs/xfs_utils.h b/trunk/fs/xfs/xfs_utils.h index f316cb85d8e2..175b126d2cab 100644 --- a/trunk/fs/xfs/xfs_utils.h +++ b/trunk/fs/xfs/xfs_utils.h @@ -21,6 +21,8 @@ #define IRELE(ip) VN_RELE(XFS_ITOV(ip)) #define IHOLD(ip) VN_HOLD(XFS_ITOV(ip)) +extern int xfs_dir_lookup_int(xfs_inode_t *, uint, struct xfs_name *, + xfs_ino_t *, xfs_inode_t **); extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *); extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, xfs_dev_t, cred_t *, prid_t, int, diff --git a/trunk/fs/xfs/xfs_vfsops.c b/trunk/fs/xfs/xfs_vfsops.c index 30bacd8bb0e5..fc48158fe479 100644 --- a/trunk/fs/xfs/xfs_vfsops.c +++ b/trunk/fs/xfs/xfs_vfsops.c @@ -186,7 +186,6 @@ xfs_cleanup(void) kmem_zone_destroy(xfs_efi_zone); kmem_zone_destroy(xfs_ifork_zone); kmem_zone_destroy(xfs_ili_zone); - kmem_zone_destroy(xfs_log_ticket_zone); } /* diff --git a/trunk/fs/xfs/xfs_vnodeops.c b/trunk/fs/xfs/xfs_vnodeops.c index 70702a60b4bb..6650601c64f7 100644 --- a/trunk/fs/xfs/xfs_vnodeops.c +++ b/trunk/fs/xfs/xfs_vnodeops.c @@ -75,6 +75,132 @@ xfs_open( return 0; } +/* + * xfs_getattr + */ +int +xfs_getattr( + xfs_inode_t *ip, + bhv_vattr_t *vap, + int flags) +{ + bhv_vnode_t *vp = XFS_ITOV(ip); + xfs_mount_t *mp = ip->i_mount; + + xfs_itrace_entry(ip); + + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); + + if (!(flags & ATTR_LAZY)) + xfs_ilock(ip, XFS_ILOCK_SHARED); + + vap->va_size = XFS_ISIZE(ip); + if (vap->va_mask == XFS_AT_SIZE) + goto all_done; + + vap->va_nblocks = + XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); + vap->va_nodeid = ip->i_ino; +#if XFS_BIG_INUMS + vap->va_nodeid += mp->m_inoadd; +#endif + vap->va_nlink = ip->i_d.di_nlink; + + /* + * Quick exit for non-stat callers + */ + if ((vap->va_mask & + ~(XFS_AT_SIZE|XFS_AT_FSID|XFS_AT_NODEID| + XFS_AT_NLINK|XFS_AT_BLKSIZE)) == 0) + goto all_done; + + /* + * Copy from in-core inode. + */ + vap->va_mode = ip->i_d.di_mode; + vap->va_uid = ip->i_d.di_uid; + vap->va_gid = ip->i_d.di_gid; + vap->va_projid = ip->i_d.di_projid; + + /* + * Check vnode type block/char vs. everything else. + */ + switch (ip->i_d.di_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + vap->va_rdev = ip->i_df.if_u2.if_rdev; + vap->va_blocksize = BLKDEV_IOSIZE; + break; + default: + vap->va_rdev = 0; + + if (!(XFS_IS_REALTIME_INODE(ip))) { + vap->va_blocksize = xfs_preferred_iosize(mp); + } else { + + /* + * If the file blocks are being allocated from a + * realtime partition, then return the inode's + * realtime extent size or the realtime volume's + * extent size. + */ + vap->va_blocksize = + xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog; + } + break; + } + + vn_atime_to_timespec(vp, &vap->va_atime); + vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec; + vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; + vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec; + vap->va_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; + + /* + * Exit for stat callers. See if any of the rest of the fields + * to be filled in are needed. + */ + if ((vap->va_mask & + (XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS| + XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0) + goto all_done; + + /* + * Convert di_flags to xflags. 
+ */ + vap->va_xflags = xfs_ip2xflags(ip); + + /* + * Exit for inode revalidate. See if any of the rest of + * the fields to be filled in are needed. + */ + if ((vap->va_mask & + (XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS| + XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0) + goto all_done; + + vap->va_extsize = ip->i_d.di_extsize << mp->m_sb.sb_blocklog; + vap->va_nextents = + (ip->i_df.if_flags & XFS_IFEXTENTS) ? + ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) : + ip->i_d.di_nextents; + if (ip->i_afp) + vap->va_anextents = + (ip->i_afp->if_flags & XFS_IFEXTENTS) ? + ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) : + ip->i_d.di_anextents; + else + vap->va_anextents = 0; + vap->va_gen = ip->i_d.di_gen; + + all_done: + if (!(flags & ATTR_LAZY)) + xfs_iunlock(ip, XFS_ILOCK_SHARED); + return 0; +} + + /* * xfs_setattr */ @@ -85,6 +211,7 @@ xfs_setattr( int flags, cred_t *credp) { + bhv_vnode_t *vp = XFS_ITOV(ip); xfs_mount_t *mp = ip->i_mount; xfs_trans_t *tp; int mask; @@ -95,6 +222,7 @@ xfs_setattr( gid_t gid=0, igid=0; int timeflags = 0; xfs_prid_t projid=0, iprojid=0; + int mandlock_before, mandlock_after; struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; int file_owner; int need_iolock = 1; @@ -255,7 +383,7 @@ xfs_setattr( m |= S_ISGID; #if 0 /* Linux allows this, Irix doesn't. */ - if ((vap->va_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode)) + if ((vap->va_mode & S_ISVTX) && !VN_ISDIR(vp)) m |= S_ISVTX; #endif if (m && !capable(CAP_FSETID)) @@ -333,10 +461,10 @@ xfs_setattr( goto error_return; } - if (S_ISDIR(ip->i_d.di_mode)) { + if (VN_ISDIR(vp)) { code = XFS_ERROR(EISDIR); goto error_return; - } else if (!S_ISREG(ip->i_d.di_mode)) { + } else if (!VN_ISREG(vp)) { code = XFS_ERROR(EINVAL); goto error_return; } @@ -498,6 +626,9 @@ xfs_setattr( xfs_trans_ihold(tp, ip); } + /* determine whether mandatory locking mode changes */ + mandlock_before = MANDLOCK(vp, ip->i_d.di_mode); + /* * Truncate file. Must have write permission and not be a directory. */ @@ -727,6 +858,13 @@ xfs_setattr( code = xfs_trans_commit(tp, commit_flags); } + /* + * If the (regular) file's mandatory locking mode changed, then + * notify the vnode. We do this under the inode lock to prevent + * racing calls to vop_vnode_change. + */ + mandlock_after = MANDLOCK(vp, ip->i_d.di_mode); + xfs_iunlock(ip, lock_flags); /* @@ -1305,7 +1443,7 @@ xfs_inactive_attrs( int error; xfs_mount_t *mp; - ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); + ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE)); tp = *tpp; mp = ip->i_mount; ASSERT(ip->i_d.di_forkoff != 0); @@ -1353,7 +1491,7 @@ xfs_release( xfs_mount_t *mp = ip->i_mount; int error; - if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0)) + if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) return 0; /* If this is a read-only mount, don't do this (would generate I/O) */ @@ -1636,7 +1774,8 @@ xfs_lookup( struct xfs_name *name, xfs_inode_t **ipp) { - xfs_ino_t inum; + xfs_inode_t *ip; + xfs_ino_t e_inum; int error; uint lock_mode; @@ -1646,21 +1785,12 @@ xfs_lookup( return XFS_ERROR(EIO); lock_mode = xfs_ilock_map_shared(dp); - error = xfs_dir_lookup(NULL, dp, name, &inum); + error = xfs_dir_lookup_int(dp, lock_mode, name, &e_inum, &ip); + if (!error) { + *ipp = ip; + xfs_itrace_ref(ip); + } xfs_iunlock_map_shared(dp, lock_mode); - - if (error) - goto out; - - error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0); - if (error) - goto out; - - xfs_itrace_ref(*ipp); - return 0; - - out: - *ipp = NULL; return error; } @@ -1776,7 +1906,7 @@ xfs_create( * It is locked (and joined to the transaction). 
*/ - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE)); /* * Now we join the directory inode to the transaction. We do not do it @@ -1982,7 +2112,7 @@ xfs_lock_dir_and_entry( ips[0] = ip; ips[1] = dp; - xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL); + xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); } /* else e_inum == dp->i_ino */ /* This can happen if we're asked to lock /x/.. @@ -2030,6 +2160,7 @@ void xfs_lock_inodes( xfs_inode_t **ips, int inodes, + int first_locked, uint lock_mode) { int attempts = 0, i, j, try_lock; @@ -2037,8 +2168,13 @@ xfs_lock_inodes( ASSERT(ips && (inodes >= 2)); /* we need at least two */ - try_lock = 0; - i = 0; + if (first_locked) { + try_lock = 1; + i = 1; + } else { + try_lock = 0; + i = 0; + } again: for (; i < inodes; i++) { @@ -2162,14 +2298,29 @@ xfs_remove( return error; } + /* + * We need to get a reference to ip before we get our log + * reservation. The reason for this is that we cannot call + * xfs_iget for an inode for which we do not have a reference + * once we've acquired a log reservation. This is because the + * inode we are trying to get might be in xfs_inactive going + * for a log reservation. Since we'll have to wait for the + * inactive code to complete before returning from xfs_iget, + * we need to make sure that we don't have log space reserved + * when we call xfs_iget. Instead we get an unlocked reference + * to the inode before getting our log reservation. + */ + IHOLD(ip); + xfs_itrace_entry(ip); xfs_itrace_ref(ip); error = XFS_QM_DQATTACH(mp, dp, 0); - if (!error) + if (!error && dp != ip) error = XFS_QM_DQATTACH(mp, ip, 0); if (error) { REMOVE_DEBUG_TRACE(__LINE__); + IRELE(ip); goto std_return; } @@ -2196,6 +2347,7 @@ xfs_remove( ASSERT(error != ENOSPC); REMOVE_DEBUG_TRACE(__LINE__); xfs_trans_cancel(tp, 0); + IRELE(ip); return error; } @@ -2203,6 +2355,7 @@ xfs_remove( if (error) { REMOVE_DEBUG_TRACE(__LINE__); xfs_trans_cancel(tp, cancel_flags); + IRELE(ip); goto std_return; } @@ -2210,18 +2363,23 @@ xfs_remove( * At this point, we've gotten both the directory and the entry * inodes locked. */ - IHOLD(ip); xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); - - IHOLD(dp); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + if (dp != ip) { + /* + * Increment vnode ref count only in this case since + * there's an extra vnode reference in the case where + * dp == ip. + */ + IHOLD(dp); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + } /* * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. */ XFS_BMAP_INIT(&free_list, &first_block); error = xfs_dir_removename(tp, dp, name, ip->i_ino, - &first_block, &free_list, resblks); + &first_block, &free_list, 0); if (error) { ASSERT(error != ENOENT); REMOVE_DEBUG_TRACE(__LINE__); @@ -2243,6 +2401,12 @@ xfs_remove( */ link_zero = (ip)->i_d.di_nlink==0; + /* + * Take an extra ref on the inode so that it doesn't + * go to xfs_inactive() from within the commit. + */ + IHOLD(ip); + /* * If this is a synchronous mount, make sure that the * remove transaction goes to disk before returning to @@ -2259,8 +2423,10 @@ xfs_remove( } error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); - if (error) + if (error) { + IRELE(ip); goto std_return; + } /* * If we are using filestreams, kill the stream association. 
@@ -2272,6 +2438,7 @@ xfs_remove( xfs_filestream_deassociate(ip); xfs_itrace_exit(ip); + IRELE(ip); /* Fall through to std_return with error = 0 */ std_return: @@ -2300,6 +2467,8 @@ xfs_remove( cancel_flags |= XFS_TRANS_ABORT; xfs_trans_cancel(tp, cancel_flags); + IRELE(ip); + goto std_return; } @@ -2367,7 +2536,7 @@ xfs_link( ips[1] = sip; } - xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL); + xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); /* * Increment vnode ref counts since xfs_trans_commit & @@ -2671,6 +2840,7 @@ xfs_rmdir( struct xfs_name *name, xfs_inode_t *cdp) { + bhv_vnode_t *dir_vp = XFS_ITOV(dp); xfs_mount_t *mp = dp->i_mount; xfs_trans_t *tp; int error; @@ -2695,13 +2865,28 @@ xfs_rmdir( return XFS_ERROR(error); } + /* + * We need to get a reference to cdp before we get our log + * reservation. The reason for this is that we cannot call + * xfs_iget for an inode for which we do not have a reference + * once we've acquired a log reservation. This is because the + * inode we are trying to get might be in xfs_inactive going + * for a log reservation. Since we'll have to wait for the + * inactive code to complete before returning from xfs_iget, + * we need to make sure that we don't have log space reserved + * when we call xfs_iget. Instead we get an unlocked reference + * to the inode before getting our log reservation. + */ + IHOLD(cdp); + /* * Get the dquots for the inodes. */ error = XFS_QM_DQATTACH(mp, dp, 0); - if (!error) + if (!error && dp != cdp) error = XFS_QM_DQATTACH(mp, cdp, 0); if (error) { + IRELE(cdp); REMOVE_DEBUG_TRACE(__LINE__); goto std_return; } @@ -2728,6 +2913,7 @@ xfs_rmdir( if (error) { ASSERT(error != ENOSPC); cancel_flags = 0; + IRELE(cdp); goto error_return; } XFS_BMAP_INIT(&free_list, &first_block); @@ -2741,13 +2927,21 @@ xfs_rmdir( error = xfs_lock_dir_and_entry(dp, cdp); if (error) { xfs_trans_cancel(tp, cancel_flags); + IRELE(cdp); goto std_return; } - IHOLD(dp); xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); + if (dp != cdp) { + /* + * Only increment the parent directory vnode count if + * we didn't bump it in looking up cdp. The only time + * we don't bump it is when we're looking up ".". + */ + VN_HOLD(dir_vp); + } - IHOLD(cdp); + xfs_itrace_ref(cdp); xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL); ASSERT(cdp->i_d.di_nlink >= 2); @@ -2800,6 +2994,12 @@ xfs_rmdir( /* Determine these before committing transaction */ last_cdp_link = (cdp)->i_d.di_nlink==0; + /* + * Take an extra ref on the child vnode so that it + * does not go to xfs_inactive() from within the commit. + */ + IHOLD(cdp); + /* * If this is a synchronous mount, make sure that the * rmdir transaction goes to disk before returning to @@ -2814,15 +3014,19 @@ xfs_rmdir( xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); + IRELE(cdp); goto std_return; } error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (error) { + IRELE(cdp); goto std_return; } + IRELE(cdp); + /* Fall through to std_return with error = 0 or the errno * from xfs_trans_commit. 
*/ std_return: diff --git a/trunk/fs/xfs/xfs_vnodeops.h b/trunk/fs/xfs/xfs_vnodeops.h index 8abe8f186e20..24c53923dc2c 100644 --- a/trunk/fs/xfs/xfs_vnodeops.h +++ b/trunk/fs/xfs/xfs_vnodeops.h @@ -15,6 +15,7 @@ struct xfs_iomap; int xfs_open(struct xfs_inode *ip); +int xfs_getattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags); int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags, struct cred *credp); int xfs_readlink(struct xfs_inode *ip, char *link); @@ -47,9 +48,9 @@ int xfs_change_file_space(struct xfs_inode *ip, int cmd, struct cred *credp, int attr_flags); int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, struct xfs_inode *src_ip, struct xfs_inode *target_dp, - struct xfs_name *target_name, struct xfs_inode *target_ip); + struct xfs_name *target_name); int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value, - int *valuelenp, int flags); + int *valuelenp, int flags, cred_t *cred); int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value, int valuelen, int flags); int xfs_attr_remove(struct xfs_inode *dp, const char *name, int flags); @@ -60,6 +61,9 @@ int xfs_ioctl(struct xfs_inode *ip, struct file *filp, ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb, const struct iovec *iovp, unsigned int segs, loff_t *offset, int ioflags); +ssize_t xfs_sendfile(struct xfs_inode *ip, struct file *filp, + loff_t *offset, int ioflags, size_t count, + read_actor_t actor, void *target); ssize_t xfs_splice_read(struct xfs_inode *ip, struct file *infilp, loff_t *ppos, struct pipe_inode_info *pipe, size_t count, int flags, int ioflags); diff --git a/trunk/fs/ext4/ext4.h b/trunk/include/linux/ext4_fs.h similarity index 99% rename from trunk/fs/ext4/ext4.h rename to trunk/include/linux/ext4_fs.h index 8158083f7ac0..250032548597 100644 --- a/trunk/fs/ext4/ext4.h +++ b/trunk/include/linux/ext4_fs.h @@ -1,5 +1,5 @@ /* - * ext4.h + * linux/include/linux/ext4_fs.h * * Copyright (C) 1992, 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) @@ -13,13 +13,14 @@ * Copyright (C) 1991, 1992 Linus Torvalds */ -#ifndef _EXT4_H -#define _EXT4_H +#ifndef _LINUX_EXT4_FS_H +#define _LINUX_EXT4_FS_H #include #include #include -#include "ext4_i.h" + +#include /* * The second extended filesystem constants/structures @@ -175,7 +176,8 @@ struct ext4_group_desc #define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ #ifdef __KERNEL__ -#include "ext4_sb.h" +#include +#include #endif /* * Macro-instructions used to manage group descriptors @@ -229,7 +231,6 @@ struct ext4_group_desc #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ #define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ -#define EXT4_EXT_MIGRATE 0x00100000 /* Inode is migrating */ #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ @@ -1048,7 +1049,8 @@ extern int ext4_block_truncate_page(handle_t *handle, struct page *page, struct address_space *mapping, loff_t from); /* ioctl.c */ -extern long ext4_ioctl(struct file *, unsigned int, unsigned long); +extern int ext4_ioctl (struct inode *, struct file *, unsigned int, + unsigned long); extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long); /* migrate.c */ @@ -1202,4 +1204,4 @@ extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, int extend_disksize); #endif /* __KERNEL__ */ -#endif /* _EXT4_H */ +#endif /* 
_LINUX_EXT4_FS_H */ diff --git a/trunk/fs/ext4/ext4_extents.h b/trunk/include/linux/ext4_fs_extents.h similarity index 98% rename from trunk/fs/ext4/ext4_extents.h rename to trunk/include/linux/ext4_fs_extents.h index 75333b595fab..1285c583b2d8 100644 --- a/trunk/fs/ext4/ext4_extents.h +++ b/trunk/include/linux/ext4_fs_extents.h @@ -16,10 +16,10 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- */ -#ifndef _EXT4_EXTENTS -#define _EXT4_EXTENTS +#ifndef _LINUX_EXT4_EXTENTS +#define _LINUX_EXT4_EXTENTS -#include "ext4.h" +#include <linux/ext4_fs.h> /* * With AGGRESSIVE_TEST defined, the capacity of index/leaf blocks @@ -228,5 +228,5 @@ extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *, ext4_lblk_t *, ext4_fsblk_t *); extern void ext4_ext_drop_refs(struct ext4_ext_path *); -#endif /* _EXT4_EXTENTS */ +#endif /* _LINUX_EXT4_EXTENTS */ diff --git a/trunk/fs/ext4/ext4_i.h b/trunk/include/linux/ext4_fs_i.h similarity index 97% rename from trunk/fs/ext4/ext4_i.h rename to trunk/include/linux/ext4_fs_i.h index 26a4ae255d79..d5508d3cf290 100644 --- a/trunk/fs/ext4/ext4_i.h +++ b/trunk/include/linux/ext4_fs_i.h @@ -1,5 +1,5 @@ /* - * ext4_i.h + * linux/include/linux/ext4_fs_i.h * * Copyright (C) 1992, 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) @@ -13,8 +13,8 @@ * Copyright (C) 1991, 1992 Linus Torvalds */ -#ifndef _EXT4_I -#define _EXT4_I +#ifndef _LINUX_EXT4_FS_I +#define _LINUX_EXT4_FS_I #include #include @@ -164,4 +164,4 @@ struct ext4_inode_info { spinlock_t i_prealloc_lock; }; -#endif /* _EXT4_I */ +#endif /* _LINUX_EXT4_FS_I */ diff --git a/trunk/fs/ext4/ext4_sb.h b/trunk/include/linux/ext4_fs_sb.h similarity index 97% rename from trunk/fs/ext4/ext4_sb.h rename to trunk/include/linux/ext4_fs_sb.h index 5802e69f2191..abaae2c8cccf 100644 --- a/trunk/fs/ext4/ext4_sb.h +++ b/trunk/include/linux/ext4_fs_sb.h @@ -1,5 +1,5 @@ /* - * ext4_sb.h + * linux/include/linux/ext4_fs_sb.h * * Copyright (C) 1992, 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) @@ -13,8 +13,8 @@ * Copyright (C) 1991, 1992 Linus Torvalds */ -#ifndef _EXT4_SB -#define _EXT4_SB +#ifndef _LINUX_EXT4_FS_SB +#define _LINUX_EXT4_FS_SB #ifdef __KERNEL__ #include @@ -145,4 +145,4 @@ struct ext4_sb_info { struct ext4_locality_group *s_locality_groups; }; -#endif /* _EXT4_SB */ +#endif /* _LINUX_EXT4_FS_SB */ diff --git a/trunk/fs/ext4/ext4_jbd2.h b/trunk/include/linux/ext4_jbd2.h similarity index 98% rename from trunk/fs/ext4/ext4_jbd2.h rename to trunk/include/linux/ext4_jbd2.h index 9255a7d28b24..38c71d3c8dbf 100644 --- a/trunk/fs/ext4/ext4_jbd2.h +++ b/trunk/include/linux/ext4_jbd2.h @@ -1,5 +1,5 @@ /* - * ext4_jbd2.h + * linux/include/linux/ext4_jbd2.h * * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 * @@ -12,12 +12,12 @@ * Ext4-specific journaling extensions. */ -#ifndef _EXT4_JBD2_H -#define _EXT4_JBD2_H +#ifndef _LINUX_EXT4_JBD2_H +#define _LINUX_EXT4_JBD2_H #include #include -#include "ext4.h" +#include <linux/ext4_fs.h> #define EXT4_JOURNAL(inode) (EXT4_SB((inode)->i_sb)->s_journal) @@ -228,4 +228,4 @@ static inline int ext4_should_writeback_data(struct inode *inode) return 0; } -#endif /* _EXT4_JBD2_H */ +#endif /* _LINUX_EXT4_JBD2_H */
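
The xfs_rename changes in this patch restore a classic deadlock-avoidance discipline: xfs_lock_for_rename() and xfs_lock_inodes() sort the (at most four) participating inodes by inode number and always take the locks in ascending order, skipping duplicate entries. Below is a minimal standalone sketch of that idea, assuming nothing from the kernel: the names (demo_inode, demo_lock_inodes, ...) are hypothetical, and userspace pthread mutexes stand in for the XFS inode locks.

/*
 * Minimal sketch (not kernel code): lock a small set of "inodes" in one
 * global order -- ascending inode number -- skipping duplicates, the same
 * discipline xfs_lock_inodes() applies in the patch above.
 */
#include <pthread.h>
#include <stdio.h>

struct demo_inode {
	unsigned long	ino;	/* stand-in for xfs_ino_t */
	pthread_mutex_t	lock;	/* stand-in for the inode lock */
};

static void demo_lock_inodes(struct demo_inode **i_tab, int num)
{
	int i, j;

	/* Bubble sort by inode number; at most four entries, as in the patch. */
	for (i = 0; i < num; i++) {
		for (j = 1; j < num; j++) {
			if (i_tab[j]->ino < i_tab[j - 1]->ino) {
				struct demo_inode *temp = i_tab[j];
				i_tab[j] = i_tab[j - 1];
				i_tab[j - 1] = temp;
			}
		}
	}

	for (i = 0; i < num; i++) {
		/* Watch out for duplicate entries in the table. */
		if (i > 0 && i_tab[i] == i_tab[i - 1])
			continue;
		pthread_mutex_lock(&i_tab[i]->lock);
	}
}

static void demo_unlock_inodes(struct demo_inode **i_tab, int num)
{
	int i;

	for (i = 0; i < num; i++) {
		if (i > 0 && i_tab[i] == i_tab[i - 1])
			continue;
		pthread_mutex_unlock(&i_tab[i]->lock);
	}
}

int main(void)
{
	struct demo_inode dir  = { 42, PTHREAD_MUTEX_INITIALIZER };
	struct demo_inode file = { 7,  PTHREAD_MUTEX_INITIALIZER };
	/* A rename-like set: directory, file, and a deliberate duplicate. */
	struct demo_inode *tab[3] = { &dir, &file, &dir };

	demo_lock_inodes(tab, 3);
	printf("locked ino %lu, then ino %lu; duplicate skipped\n",
	       tab[0]->ino, tab[1]->ino);
	demo_unlock_inodes(tab, 3);
	return 0;
}

Because every task acquires the locks in the same global (inode-number) order, no cycle of waiters can form; that is what lets the rename path hold up to four inode locks at once without any deadlock detection beyond the try-lock/retry fallback visible in xfs_lock_inodes().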