diff --git a/[refs] b/[refs]
index 47f4c8a38296..ca06b971490f 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 94ebd235c493f43681f609b0e02733337053e8f0
+refs/heads/master: 68eda8f59081c74a51d037cc29893bd7c9b3c2d8
diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS
index 3d4179fbc526..f2a2b8e647c5 100644
--- a/trunk/MAINTAINERS
+++ b/trunk/MAINTAINERS
@@ -1527,8 +1527,6 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
 S:	Supported
 F:	Documentation/filesystems/ceph.txt
 F:	fs/ceph
-F:	net/ceph
-F:	include/linux/ceph
 
 CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM:
 M:	David Vrabel <david.vrabel@csr.com>
@@ -4807,15 +4805,6 @@ F:	fs/qnx4/
 F:	include/linux/qnx4_fs.h
 F:	include/linux/qnxtypes.h
 
-RADOS BLOCK DEVICE (RBD)
-F:	include/linux/qnxtypes.h
-M:	Yehuda Sadeh <yehuda@hq.newdream.net>
-M:	Sage Weil <sage@newdream.net>
-M:	ceph-devel@vger.kernel.org
-S:	Supported
-F:	drivers/block/rbd.c
-F:	drivers/block/rbd_types.h
-
 RADEON FRAMEBUFFER DISPLAY DRIVER
 M:	Benjamin Herrenschmidt <benh@kernel.crashing.org>
 L:	linux-fbdev@vger.kernel.org
diff --git a/trunk/drivers/block/Kconfig b/trunk/drivers/block/Kconfig
index 4b9359a6f6ca..de277689da61 100644
--- a/trunk/drivers/block/Kconfig
+++ b/trunk/drivers/block/Kconfig
@@ -488,21 +488,4 @@ config BLK_DEV_HD
 
 	  If unsure, say N.
 
-config BLK_DEV_RBD
-	tristate "Rados block device (RBD)"
-	depends on INET && EXPERIMENTAL && BLOCK
-	select CEPH_LIB
-	select LIBCRC32C
-	select CRYPTO_AES
-	select CRYPTO
-	default n
-	help
-	  Say Y here if you want include the Rados block device, which stripes
-	  a block device over objects stored in the Ceph distributed object
-	  store.
-
-	  More information at http://ceph.newdream.net/.
-
-	  If unsure, say N.
-
 endif # BLK_DEV
diff --git a/trunk/drivers/block/Makefile b/trunk/drivers/block/Makefile
index d7f463d6312d..aff5ac925c34 100644
--- a/trunk/drivers/block/Makefile
+++ b/trunk/drivers/block/Makefile
@@ -37,6 +37,5 @@ obj-$(CONFIG_BLK_DEV_HD)	+= hd.o
 
 obj-$(CONFIG_XEN_BLKDEV_FRONTEND)	+= xen-blkfront.o
 obj-$(CONFIG_BLK_DEV_DRBD)     += drbd/
-obj-$(CONFIG_BLK_DEV_RBD)     += rbd.o
 
 swim_mod-objs	:= swim.o swim_asm.o
diff --git a/trunk/drivers/block/rbd.c b/trunk/drivers/block/rbd.c
deleted file mode 100644
index 6ec9d53806c5..000000000000
--- a/trunk/drivers/block/rbd.c
+++ /dev/null
@@ -1,1841 +0,0 @@
-/*
-   rbd.c -- Export ceph rados objects as a Linux block device
-
-
-   based on drivers/block/osdblk.c:
-
-   Copyright 2009 Red Hat, Inc.
-
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; see the file COPYING.  If not, write to
-   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
-
-
-
-   Instructions for use
-   --------------------
-
-   1) Map a Linux block device to an existing rbd image.
-
-      Usage: <mon ip addr> <options> <pool name> <rbd image name> [snap name]
-
-      $ echo "192.168.0.1 name=admin rbd foo" > /sys/class/rbd/add
-
-      The snapshot name can be "-" or omitted to map the image read/write.
-
-   2) List all active blkdev<->object mappings.
-
-      In this example, we have performed step #1 twice, creating two blkdevs,
-      mapped to two separate rados objects in the rados rbd pool
-
-      $ cat /sys/class/rbd/list
-      #id     major   client_name     pool    name    snap    KB
-      0       254     client4143      rbd     foo     -      1024000
-
-      The columns, in order, are:
-      - blkdev unique id
-      - blkdev assigned major
-      - rados client id
-      - rados pool name
-      - rados block device name
-      - mapped snapshot ("-" if none)
-      - device size in KB
-
-
-   3) Create a snapshot.
-
-      Usage: <blkdev id> <snapname>
-
-      $ echo "0 mysnap" > /sys/class/rbd/snap_create
-
-
-   4) Listing a snapshot.
-
-      $ cat /sys/class/rbd/snaps_list
-      #id     snap    KB
-      0       -       1024000 (*)
-      0       foo     1024000
-
-      The columns, in order, are:
-      - blkdev unique id
-      - snapshot name, '-' means none (active read/write version)
-      - size of device at time of snapshot
-      - the (*) indicates this is the active version
-
-   5) Rollback to snapshot.
-
-      Usage: <blkdev id> <snapname>
-
-      $ echo "0 mysnap" > /sys/class/rbd/snap_rollback
-
-
-   6) Mapping an image using snapshot.
-
-      A snapshot mapping is read-only. This is being done by passing
-      snap=<snapname> to the options when adding a device.
-
-      $ echo "192.168.0.1 name=admin,snap=mysnap rbd foo" > /sys/class/rbd/add
-
-
-   7) Remove an active blkdev<->rbd image mapping.
-
-      In this example, we remove the mapping with blkdev unique id 1.
-
-      $ echo 1 > /sys/class/rbd/remove
-
-
-   NOTE:  The actual creation and deletion of rados objects is outside the scope
-   of this driver.
-
- */
-
-#include <linux/ceph/libceph.h>
-#include <linux/ceph/osd_client.h>
-#include <linux/ceph/mon_client.h>
-#include <linux/ceph/decode.h>
-
-#include <linux/kernel.h>
-#include <linux/device.h>
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/blkdev.h>
-
-#include "rbd_types.h"
-
-#define DRV_NAME "rbd"
-#define DRV_NAME_LONG "rbd (rados block device)"
-
-#define RBD_MINORS_PER_MAJOR	256		/* max minors per blkdev */
-
-#define RBD_MAX_MD_NAME_LEN	(96 + sizeof(RBD_SUFFIX))
-#define RBD_MAX_POOL_NAME_LEN	64
-#define RBD_MAX_SNAP_NAME_LEN	32
-#define RBD_MAX_OPT_LEN		1024
-
-#define RBD_SNAP_HEAD_NAME	"-"
-
-#define DEV_NAME_LEN		32
-
-/*
- * block device image metadata (in-memory version)
- */
-struct rbd_image_header {
-	u64 image_size;
-	char block_name[32];
-	__u8 obj_order;
-	__u8 crypt_type;
-	__u8 comp_type;
-	struct rw_semaphore snap_rwsem;
-	struct ceph_snap_context *snapc;
-	size_t snap_names_len;
-	u64 snap_seq;
-	u32 total_snaps;
-
-	char *snap_names;
-	u64 *snap_sizes;
-};
-
-/*
- * an instance of the client.  multiple devices may share a client.
- */
-struct rbd_client {
-	struct ceph_client	*client;
-	struct kref		kref;
-	struct list_head	node;
-};
-
-/*
- * a single io request
- */
-struct rbd_request {
-	struct request		*rq;		/* blk layer request */
-	struct bio		*bio;		/* cloned bio */
-	struct page		**pages;	/* list of used pages */
-	u64			len;
-};
-
-/*
- * a single device
- */
-struct rbd_device {
-	int			id;		/* blkdev unique id */
-
-	int			major;		/* blkdev assigned major */
-	struct gendisk		*disk;		/* blkdev's gendisk and rq */
-	struct request_queue	*q;
-
-	struct ceph_client	*client;
-	struct rbd_client	*rbd_client;
-
-	char			name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */
-
-	spinlock_t		lock;		/* queue lock */
-
-	struct rbd_image_header	header;
-	char			obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name */
-	int			obj_len;
-	char			obj_md_name[RBD_MAX_MD_NAME_LEN]; /* hdr nm. */
-	char			pool_name[RBD_MAX_POOL_NAME_LEN];
-	int			poolid;
-
-	char                    snap_name[RBD_MAX_SNAP_NAME_LEN];
-	u32 cur_snap;	/* index+1 of current snapshot within snap context
-			   0 - for the head */
-	int read_only;
-
-	struct list_head	node;
-};
-
-static spinlock_t node_lock;      /* protects client get/put */
-
-static struct class *class_rbd;	  /* /sys/class/rbd */
-static DEFINE_MUTEX(ctl_mutex);	  /* Serialize open/close/setup/teardown */
-static LIST_HEAD(rbd_dev_list);    /* devices */
-static LIST_HEAD(rbd_client_list);      /* clients */
-
-
-static int rbd_open(struct block_device *bdev, fmode_t mode)
-{
-	struct gendisk *disk = bdev->bd_disk;
-	struct rbd_device *rbd_dev = disk->private_data;
-
-	set_device_ro(bdev, rbd_dev->read_only);
-
-	if ((mode & FMODE_WRITE) && rbd_dev->read_only)
-		return -EROFS;
-
-	return 0;
-}
-
-static const struct block_device_operations rbd_bd_ops = {
-	.owner			= THIS_MODULE,
-	.open			= rbd_open,
-};
-
-/*
- * Initialize an rbd client instance.
- * We own *opt.
- */
-static struct rbd_client *rbd_client_create(struct ceph_options *opt)
-{
-	struct rbd_client *rbdc;
-	int ret = -ENOMEM;
-
-	dout("rbd_client_create\n");
-	rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL);
-	if (!rbdc)
-		goto out_opt;
-
-	kref_init(&rbdc->kref);
-	INIT_LIST_HEAD(&rbdc->node);
-
-	rbdc->client = ceph_create_client(opt, rbdc);
-	if (IS_ERR(rbdc->client))
-		goto out_rbdc;
-	opt = NULL; /* Now rbdc->client is responsible for opt */
-
-	ret = ceph_open_session(rbdc->client);
-	if (ret < 0)
-		goto out_err;
-
-	spin_lock(&node_lock);
-	list_add_tail(&rbdc->node, &rbd_client_list);
-	spin_unlock(&node_lock);
-
-	dout("rbd_client_create created %p\n", rbdc);
-	return rbdc;
-
-out_err:
-	ceph_destroy_client(rbdc->client);
-out_rbdc:
-	kfree(rbdc);
-out_opt:
-	if (opt)
-		ceph_destroy_options(opt);
-	return ERR_PTR(ret);
-}
-
-/*
- * Find a ceph client with specific addr and configuration.
- */
-static struct rbd_client *__rbd_client_find(struct ceph_options *opt)
-{
-	struct rbd_client *client_node;
-
-	if (opt->flags & CEPH_OPT_NOSHARE)
-		return NULL;
-
-	list_for_each_entry(client_node, &rbd_client_list, node)
-		if (ceph_compare_options(opt, client_node->client) == 0)
-			return client_node;
-	return NULL;
-}
-
-/*
- * Get a ceph client with specific addr and configuration, if one does
- * not exist create it.
- */
-static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr,
-			  char *options)
-{
-	struct rbd_client *rbdc;
-	struct ceph_options *opt;
-	int ret;
-
-	ret = ceph_parse_options(&opt, options, mon_addr,
-				 mon_addr + strlen(mon_addr), NULL, NULL);
-	if (ret < 0)
-		return ret;
-
-	spin_lock(&node_lock);
-	rbdc = __rbd_client_find(opt);
-	if (rbdc) {
-		ceph_destroy_options(opt);
-
-		/* using an existing client */
-		kref_get(&rbdc->kref);
-		rbd_dev->rbd_client = rbdc;
-		rbd_dev->client = rbdc->client;
-		spin_unlock(&node_lock);
-		return 0;
-	}
-	spin_unlock(&node_lock);
-
-	rbdc = rbd_client_create(opt);
-	if (IS_ERR(rbdc))
-		return PTR_ERR(rbdc);
-
-	rbd_dev->rbd_client = rbdc;
-	rbd_dev->client = rbdc->client;
-	return 0;
-}
-
-/*
- * Destroy ceph client
- */
-static void rbd_client_release(struct kref *kref)
-{
-	struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref);
-
-	dout("rbd_release_client %p\n", rbdc);
-	spin_lock(&node_lock);
-	list_del(&rbdc->node);
-	spin_unlock(&node_lock);
-
-	ceph_destroy_client(rbdc->client);
-	kfree(rbdc);
-}
-
-/*
- * Drop reference to ceph client node. If it's not referenced anymore, release
- * it.
- */
-static void rbd_put_client(struct rbd_device *rbd_dev)
-{
-	kref_put(&rbd_dev->rbd_client->kref, rbd_client_release);
-	rbd_dev->rbd_client = NULL;
-	rbd_dev->client = NULL;
-}
-
-
-/*
- * Create a new header structure, translate header format from the on-disk
- * header.
- */
-static int rbd_header_from_disk(struct rbd_image_header *header,
-				 struct rbd_image_header_ondisk *ondisk,
-				 int allocated_snaps,
-				 gfp_t gfp_flags)
-{
-	int i;
-	u32 snap_count = le32_to_cpu(ondisk->snap_count);
-	int ret = -ENOMEM;
-
-	init_rwsem(&header->snap_rwsem);
-
-	header->snap_names_len = le64_to_cpu(ondisk->snap_names_len);
-	header->snapc = kmalloc(sizeof(struct ceph_snap_context) +
-				snap_count *
-				 sizeof(struct rbd_image_snap_ondisk),
-				gfp_flags);
-	if (!header->snapc)
-		return -ENOMEM;
-	if (snap_count) {
-		header->snap_names = kmalloc(header->snap_names_len,
-					     GFP_KERNEL);
-		if (!header->snap_names)
-			goto err_snapc;
-		header->snap_sizes = kmalloc(snap_count * sizeof(u64),
-					     GFP_KERNEL);
-		if (!header->snap_sizes)
-			goto err_names;
-	} else {
-		header->snap_names = NULL;
-		header->snap_sizes = NULL;
-	}
-	memcpy(header->block_name, ondisk->block_name,
-	       sizeof(ondisk->block_name));
-
-	header->image_size = le64_to_cpu(ondisk->image_size);
-	header->obj_order = ondisk->options.order;
-	header->crypt_type = ondisk->options.crypt_type;
-	header->comp_type = ondisk->options.comp_type;
-
-	atomic_set(&header->snapc->nref, 1);
-	header->snap_seq = le64_to_cpu(ondisk->snap_seq);
-	header->snapc->num_snaps = snap_count;
-	header->total_snaps = snap_count;
-
-	if (snap_count &&
-	    allocated_snaps == snap_count) {
-		for (i = 0; i < snap_count; i++) {
-			header->snapc->snaps[i] =
-				le64_to_cpu(ondisk->snaps[i].id);
-			header->snap_sizes[i] =
-				le64_to_cpu(ondisk->snaps[i].image_size);
-		}
-
-		/* copy snapshot names */
-		memcpy(header->snap_names, &ondisk->snaps[i],
-			header->snap_names_len);
-	}
-
-	return 0;
-
-err_names:
-	kfree(header->snap_names);
-err_snapc:
-	kfree(header->snapc);
-	return ret;
-}
-
-static int snap_index(struct rbd_image_header *header, int snap_num)
-{
-	return header->total_snaps - snap_num;
-}
-
-static u64 cur_snap_id(struct rbd_device *rbd_dev)
-{
-	struct rbd_image_header *header = &rbd_dev->header;
-
-	if (!rbd_dev->cur_snap)
-		return 0;
-
-	return header->snapc->snaps[snap_index(header, rbd_dev->cur_snap)];
-}
-
-static int snap_by_name(struct rbd_image_header *header, const char *snap_name,
-			u64 *seq, u64 *size)
-{
-	int i;
-	char *p = header->snap_names;
-
-	for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) {
-		if (strcmp(snap_name, p) == 0)
-			break;
-	}
-	if (i == header->total_snaps)
-		return -ENOENT;
-	if (seq)
-		*seq = header->snapc->snaps[i];
-
-	if (size)
-		*size = header->snap_sizes[i];
-
-	return i;
-}
-
-static int rbd_header_set_snap(struct rbd_device *dev,
-			       const char *snap_name,
-			       u64 *size)
-{
-	struct rbd_image_header *header = &dev->header;
-	struct ceph_snap_context *snapc = header->snapc;
-	int ret = -ENOENT;
-
-	down_write(&header->snap_rwsem);
-
-	if (!snap_name ||
-	    !*snap_name ||
-	    strcmp(snap_name, "-") == 0 ||
-	    strcmp(snap_name, RBD_SNAP_HEAD_NAME) == 0) {
-		if (header->total_snaps)
-			snapc->seq = header->snap_seq;
-		else
-			snapc->seq = 0;
-		dev->cur_snap = 0;
-		dev->read_only = 0;
-		if (size)
-			*size = header->image_size;
-	} else {
-		ret = snap_by_name(header, snap_name, &snapc->seq, size);
-		if (ret < 0)
-			goto done;
-
-		dev->cur_snap = header->total_snaps - ret;
-		dev->read_only = 1;
-	}
-
-	ret = 0;
-done:
-	up_write(&header->snap_rwsem);
-	return ret;
-}
-
-static void rbd_header_free(struct rbd_image_header *header)
-{
-	kfree(header->snapc);
-	kfree(header->snap_names);
-	kfree(header->snap_sizes);
-}
-
-/*
- * get the actual striped segment name, offset and length
- */
-static u64 rbd_get_segment(struct rbd_image_header *header,
-			   const char *block_name,
-			   u64 ofs, u64 len,
-			   char *seg_name, u64 *segofs)
-{
-	u64 seg = ofs >> header->obj_order;
-
-	if (seg_name)
-		snprintf(seg_name, RBD_MAX_SEG_NAME_LEN,
-			 "%s.%012llx", block_name, seg);
-
-	ofs = ofs & ((1 << header->obj_order) - 1);
-	len = min_t(u64, len, (1 << header->obj_order) - ofs);
-
-	if (segofs)
-		*segofs = ofs;
-
-	return len;
-}
-
-/*
- * bio helpers
- */
-
-static void bio_chain_put(struct bio *chain)
-{
-	struct bio *tmp;
-
-	while (chain) {
-		tmp = chain;
-		chain = chain->bi_next;
-		bio_put(tmp);
-	}
-}
-
-/*
- * zeros a bio chain, starting at specific offset
- */
-static void zero_bio_chain(struct bio *chain, int start_ofs)
-{
-	struct bio_vec *bv;
-	unsigned long flags;
-	void *buf;
-	int i;
-	int pos = 0;
-
-	while (chain) {
-		bio_for_each_segment(bv, chain, i) {
-			if (pos + bv->bv_len > start_ofs) {
-				int remainder = max(start_ofs - pos, 0);
-				buf = bvec_kmap_irq(bv, &flags);
-				memset(buf + remainder, 0,
-				       bv->bv_len - remainder);
-				bvec_kunmap_irq(buf, &flags);
-			}
-			pos += bv->bv_len;
-		}
-
-		chain = chain->bi_next;
-	}
-}
-
-/*
- * bio_chain_clone - clone a chain of bios up to a certain length.
- * might return a bio_pair that will need to be released.
- */
-static struct bio *bio_chain_clone(struct bio **old, struct bio **next,
-				   struct bio_pair **bp,
-				   int len, gfp_t gfpmask)
-{
-	struct bio *tmp, *old_chain = *old, *new_chain = NULL, *tail = NULL;
-	int total = 0;
-
-	if (*bp) {
-		bio_pair_release(*bp);
-		*bp = NULL;
-	}
-
-	while (old_chain && (total < len)) {
-		tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs);
-		if (!tmp)
-			goto err_out;
-
-		if (total + old_chain->bi_size > len) {
-			struct bio_pair *bp;
-
-			/*
-			 * this split can only happen with a single paged bio,
-			 * split_bio will BUG_ON if this is not the case
-			 */
-			dout("bio_chain_clone split! total=%d remaining=%d"
-			     "bi_size=%d\n",
-			     (int)total, (int)len-total,
-			     (int)old_chain->bi_size);
-
-			/* split the bio. We'll release it either in the next
-			   call, or it will have to be released outside */
-			bp = bio_split(old_chain, (len - total) / 512ULL);
-			if (!bp)
-				goto err_out;
-
-			__bio_clone(tmp, &bp->bio1);
-
-			*next = &bp->bio2;
-		} else {
-			__bio_clone(tmp, old_chain);
-			*next = old_chain->bi_next;
-		}
-
-		tmp->bi_bdev = NULL;
-		gfpmask &= ~__GFP_WAIT;
-		tmp->bi_next = NULL;
-
-		if (!new_chain) {
-			new_chain = tail = tmp;
-		} else {
-			tail->bi_next = tmp;
-			tail = tmp;
-		}
-		old_chain = old_chain->bi_next;
-
-		total += tmp->bi_size;
-	}
-
-	BUG_ON(total < len);
-
-	if (tail)
-		tail->bi_next = NULL;
-
-	*old = old_chain;
-
-	return new_chain;
-
-err_out:
-	dout("bio_chain_clone with err\n");
-	bio_chain_put(new_chain);
-	return NULL;
-}
-
-/*
- * helpers for osd request op vectors.
- */
-static int rbd_create_rw_ops(struct ceph_osd_req_op **ops,
-			    int num_ops,
-			    int opcode,
-			    u32 payload_len)
-{
-	*ops = kzalloc(sizeof(struct ceph_osd_req_op) * (num_ops + 1),
-		       GFP_NOIO);
-	if (!*ops)
-		return -ENOMEM;
-	(*ops)[0].op = opcode;
-	/*
-	 * op extent offset and length will be set later on
-	 * in calc_raw_layout()
-	 */
-	(*ops)[0].payload_len = payload_len;
-	return 0;
-}
-
-static void rbd_destroy_ops(struct ceph_osd_req_op *ops)
-{
-	kfree(ops);
-}
-
-/*
- * Send ceph osd request
- */
-static int rbd_do_request(struct request *rq,
-			  struct rbd_device *dev,
-			  struct ceph_snap_context *snapc,
-			  u64 snapid,
-			  const char *obj, u64 ofs, u64 len,
-			  struct bio *bio,
-			  struct page **pages,
-			  int num_pages,
-			  int flags,
-			  struct ceph_osd_req_op *ops,
-			  int num_reply,
-			  void (*rbd_cb)(struct ceph_osd_request *req,
-					 struct ceph_msg *msg))
-{
-	struct ceph_osd_request *req;
-	struct ceph_file_layout *layout;
-	int ret;
-	u64 bno;
-	struct timespec mtime = CURRENT_TIME;
-	struct rbd_request *req_data;
-	struct ceph_osd_request_head *reqhead;
-	struct rbd_image_header *header = &dev->header;
-
-	ret = -ENOMEM;
-	req_data = kzalloc(sizeof(*req_data), GFP_NOIO);
-	if (!req_data)
-		goto done;
-
-	dout("rbd_do_request len=%lld ofs=%lld\n", len, ofs);
-
-	down_read(&header->snap_rwsem);
-
-	req = ceph_osdc_alloc_request(&dev->client->osdc, flags,
-				      snapc,
-				      ops,
-				      false,
-				      GFP_NOIO, pages, bio);
-	if (IS_ERR(req)) {
-		up_read(&header->snap_rwsem);
-		ret = PTR_ERR(req);
-		goto done_pages;
-	}
-
-	req->r_callback = rbd_cb;
-
-	req_data->rq = rq;
-	req_data->bio = bio;
-	req_data->pages = pages;
-	req_data->len = len;
-
-	req->r_priv = req_data;
-
-	reqhead = req->r_request->front.iov_base;
-	reqhead->snapid = cpu_to_le64(CEPH_NOSNAP);
-
-	strncpy(req->r_oid, obj, sizeof(req->r_oid));
-	req->r_oid_len = strlen(req->r_oid);
-
-	layout = &req->r_file_layout;
-	memset(layout, 0, sizeof(*layout));
-	layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
-	layout->fl_stripe_count = cpu_to_le32(1);
-	layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
-	layout->fl_pg_preferred = cpu_to_le32(-1);
-	layout->fl_pg_pool = cpu_to_le32(dev->poolid);
-	ceph_calc_raw_layout(&dev->client->osdc, layout, snapid,
-			     ofs, &len, &bno, req, ops);
-
-	ceph_osdc_build_request(req, ofs, &len,
-				ops,
-				snapc,
-				&mtime,
-				req->r_oid, req->r_oid_len);
-	up_read(&header->snap_rwsem);
-
-	ret = ceph_osdc_start_request(&dev->client->osdc, req, false);
-	if (ret < 0)
-		goto done_err;
-
-	if (!rbd_cb) {
-		ret = ceph_osdc_wait_request(&dev->client->osdc, req);
-		ceph_osdc_put_request(req);
-	}
-	return ret;
-
-done_err:
-	bio_chain_put(req_data->bio);
-	ceph_osdc_put_request(req);
-done_pages:
-	kfree(req_data);
-done:
-	if (rq)
-		blk_end_request(rq, ret, len);
-	return ret;
-}
-
-/*
- * Ceph osd op callback
- */
-static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg)
-{
-	struct rbd_request *req_data = req->r_priv;
-	struct ceph_osd_reply_head *replyhead;
-	struct ceph_osd_op *op;
-	__s32 rc;
-	u64 bytes;
-	int read_op;
-
-	/* parse reply */
-	replyhead = msg->front.iov_base;
-	WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
-	op = (void *)(replyhead + 1);
-	rc = le32_to_cpu(replyhead->result);
-	bytes = le64_to_cpu(op->extent.length);
-	read_op = (le32_to_cpu(op->op) == CEPH_OSD_OP_READ);
-
-	dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc);
-
-	if (rc == -ENOENT && read_op) {
-		zero_bio_chain(req_data->bio, 0);
-		rc = 0;
-	} else if (rc == 0 && read_op && bytes < req_data->len) {
-		zero_bio_chain(req_data->bio, bytes);
-		bytes = req_data->len;
-	}
-
-	blk_end_request(req_data->rq, rc, bytes);
-
-	if (req_data->bio)
-		bio_chain_put(req_data->bio);
-
-	ceph_osdc_put_request(req);
-	kfree(req_data);
-}
-
-/*
- * Do a synchronous ceph osd operation
- */
-static int rbd_req_sync_op(struct rbd_device *dev,
-			   struct ceph_snap_context *snapc,
-			   u64 snapid,
-			   int opcode,
-			   int flags,
-			   struct ceph_osd_req_op *orig_ops,
-			   int num_reply,
-			   const char *obj,
-			   u64 ofs, u64 len,
-			   char *buf)
-{
-	int ret;
-	struct page **pages;
-	int num_pages;
-	struct ceph_osd_req_op *ops = orig_ops;
-	u32 payload_len;
-
-	num_pages = calc_pages_for(ofs , len);
-	pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
-	if (IS_ERR(pages))
-		return PTR_ERR(pages);
-
-	if (!orig_ops) {
-		payload_len = (flags & CEPH_OSD_FLAG_WRITE ? len : 0);
-		ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len);
-		if (ret < 0)
-			goto done;
-
-		if ((flags & CEPH_OSD_FLAG_WRITE) && buf) {
-			ret = ceph_copy_to_page_vector(pages, buf, ofs, len);
-			if (ret < 0)
-				goto done_ops;
-		}
-	}
-
-	ret = rbd_do_request(NULL, dev, snapc, snapid,
-			  obj, ofs, len, NULL,
-			  pages, num_pages,
-			  flags,
-			  ops,
-			  2,
-			  NULL);
-	if (ret < 0)
-		goto done_ops;
-
-	if ((flags & CEPH_OSD_FLAG_READ) && buf)
-		ret = ceph_copy_from_page_vector(pages, buf, ofs, ret);
-
-done_ops:
-	if (!orig_ops)
-		rbd_destroy_ops(ops);
-done:
-	ceph_release_page_vector(pages, num_pages);
-	return ret;
-}
-
-/*
- * Do an asynchronous ceph osd operation
- */
-static int rbd_do_op(struct request *rq,
-		     struct rbd_device *rbd_dev ,
-		     struct ceph_snap_context *snapc,
-		     u64 snapid,
-		     int opcode, int flags, int num_reply,
-		     u64 ofs, u64 len,
-		     struct bio *bio)
-{
-	char *seg_name;
-	u64 seg_ofs;
-	u64 seg_len;
-	int ret;
-	struct ceph_osd_req_op *ops;
-	u32 payload_len;
-
-	seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO);
-	if (!seg_name)
-		return -ENOMEM;
-
-	seg_len = rbd_get_segment(&rbd_dev->header,
-				  rbd_dev->header.block_name,
-				  ofs, len,
-				  seg_name, &seg_ofs);
-
-	payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0);
-
-	ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len);
-	if (ret < 0)
-		goto done;
-
-	/* we've taken care of segment sizes earlier when we
-	   cloned the bios. We should never have a segment
-	   truncated at this point */
-	BUG_ON(seg_len < len);
-
-	ret = rbd_do_request(rq, rbd_dev, snapc, snapid,
-			     seg_name, seg_ofs, seg_len,
-			     bio,
-			     NULL, 0,
-			     flags,
-			     ops,
-			     num_reply,
-			     rbd_req_cb);
-done:
-	kfree(seg_name);
-	return ret;
-}
-
-/*
- * Request async osd write
- */
-static int rbd_req_write(struct request *rq,
-			 struct rbd_device *rbd_dev,
-			 struct ceph_snap_context *snapc,
-			 u64 ofs, u64 len,
-			 struct bio *bio)
-{
-	return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP,
-			 CEPH_OSD_OP_WRITE,
-			 CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
-			 2,
-			 ofs, len, bio);
-}
-
-/*
- * Request async osd read
- */
-static int rbd_req_read(struct request *rq,
-			 struct rbd_device *rbd_dev,
-			 u64 snapid,
-			 u64 ofs, u64 len,
-			 struct bio *bio)
-{
-	return rbd_do_op(rq, rbd_dev, NULL,
-			 (snapid ? snapid : CEPH_NOSNAP),
-			 CEPH_OSD_OP_READ,
-			 CEPH_OSD_FLAG_READ,
-			 2,
-			 ofs, len, bio);
-}
-
-/*
- * Request sync osd read
- */
-static int rbd_req_sync_read(struct rbd_device *dev,
-			  struct ceph_snap_context *snapc,
-			  u64 snapid,
-			  const char *obj,
-			  u64 ofs, u64 len,
-			  char *buf)
-{
-	return rbd_req_sync_op(dev, NULL,
-			       (snapid ? snapid : CEPH_NOSNAP),
-			       CEPH_OSD_OP_READ,
-			       CEPH_OSD_FLAG_READ,
-			       NULL,
-			       1, obj, ofs, len, buf);
-}
-
-/*
- * Request sync osd read
- */
-static int rbd_req_sync_rollback_obj(struct rbd_device *dev,
-				     u64 snapid,
-				     const char *obj)
-{
-	struct ceph_osd_req_op *ops;
-	int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_ROLLBACK, 0);
-	if (ret < 0)
-		return ret;
-
-	ops[0].snap.snapid = snapid;
-
-	ret = rbd_req_sync_op(dev, NULL,
-			       CEPH_NOSNAP,
-			       0,
-			       CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
-			       ops,
-			       1, obj, 0, 0, NULL);
-
-	rbd_destroy_ops(ops);
-
-	if (ret < 0)
-		return ret;
-
-	return ret;
-}
-
-/*
- * Request sync osd read
- */
-static int rbd_req_sync_exec(struct rbd_device *dev,
-			     const char *obj,
-			     const char *cls,
-			     const char *method,
-			     const char *data,
-			     int len)
-{
-	struct ceph_osd_req_op *ops;
-	int cls_len = strlen(cls);
-	int method_len = strlen(method);
-	int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_CALL,
-				    cls_len + method_len + len);
-	if (ret < 0)
-		return ret;
-
-	ops[0].cls.class_name = cls;
-	ops[0].cls.class_len = (__u8)cls_len;
-	ops[0].cls.method_name = method;
-	ops[0].cls.method_len = (__u8)method_len;
-	ops[0].cls.argc = 0;
-	ops[0].cls.indata = data;
-	ops[0].cls.indata_len = len;
-
-	ret = rbd_req_sync_op(dev, NULL,
-			       CEPH_NOSNAP,
-			       0,
-			       CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
-			       ops,
-			       1, obj, 0, 0, NULL);
-
-	rbd_destroy_ops(ops);
-
-	dout("cls_exec returned %d\n", ret);
-	return ret;
-}
-
-/*
- * block device queue callback
- */
-static void rbd_rq_fn(struct request_queue *q)
-{
-	struct rbd_device *rbd_dev = q->queuedata;
-	struct request *rq;
-	struct bio_pair *bp = NULL;
-
-	rq = blk_fetch_request(q);
-
-	while (1) {
-		struct bio *bio;
-		struct bio *rq_bio, *next_bio = NULL;
-		bool do_write;
-		int size, op_size = 0;
-		u64 ofs;
-
-		/* peek at request from block layer */
-		if (!rq)
-			break;
-
-		dout("fetched request\n");
-
-		/* filter out block requests we don't understand */
-		if ((rq->cmd_type != REQ_TYPE_FS)) {
-			__blk_end_request_all(rq, 0);
-			goto next;
-		}
-
-		/* deduce our operation (read, write) */
-		do_write = (rq_data_dir(rq) == WRITE);
-
-		size = blk_rq_bytes(rq);
-		ofs = blk_rq_pos(rq) * 512ULL;
-		rq_bio = rq->bio;
-		if (do_write && rbd_dev->read_only) {
-			__blk_end_request_all(rq, -EROFS);
-			goto next;
-		}
-
-		spin_unlock_irq(q->queue_lock);
-
-		dout("%s 0x%x bytes at 0x%llx\n",
-		     do_write ? "write" : "read",
-		     size, blk_rq_pos(rq) * 512ULL);
-
-		do {
-			/* a bio clone to be passed down to OSD req */
-			dout("rq->bio->bi_vcnt=%d\n", rq->bio->bi_vcnt);
-			op_size = rbd_get_segment(&rbd_dev->header,
-						  rbd_dev->header.block_name,
-						  ofs, size,
-						  NULL, NULL);
-			bio = bio_chain_clone(&rq_bio, &next_bio, &bp,
-					      op_size, GFP_ATOMIC);
-			if (!bio) {
-				spin_lock_irq(q->queue_lock);
-				__blk_end_request_all(rq, -ENOMEM);
-				goto next;
-			}
-
-			/* init OSD command: write or read */
-			if (do_write)
-				rbd_req_write(rq, rbd_dev,
-					      rbd_dev->header.snapc,
-					      ofs,
-					      op_size, bio);
-			else
-				rbd_req_read(rq, rbd_dev,
-					     cur_snap_id(rbd_dev),
-					     ofs,
-					     op_size, bio);
-
-			size -= op_size;
-			ofs += op_size;
-
-			rq_bio = next_bio;
-		} while (size > 0);
-
-		if (bp)
-			bio_pair_release(bp);
-
-		spin_lock_irq(q->queue_lock);
-next:
-		rq = blk_fetch_request(q);
-	}
-}
-
-/*
- * a queue callback. Makes sure that we don't create a bio that spans across
- * multiple osd objects. One exception would be with a single page bios,
- * which we handle later at bio_chain_clone
- */
-static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
-			  struct bio_vec *bvec)
-{
-	struct rbd_device *rbd_dev = q->queuedata;
-	unsigned int chunk_sectors = 1 << (rbd_dev->header.obj_order - 9);
-	sector_t sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev);
-	unsigned int bio_sectors = bmd->bi_size >> 9;
-	int max;
-
-	max =  (chunk_sectors - ((sector & (chunk_sectors - 1))
-				 + bio_sectors)) << 9;
-	if (max < 0)
-		max = 0; /* bio_add cannot handle a negative return */
-	if (max <= bvec->bv_len && bio_sectors == 0)
-		return bvec->bv_len;
-	return max;
-}
-
-static void rbd_free_disk(struct rbd_device *rbd_dev)
-{
-	struct gendisk *disk = rbd_dev->disk;
-
-	if (!disk)
-		return;
-
-	rbd_header_free(&rbd_dev->header);
-
-	if (disk->flags & GENHD_FL_UP)
-		del_gendisk(disk);
-	if (disk->queue)
-		blk_cleanup_queue(disk->queue);
-	put_disk(disk);
-}
-
-/*
- * reload the ondisk the header 
- */
-static int rbd_read_header(struct rbd_device *rbd_dev,
-			   struct rbd_image_header *header)
-{
-	ssize_t rc;
-	struct rbd_image_header_ondisk *dh;
-	int snap_count = 0;
-	u64 snap_names_len = 0;
-
-	while (1) {
-		int len = sizeof(*dh) +
-			  snap_count * sizeof(struct rbd_image_snap_ondisk) +
-			  snap_names_len;
-
-		rc = -ENOMEM;
-		dh = kmalloc(len, GFP_KERNEL);
-		if (!dh)
-			return -ENOMEM;
-
-		rc = rbd_req_sync_read(rbd_dev,
-				       NULL, CEPH_NOSNAP,
-				       rbd_dev->obj_md_name,
-				       0, len,
-				       (char *)dh);
-		if (rc < 0)
-			goto out_dh;
-
-		rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL);
-		if (rc < 0)
-			goto out_dh;
-
-		if (snap_count != header->total_snaps) {
-			snap_count = header->total_snaps;
-			snap_names_len = header->snap_names_len;
-			rbd_header_free(header);
-			kfree(dh);
-			continue;
-		}
-		break;
-	}
-
-out_dh:
-	kfree(dh);
-	return rc;
-}
-
-/*
- * create a snapshot
- */
-static int rbd_header_add_snap(struct rbd_device *dev,
-			       const char *snap_name,
-			       gfp_t gfp_flags)
-{
-	int name_len = strlen(snap_name);
-	u64 new_snapid;
-	int ret;
-	void *data, *data_start, *data_end;
-
-	/* we should create a snapshot only if we're pointing at the head */
-	if (dev->cur_snap)
-		return -EINVAL;
-
-	ret = ceph_monc_create_snapid(&dev->client->monc, dev->poolid,
-				      &new_snapid);
-	dout("created snapid=%lld\n", new_snapid);
-	if (ret < 0)
-		return ret;
-
-	data = kmalloc(name_len + 16, gfp_flags);
-	if (!data)
-		return -ENOMEM;
-
-	data_start = data;
-	data_end = data + name_len + 16;
-
-	ceph_encode_string_safe(&data, data_end, snap_name, name_len, bad);
-	ceph_encode_64_safe(&data, data_end, new_snapid, bad);
-
-	ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add",
-				data_start, data - data_start);
-
-	kfree(data_start);
-
-	if (ret < 0)
-		return ret;
-
-	dev->header.snapc->seq =  new_snapid;
-
-	return 0;
-bad:
-	return -ERANGE;
-}
-
-/*
- * only read the first part of the ondisk header, without the snaps info
- */
-static int rbd_update_snaps(struct rbd_device *rbd_dev)
-{
-	int ret;
-	struct rbd_image_header h;
-	u64 snap_seq;
-
-	ret = rbd_read_header(rbd_dev, &h);
-	if (ret < 0)
-		return ret;
-
-	down_write(&rbd_dev->header.snap_rwsem);
-
-	snap_seq = rbd_dev->header.snapc->seq;
-
-	kfree(rbd_dev->header.snapc);
-	kfree(rbd_dev->header.snap_names);
-	kfree(rbd_dev->header.snap_sizes);
-
-	rbd_dev->header.total_snaps = h.total_snaps;
-	rbd_dev->header.snapc = h.snapc;
-	rbd_dev->header.snap_names = h.snap_names;
-	rbd_dev->header.snap_sizes = h.snap_sizes;
-	rbd_dev->header.snapc->seq = snap_seq;
-
-	up_write(&rbd_dev->header.snap_rwsem);
-
-	return 0;
-}
-
-static int rbd_init_disk(struct rbd_device *rbd_dev)
-{
-	struct gendisk *disk;
-	struct request_queue *q;
-	int rc;
-	u64 total_size = 0;
-
-	/* contact OSD, request size info about the object being mapped */
-	rc = rbd_read_header(rbd_dev, &rbd_dev->header);
-	if (rc)
-		return rc;
-
-	rc = rbd_header_set_snap(rbd_dev, rbd_dev->snap_name, &total_size);
-	if (rc)
-		return rc;
-
-	/* create gendisk info */
-	rc = -ENOMEM;
-	disk = alloc_disk(RBD_MINORS_PER_MAJOR);
-	if (!disk)
-		goto out;
-
-	sprintf(disk->disk_name, DRV_NAME "%d", rbd_dev->id);
-	disk->major = rbd_dev->major;
-	disk->first_minor = 0;
-	disk->fops = &rbd_bd_ops;
-	disk->private_data = rbd_dev;
-
-	/* init rq */
-	rc = -ENOMEM;
-	q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock);
-	if (!q)
-		goto out_disk;
-	blk_queue_merge_bvec(q, rbd_merge_bvec);
-	disk->queue = q;
-
-	q->queuedata = rbd_dev;
-
-	rbd_dev->disk = disk;
-	rbd_dev->q = q;
-
-	/* finally, announce the disk to the world */
-	set_capacity(disk, total_size / 512ULL);
-	add_disk(disk);
-
-	pr_info("%s: added with size 0x%llx\n",
-		disk->disk_name, (unsigned long long)total_size);
-	return 0;
-
-out_disk:
-	put_disk(disk);
-out:
-	return rc;
-}
-
-/********************************************************************
- * /sys/class/rbd/
- *                   add	map rados objects to blkdev
- *                   remove	unmap rados objects
- *                   list	show mappings
- *******************************************************************/
-
-static void class_rbd_release(struct class *cls)
-{
-	kfree(cls);
-}
-
-static ssize_t class_rbd_list(struct class *c,
-			      struct class_attribute *attr,
-			      char *data)
-{
-	int n = 0;
-	struct list_head *tmp;
-	int max = PAGE_SIZE;
-
-	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-
-	n += snprintf(data, max,
-		      "#id\tmajor\tclient_name\tpool\tname\tsnap\tKB\n");
-
-	list_for_each(tmp, &rbd_dev_list) {
-		struct rbd_device *rbd_dev;
-
-		rbd_dev = list_entry(tmp, struct rbd_device, node);
-		n += snprintf(data+n, max-n,
-			      "%d\t%d\tclient%lld\t%s\t%s\t%s\t%lld\n",
-			      rbd_dev->id,
-			      rbd_dev->major,
-			      ceph_client_id(rbd_dev->client),
-			      rbd_dev->pool_name,
-			      rbd_dev->obj, rbd_dev->snap_name,
-			      rbd_dev->header.image_size >> 10);
-		if (n == max)
-			break;
-	}
-
-	mutex_unlock(&ctl_mutex);
-	return n;
-}
-
-static ssize_t class_rbd_add(struct class *c,
-			     struct class_attribute *attr,
-			     const char *buf, size_t count)
-{
-	struct ceph_osd_client *osdc;
-	struct rbd_device *rbd_dev;
-	ssize_t rc = -ENOMEM;
-	int irc, new_id = 0;
-	struct list_head *tmp;
-	char *mon_dev_name;
-	char *options;
-
-	if (!try_module_get(THIS_MODULE))
-		return -ENODEV;
-
-	mon_dev_name = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL);
-	if (!mon_dev_name)
-		goto err_out_mod;
-
-	options = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL);
-	if (!options)
-		goto err_mon_dev;
-
-	/* new rbd_device object */
-	rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL);
-	if (!rbd_dev)
-		goto err_out_opt;
-
-	/* static rbd_device initialization */
-	spin_lock_init(&rbd_dev->lock);
-	INIT_LIST_HEAD(&rbd_dev->node);
-
-	/* generate unique id: find highest unique id, add one */
-	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-
-	list_for_each(tmp, &rbd_dev_list) {
-		struct rbd_device *rbd_dev;
-
-		rbd_dev = list_entry(tmp, struct rbd_device, node);
-		if (rbd_dev->id >= new_id)
-			new_id = rbd_dev->id + 1;
-	}
-
-	rbd_dev->id = new_id;
-
-	/* add to global list */
-	list_add_tail(&rbd_dev->node, &rbd_dev_list);
-
-	/* parse add command */
-	if (sscanf(buf, "%" __stringify(RBD_MAX_OPT_LEN) "s "
-		   "%" __stringify(RBD_MAX_OPT_LEN) "s "
-		   "%" __stringify(RBD_MAX_POOL_NAME_LEN) "s "
-		   "%" __stringify(RBD_MAX_OBJ_NAME_LEN) "s"
-		   "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s",
-		   mon_dev_name, options, rbd_dev->pool_name,
-		   rbd_dev->obj, rbd_dev->snap_name) < 4) {
-		rc = -EINVAL;
-		goto err_out_slot;
-	}
-
-	if (rbd_dev->snap_name[0] == 0)
-		rbd_dev->snap_name[0] = '-';
-
-	rbd_dev->obj_len = strlen(rbd_dev->obj);
-	snprintf(rbd_dev->obj_md_name, sizeof(rbd_dev->obj_md_name), "%s%s",
-		 rbd_dev->obj, RBD_SUFFIX);
-
-	/* initialize rest of new object */
-	snprintf(rbd_dev->name, DEV_NAME_LEN, DRV_NAME "%d", rbd_dev->id);
-	rc = rbd_get_client(rbd_dev, mon_dev_name, options);
-	if (rc < 0)
-		goto err_out_slot;
-
-	mutex_unlock(&ctl_mutex);
-
-	/* pick the pool */
-	osdc = &rbd_dev->client->osdc;
-	rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name);
-	if (rc < 0)
-		goto err_out_client;
-	rbd_dev->poolid = rc;
-
-	/* register our block device */
-	irc = register_blkdev(0, rbd_dev->name);
-	if (irc < 0) {
-		rc = irc;
-		goto err_out_client;
-	}
-	rbd_dev->major = irc;
-
-	/* set up and announce blkdev mapping */
-	rc = rbd_init_disk(rbd_dev);
-	if (rc)
-		goto err_out_blkdev;
-
-	return count;
-
-err_out_blkdev:
-	unregister_blkdev(rbd_dev->major, rbd_dev->name);
-err_out_client:
-	rbd_put_client(rbd_dev);
-	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-err_out_slot:
-	list_del_init(&rbd_dev->node);
-	mutex_unlock(&ctl_mutex);
-
-	kfree(rbd_dev);
-err_out_opt:
-	kfree(options);
-err_mon_dev:
-	kfree(mon_dev_name);
-err_out_mod:
-	dout("Error adding device %s\n", buf);
-	module_put(THIS_MODULE);
-	return rc;
-}
-
-static struct rbd_device *__rbd_get_dev(unsigned long id)
-{
-	struct list_head *tmp;
-	struct rbd_device *rbd_dev;
-
-	list_for_each(tmp, &rbd_dev_list) {
-		rbd_dev = list_entry(tmp, struct rbd_device, node);
-		if (rbd_dev->id == id)
-			return rbd_dev;
-	}
-	return NULL;
-}
-
-static ssize_t class_rbd_remove(struct class *c,
-				struct class_attribute *attr,
-				const char *buf,
-				size_t count)
-{
-	struct rbd_device *rbd_dev = NULL;
-	int target_id, rc;
-	unsigned long ul;
-
-	rc = strict_strtoul(buf, 10, &ul);
-	if (rc)
-		return rc;
-
-	/* convert to int; abort if we lost anything in the conversion */
-	target_id = (int) ul;
-	if (target_id != ul)
-		return -EINVAL;
-
-	/* remove object from list immediately */
-	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-
-	rbd_dev = __rbd_get_dev(target_id);
-	if (rbd_dev)
-		list_del_init(&rbd_dev->node);
-
-	mutex_unlock(&ctl_mutex);
-
-	if (!rbd_dev)
-		return -ENOENT;
-
-	rbd_put_client(rbd_dev);
-
-	/* clean up and free blkdev */
-	rbd_free_disk(rbd_dev);
-	unregister_blkdev(rbd_dev->major, rbd_dev->name);
-	kfree(rbd_dev);
-
-	/* release module ref */
-	module_put(THIS_MODULE);
-
-	return count;
-}
-
-static ssize_t class_rbd_snaps_list(struct class *c,
-			      struct class_attribute *attr,
-			      char *data)
-{
-	struct rbd_device *rbd_dev = NULL;
-	struct list_head *tmp;
-	struct rbd_image_header *header;
-	int i, n = 0, max = PAGE_SIZE;
-	int ret;
-
-	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-
-	n += snprintf(data, max, "#id\tsnap\tKB\n");
-
-	list_for_each(tmp, &rbd_dev_list) {
-		char *names, *p;
-		struct ceph_snap_context *snapc;
-
-		rbd_dev = list_entry(tmp, struct rbd_device, node);
-		header = &rbd_dev->header;
-
-		down_read(&header->snap_rwsem);
-
-		names = header->snap_names;
-		snapc = header->snapc;
-
-		n += snprintf(data + n, max - n, "%d\t%s\t%lld%s\n",
-			      rbd_dev->id, RBD_SNAP_HEAD_NAME,
-			      header->image_size >> 10,
-			      (!rbd_dev->cur_snap ? " (*)" : ""));
-		if (n == max)
-			break;
-
-		p = names;
-		for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) {
-			n += snprintf(data + n, max - n, "%d\t%s\t%lld%s\n",
-			      rbd_dev->id, p, header->snap_sizes[i] >> 10,
-			      (rbd_dev->cur_snap &&
-			       (snap_index(header, i) == rbd_dev->cur_snap) ?
-			       " (*)" : ""));
-			if (n == max)
-				break;
-		}
-
-		up_read(&header->snap_rwsem);
-	}
-
-
-	ret = n;
-	mutex_unlock(&ctl_mutex);
-	return ret;
-}
-
-static ssize_t class_rbd_snaps_refresh(struct class *c,
-				struct class_attribute *attr,
-				const char *buf,
-				size_t count)
-{
-	struct rbd_device *rbd_dev = NULL;
-	int target_id, rc;
-	unsigned long ul;
-	int ret = count;
-
-	rc = strict_strtoul(buf, 10, &ul);
-	if (rc)
-		return rc;
-
-	/* convert to int; abort if we lost anything in the conversion */
-	target_id = (int) ul;
-	if (target_id != ul)
-		return -EINVAL;
-
-	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-
-	rbd_dev = __rbd_get_dev(target_id);
-	if (!rbd_dev) {
-		ret = -ENOENT;
-		goto done;
-	}
-
-	rc = rbd_update_snaps(rbd_dev);
-	if (rc < 0)
-		ret = rc;
-
-done:
-	mutex_unlock(&ctl_mutex);
-	return ret;
-}
-
-static ssize_t class_rbd_snap_create(struct class *c,
-				struct class_attribute *attr,
-				const char *buf,
-				size_t count)
-{
-	struct rbd_device *rbd_dev = NULL;
-	int target_id, ret;
-	char *name;
-
-	name = kmalloc(RBD_MAX_SNAP_NAME_LEN + 1, GFP_KERNEL);
-	if (!name)
-		return -ENOMEM;
-
-	/* parse snaps add command */
-	if (sscanf(buf, "%d "
-		   "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s",
-		   &target_id,
-		   name) != 2) {
-		ret = -EINVAL;
-		goto done;
-	}
-
-	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-
-	rbd_dev = __rbd_get_dev(target_id);
-	if (!rbd_dev) {
-		ret = -ENOENT;
-		goto done_unlock;
-	}
-
-	ret = rbd_header_add_snap(rbd_dev,
-				  name, GFP_KERNEL);
-	if (ret < 0)
-		goto done_unlock;
-
-	ret = rbd_update_snaps(rbd_dev);
-	if (ret < 0)
-		goto done_unlock;
-
-	ret = count;
-done_unlock:
-	mutex_unlock(&ctl_mutex);
-done:
-	kfree(name);
-	return ret;
-}
-
-static ssize_t class_rbd_rollback(struct class *c,
-				struct class_attribute *attr,
-				const char *buf,
-				size_t count)
-{
-	struct rbd_device *rbd_dev = NULL;
-	int target_id, ret;
-	u64 snapid;
-	char snap_name[RBD_MAX_SNAP_NAME_LEN];
-	u64 cur_ofs;
-	char *seg_name;
-
-	/* parse snaps add command */
-	if (sscanf(buf, "%d "
-		   "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s",
-		   &target_id,
-		   snap_name) != 2) {
-		return -EINVAL;
-	}
-
-	ret = -ENOMEM;
-	seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO);
-	if (!seg_name)
-		return ret;
-
-	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-
-	rbd_dev = __rbd_get_dev(target_id);
-	if (!rbd_dev) {
-		ret = -ENOENT;
-		goto done_unlock;
-	}
-
-	ret = snap_by_name(&rbd_dev->header, snap_name, &snapid, NULL);
-	if (ret < 0)
-		goto done_unlock;
-
-	dout("snapid=%lld\n", snapid);
-
-	cur_ofs = 0;
-	while (cur_ofs < rbd_dev->header.image_size) {
-		cur_ofs += rbd_get_segment(&rbd_dev->header,
-					   rbd_dev->obj,
-					   cur_ofs, (u64)-1,
-					   seg_name, NULL);
-		dout("seg_name=%s\n", seg_name);
-
-		ret = rbd_req_sync_rollback_obj(rbd_dev, snapid, seg_name);
-		if (ret < 0)
-			pr_warning("could not roll back obj %s err=%d\n",
-				   seg_name, ret);
-	}
-
-	ret = rbd_update_snaps(rbd_dev);
-	if (ret < 0)
-		goto done_unlock;
-
-	ret = count;
-
-done_unlock:
-	mutex_unlock(&ctl_mutex);
-	kfree(seg_name);
-
-	return ret;
-}
-
-static struct class_attribute class_rbd_attrs[] = {
-	__ATTR(add,		0200, NULL, class_rbd_add),
-	__ATTR(remove,		0200, NULL, class_rbd_remove),
-	__ATTR(list,		0444, class_rbd_list, NULL),
-	__ATTR(snaps_refresh,	0200, NULL, class_rbd_snaps_refresh),
-	__ATTR(snap_create,	0200, NULL, class_rbd_snap_create),
-	__ATTR(snaps_list,	0444, class_rbd_snaps_list, NULL),
-	__ATTR(snap_rollback,	0200, NULL, class_rbd_rollback),
-	__ATTR_NULL
-};
-
-/*
- * create control files in sysfs
- * /sys/class/rbd/...
- */
-static int rbd_sysfs_init(void)
-{
-	int ret = -ENOMEM;
-
-	class_rbd = kzalloc(sizeof(*class_rbd), GFP_KERNEL);
-	if (!class_rbd)
-		goto out;
-
-	class_rbd->name = DRV_NAME;
-	class_rbd->owner = THIS_MODULE;
-	class_rbd->class_release = class_rbd_release;
-	class_rbd->class_attrs = class_rbd_attrs;
-
-	ret = class_register(class_rbd);
-	if (ret)
-		goto out_class;
-	return 0;
-
-out_class:
-	kfree(class_rbd);
-	class_rbd = NULL;
-	pr_err(DRV_NAME ": failed to create class rbd\n");
-out:
-	return ret;
-}
-
-static void rbd_sysfs_cleanup(void)
-{
-	if (class_rbd)
-		class_destroy(class_rbd);
-	class_rbd = NULL;
-}
-
-int __init rbd_init(void)
-{
-	int rc;
-
-	rc = rbd_sysfs_init();
-	if (rc)
-		return rc;
-	spin_lock_init(&node_lock);
-	pr_info("loaded " DRV_NAME_LONG "\n");
-	return 0;
-}
-
-void __exit rbd_exit(void)
-{
-	rbd_sysfs_cleanup();
-}
-
-module_init(rbd_init);
-module_exit(rbd_exit);
-
-MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
-MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
-MODULE_DESCRIPTION("rados block device");
-
-/* following authorship retained from original osdblk.c */
-MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>");
-
-MODULE_LICENSE("GPL");
diff --git a/trunk/drivers/block/rbd_types.h b/trunk/drivers/block/rbd_types.h
deleted file mode 100644
index fc6c678aa2cb..000000000000
--- a/trunk/drivers/block/rbd_types.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2004-2010 Sage Weil <sage@newdream.net>
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation.  See file COPYING.
- *
- */
-
-#ifndef CEPH_RBD_TYPES_H
-#define CEPH_RBD_TYPES_H
-
-#include <linux/types.h>
-
-/*
- * rbd image 'foo' consists of objects
- *   foo.rbd      - image metadata
- *   foo.00000000
- *   foo.00000001
- *   ...          - data
- */
-
-#define RBD_SUFFIX		".rbd"
-#define RBD_DIRECTORY           "rbd_directory"
-#define RBD_INFO                "rbd_info"
-
-#define RBD_DEFAULT_OBJ_ORDER	22   /* 4MB */
-#define RBD_MIN_OBJ_ORDER       16
-#define RBD_MAX_OBJ_ORDER       30
-
-#define RBD_MAX_OBJ_NAME_LEN	96
-#define RBD_MAX_SEG_NAME_LEN	128
-
-#define RBD_COMP_NONE		0
-#define RBD_CRYPT_NONE		0
-
-#define RBD_HEADER_TEXT		"<<< Rados Block Device Image >>>\n"
-#define RBD_HEADER_SIGNATURE	"RBD"
-#define RBD_HEADER_VERSION	"001.005"
-
-struct rbd_info {
-	__le64 max_id;
-} __attribute__ ((packed));
-
-struct rbd_image_snap_ondisk {
-	__le64 id;
-	__le64 image_size;
-} __attribute__((packed));
-
-struct rbd_image_header_ondisk {
-	char text[40];
-	char block_name[24];
-	char signature[4];
-	char version[8];
-	struct {
-		__u8 order;
-		__u8 crypt_type;
-		__u8 comp_type;
-		__u8 unused;
-	} __attribute__((packed)) options;
-	__le64 image_size;
-	__le64 snap_seq;
-	__le32 snap_count;
-	__le32 reserved;
-	__le64 snap_names_len;
-	struct rbd_image_snap_ondisk snaps[0];
-} __attribute__((packed));
-
-
-#endif
diff --git a/trunk/drivers/block/virtio_blk.c b/trunk/drivers/block/virtio_blk.c
index 8320490226b7..1101e251a629 100644
--- a/trunk/drivers/block/virtio_blk.c
+++ b/trunk/drivers/block/virtio_blk.c
@@ -2,6 +2,7 @@
 #include <linux/spinlock.h>
 #include <linux/slab.h>
 #include <linux/blkdev.h>
+#include <linux/smp_lock.h>
 #include <linux/hdreg.h>
 #include <linux/virtio.h>
 #include <linux/virtio_blk.h>
@@ -221,8 +222,8 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
 	return err;
 }
 
-static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
-			     unsigned int cmd, unsigned long data)
+static int virtblk_locked_ioctl(struct block_device *bdev, fmode_t mode,
+			 unsigned cmd, unsigned long data)
 {
 	struct gendisk *disk = bdev->bd_disk;
 	struct virtio_blk *vblk = disk->private_data;
@@ -237,6 +238,18 @@ static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
 			      (void __user *)data);
 }
 
+static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
+			     unsigned int cmd, unsigned long param)
+{
+	int ret;
+
+	lock_kernel();
+	ret = virtblk_locked_ioctl(bdev, mode, cmd, param);
+	unlock_kernel();
+
+	return ret;
+}
+
 /* We provide getgeo only to please some old bootloader/partitioning tools */
 static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
 {
diff --git a/trunk/drivers/char/virtio_console.c b/trunk/drivers/char/virtio_console.c
index 6c1b676643a9..0f69c5ec0ecd 100644
--- a/trunk/drivers/char/virtio_console.c
+++ b/trunk/drivers/char/virtio_console.c
@@ -48,9 +48,6 @@ struct ports_driver_data {
 	/* Used for exporting per-port information to debugfs */
 	struct dentry *debugfs_dir;
 
-	/* List of all the devices we're handling */
-	struct list_head portdevs;
-
 	/* Number of devices this driver is handling */
 	unsigned int index;
 
@@ -111,9 +108,6 @@ struct port_buffer {
  * ports for that device (vdev->priv).
  */
 struct ports_device {
-	/* Next portdev in the list, head is in the pdrvdata struct */
-	struct list_head list;
-
 	/*
 	 * Workqueue handlers where we process deferred work after
 	 * notification
@@ -184,21 +178,15 @@ struct port {
 	struct console cons;
 
 	/* Each port associates with a separate char device */
-	struct cdev *cdev;
+	struct cdev cdev;
 	struct device *dev;
 
-	/* Reference-counting to handle port hot-unplugs and file operations */
-	struct kref kref;
-
 	/* A waitqueue for poll() or blocking read operations */
 	wait_queue_head_t waitqueue;
 
 	/* The 'name' of the port that we expose via sysfs properties */
 	char *name;
 
-	/* We can notify apps of host connect / disconnect events via SIGIO */
-	struct fasync_struct *async_queue;
-
 	/* The 'id' to identify the port with the Host */
 	u32 id;
 
@@ -233,41 +221,6 @@ static struct port *find_port_by_vtermno(u32 vtermno)
 	return port;
 }
 
-static struct port *find_port_by_devt_in_portdev(struct ports_device *portdev,
-						 dev_t dev)
-{
-	struct port *port;
-	unsigned long flags;
-
-	spin_lock_irqsave(&portdev->ports_lock, flags);
-	list_for_each_entry(port, &portdev->ports, list)
-		if (port->cdev->dev == dev)
-			goto out;
-	port = NULL;
-out:
-	spin_unlock_irqrestore(&portdev->ports_lock, flags);
-
-	return port;
-}
-
-static struct port *find_port_by_devt(dev_t dev)
-{
-	struct ports_device *portdev;
-	struct port *port;
-	unsigned long flags;
-
-	spin_lock_irqsave(&pdrvdata_lock, flags);
-	list_for_each_entry(portdev, &pdrvdata.portdevs, list) {
-		port = find_port_by_devt_in_portdev(portdev, dev);
-		if (port)
-			goto out;
-	}
-	port = NULL;
-out:
-	spin_unlock_irqrestore(&pdrvdata_lock, flags);
-	return port;
-}
-
 static struct port *find_port_by_id(struct ports_device *portdev, u32 id)
 {
 	struct port *port;
@@ -457,10 +410,7 @@ static ssize_t __send_control_msg(struct ports_device *portdev, u32 port_id,
 static ssize_t send_control_msg(struct port *port, unsigned int event,
 				unsigned int value)
 {
-	/* Did the port get unplugged before userspace closed it? */
-	if (port->portdev)
-		return __send_control_msg(port->portdev, port->id, event, value);
-	return 0;
+	return __send_control_msg(port->portdev, port->id, event, value);
 }
 
 /* Callers must take the port->outvq_lock */
@@ -575,10 +525,6 @@ static ssize_t fill_readbuf(struct port *port, char *out_buf, size_t out_count,
 /* The condition that must be true for polling to end */
 static bool will_read_block(struct port *port)
 {
-	if (!port->guest_connected) {
-		/* Port got hot-unplugged. Let's exit. */
-		return false;
-	}
 	return !port_has_data(port) && port->host_connected;
 }
 
@@ -629,9 +575,6 @@ static ssize_t port_fops_read(struct file *filp, char __user *ubuf,
 		if (ret < 0)
 			return ret;
 	}
-	/* Port got hot-unplugged. */
-	if (!port->guest_connected)
-		return -ENODEV;
 	/*
 	 * We could've received a disconnection message while we were
 	 * waiting for more data.
@@ -673,9 +616,6 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf,
 		if (ret < 0)
 			return ret;
 	}
-	/* Port got hot-unplugged. */
-	if (!port->guest_connected)
-		return -ENODEV;
 
 	count = min((size_t)(32 * 1024), count);
 
@@ -716,10 +656,6 @@ static unsigned int port_fops_poll(struct file *filp, poll_table *wait)
 	port = filp->private_data;
 	poll_wait(filp, &port->waitqueue, wait);
 
-	if (!port->guest_connected) {
-		/* Port got unplugged */
-		return POLLHUP;
-	}
 	ret = 0;
 	if (!will_read_block(port))
 		ret |= POLLIN | POLLRDNORM;
@@ -731,8 +667,6 @@ static unsigned int port_fops_poll(struct file *filp, poll_table *wait)
 	return ret;
 }
 
-static void remove_port(struct kref *kref);
-
 static int port_fops_release(struct inode *inode, struct file *filp)
 {
 	struct port *port;
@@ -753,16 +687,6 @@ static int port_fops_release(struct inode *inode, struct file *filp)
 	reclaim_consumed_buffers(port);
 	spin_unlock_irq(&port->outvq_lock);
 
-	/*
-	 * Locks aren't necessary here as a port can't be opened after
-	 * unplug, and if a port isn't unplugged, a kref would already
-	 * exist for the port.  Plus, taking ports_lock here would
-	 * create a dependency on other locks taken by functions
-	 * inside remove_port if we're the last holder of the port,
-	 * creating many problems.
-	 */
-	kref_put(&port->kref, remove_port);
-
 	return 0;
 }
 
@@ -770,31 +694,22 @@ static int port_fops_open(struct inode *inode, struct file *filp)
 {
 	struct cdev *cdev = inode->i_cdev;
 	struct port *port;
-	int ret;
 
-	port = find_port_by_devt(cdev->dev);
+	port = container_of(cdev, struct port, cdev);
 	filp->private_data = port;
 
-	/* Prevent against a port getting hot-unplugged at the same time */
-	spin_lock_irq(&port->portdev->ports_lock);
-	kref_get(&port->kref);
-	spin_unlock_irq(&port->portdev->ports_lock);
-
 	/*
 	 * Don't allow opening of console port devices -- that's done
 	 * via /dev/hvc
 	 */
-	if (is_console_port(port)) {
-		ret = -ENXIO;
-		goto out;
-	}
+	if (is_console_port(port))
+		return -ENXIO;
 
 	/* Allow only one process to open a particular port at a time */
 	spin_lock_irq(&port->inbuf_lock);
 	if (port->guest_connected) {
 		spin_unlock_irq(&port->inbuf_lock);
-		ret = -EMFILE;
-		goto out;
+		return -EMFILE;
 	}
 
 	port->guest_connected = true;
@@ -809,23 +724,10 @@ static int port_fops_open(struct inode *inode, struct file *filp)
 	reclaim_consumed_buffers(port);
 	spin_unlock_irq(&port->outvq_lock);
 
-	nonseekable_open(inode, filp);
-
 	/* Notify host of port being opened */
 	send_control_msg(filp->private_data, VIRTIO_CONSOLE_PORT_OPEN, 1);
 
 	return 0;
-out:
-	kref_put(&port->kref, remove_port);
-	return ret;
-}
-
-static int port_fops_fasync(int fd, struct file *filp, int mode)
-{
-	struct port *port;
-
-	port = filp->private_data;
-	return fasync_helper(fd, filp, mode, &port->async_queue);
 }
 
 /*
@@ -841,8 +743,6 @@ static const struct file_operations port_fops = {
 	.write = port_fops_write,
 	.poll  = port_fops_poll,
 	.release = port_fops_release,
-	.fasync = port_fops_fasync,
-	.llseek = no_llseek,
 };
 
 /*
@@ -1101,12 +1001,6 @@ static unsigned int fill_queue(struct virtqueue *vq, spinlock_t *lock)
 	return nr_added_bufs;
 }
 
-static void send_sigio_to_port(struct port *port)
-{
-	if (port->async_queue && port->guest_connected)
-		kill_fasync(&port->async_queue, SIGIO, POLL_OUT);
-}
-
 static int add_port(struct ports_device *portdev, u32 id)
 {
 	char debugfs_name[16];
@@ -1121,7 +1015,6 @@ static int add_port(struct ports_device *portdev, u32 id)
 		err = -ENOMEM;
 		goto fail;
 	}
-	kref_init(&port->kref);
 
 	port->portdev = portdev;
 	port->id = id;
@@ -1129,7 +1022,6 @@ static int add_port(struct ports_device *portdev, u32 id)
 	port->name = NULL;
 	port->inbuf = NULL;
 	port->cons.hvc = NULL;
-	port->async_queue = NULL;
 
 	port->cons.ws.ws_row = port->cons.ws.ws_col = 0;
 
@@ -1140,20 +1032,14 @@ static int add_port(struct ports_device *portdev, u32 id)
 	port->in_vq = portdev->in_vqs[port->id];
 	port->out_vq = portdev->out_vqs[port->id];
 
-	port->cdev = cdev_alloc();
-	if (!port->cdev) {
-		dev_err(&port->portdev->vdev->dev, "Error allocating cdev\n");
-		err = -ENOMEM;
-		goto free_port;
-	}
-	port->cdev->ops = &port_fops;
+	cdev_init(&port->cdev, &port_fops);
 
 	devt = MKDEV(portdev->chr_major, id);
-	err = cdev_add(port->cdev, devt, 1);
+	err = cdev_add(&port->cdev, devt, 1);
 	if (err < 0) {
 		dev_err(&port->portdev->vdev->dev,
 			"Error %d adding cdev for port %u\n", err, id);
-		goto free_cdev;
+		goto free_port;
 	}
 	port->dev = device_create(pdrvdata.class, &port->portdev->vdev->dev,
 				  devt, port, "vport%up%u",
@@ -1218,7 +1104,7 @@ static int add_port(struct ports_device *portdev, u32 id)
 free_device:
 	device_destroy(pdrvdata.class, port->dev->devt);
 free_cdev:
-	cdev_del(port->cdev);
+	cdev_del(&port->cdev);
 free_port:
 	kfree(port);
 fail:
@@ -1227,46 +1113,22 @@ static int add_port(struct ports_device *portdev, u32 id)
 	return err;
 }
 
-/* No users remain, remove all port-specific data. */
-static void remove_port(struct kref *kref)
-{
-	struct port *port;
-
-	port = container_of(kref, struct port, kref);
-
-	sysfs_remove_group(&port->dev->kobj, &port_attribute_group);
-	device_destroy(pdrvdata.class, port->dev->devt);
-	cdev_del(port->cdev);
-
-	kfree(port->name);
-
-	debugfs_remove(port->debugfs_file);
-
-	kfree(port);
-}
-
-/*
- * Port got unplugged.  Remove port from portdev's list and drop the
- * kref reference.  If no userspace has this port opened, it will
- * result in immediate removal the port.
- */
-static void unplug_port(struct port *port)
+/* Remove all port-specific data. */
+static int remove_port(struct port *port)
 {
 	struct port_buffer *buf;
 
-	spin_lock_irq(&port->portdev->ports_lock);
-	list_del(&port->list);
-	spin_unlock_irq(&port->portdev->ports_lock);
-
 	if (port->guest_connected) {
 		port->guest_connected = false;
 		port->host_connected = false;
 		wake_up_interruptible(&port->waitqueue);
-
-		/* Let the app know the port is going down. */
-		send_sigio_to_port(port);
+		send_control_msg(port, VIRTIO_CONSOLE_PORT_OPEN, 0);
 	}
 
+	spin_lock_irq(&port->portdev->ports_lock);
+	list_del(&port->list);
+	spin_unlock_irq(&port->portdev->ports_lock);
+
 	if (is_console_port(port)) {
 		spin_lock_irq(&pdrvdata_lock);
 		list_del(&port->cons.list);
@@ -1284,6 +1146,9 @@ static void unplug_port(struct port *port)
 		hvc_remove(port->cons.hvc);
 #endif
 	}
+	sysfs_remove_group(&port->dev->kobj, &port_attribute_group);
+	device_destroy(pdrvdata.class, port->dev->devt);
+	cdev_del(&port->cdev);
 
 	/* Remove unused data this port might have received. */
 	discard_port_data(port);
@@ -1294,19 +1159,12 @@ static void unplug_port(struct port *port)
 	while ((buf = virtqueue_detach_unused_buf(port->in_vq)))
 		free_buf(buf);
 
-	/*
-	 * We should just assume the device itself has gone off --
-	 * else a close on an open port later will try to send out a
-	 * control message.
-	 */
-	port->portdev = NULL;
+	kfree(port->name);
 
-	/*
-	 * Locks around here are not necessary - a port can't be
-	 * opened after we removed the port struct from ports_list
-	 * above.
-	 */
-	kref_put(&port->kref, remove_port);
+	debugfs_remove(port->debugfs_file);
+
+	kfree(port);
+	return 0;
 }
 
 /* Any private messages that the Host and Guest want to share */
@@ -1345,7 +1203,7 @@ static void handle_control_message(struct ports_device *portdev,
 		add_port(portdev, cpkt->id);
 		break;
 	case VIRTIO_CONSOLE_PORT_REMOVE:
-		unplug_port(port);
+		remove_port(port);
 		break;
 	case VIRTIO_CONSOLE_CONSOLE_PORT:
 		if (!cpkt->value)
@@ -1387,12 +1245,6 @@ static void handle_control_message(struct ports_device *portdev,
 		spin_lock_irq(&port->outvq_lock);
 		reclaim_consumed_buffers(port);
 		spin_unlock_irq(&port->outvq_lock);
-
-		/*
-		 * If the guest is connected, it'll be interested in
-		 * knowing the host connection state changed.
-		 */
-		send_sigio_to_port(port);
 		break;
 	case VIRTIO_CONSOLE_PORT_NAME:
 		/*
@@ -1489,9 +1341,6 @@ static void in_intr(struct virtqueue *vq)
 
 	wake_up_interruptible(&port->waitqueue);
 
-	/* Send a SIGIO indicating new data in case the process asked for it */
-	send_sigio_to_port(port);
-
 	if (is_console_port(port) && hvc_poll(port->cons.hvc))
 		hvc_kick();
 }
@@ -1728,10 +1577,6 @@ static int __devinit virtcons_probe(struct virtio_device *vdev)
 		add_port(portdev, 0);
 	}
 
-	spin_lock_irq(&pdrvdata_lock);
-	list_add_tail(&portdev->list, &pdrvdata.portdevs);
-	spin_unlock_irq(&pdrvdata_lock);
-
 	__send_control_msg(portdev, VIRTIO_CONSOLE_BAD_ID,
 			   VIRTIO_CONSOLE_DEVICE_READY, 1);
 	return 0;
@@ -1755,41 +1600,23 @@ static void virtcons_remove(struct virtio_device *vdev)
 {
 	struct ports_device *portdev;
 	struct port *port, *port2;
+	struct port_buffer *buf;
+	unsigned int len;
 
 	portdev = vdev->priv;
 
-	spin_lock_irq(&pdrvdata_lock);
-	list_del(&portdev->list);
-	spin_unlock_irq(&pdrvdata_lock);
-
-	/* Disable interrupts for vqs */
-	vdev->config->reset(vdev);
-	/* Finish up work that's lined up */
 	cancel_work_sync(&portdev->control_work);
 
 	list_for_each_entry_safe(port, port2, &portdev->ports, list)
-		unplug_port(port);
+		remove_port(port);
 
 	unregister_chrdev(portdev->chr_major, "virtio-portsdev");
 
-	/*
-	 * When yanking out a device, we immediately lose the
-	 * (device-side) queues.  So there's no point in keeping the
-	 * guest side around till we drop our final reference.  This
-	 * also means that any ports which are in an open state will
-	 * have to just stop using the port, as the vqs are going
-	 * away.
-	 */
-	if (use_multiport(portdev)) {
-		struct port_buffer *buf;
-		unsigned int len;
-
-		while ((buf = virtqueue_get_buf(portdev->c_ivq, &len)))
-			free_buf(buf);
+	while ((buf = virtqueue_get_buf(portdev->c_ivq, &len)))
+		free_buf(buf);
 
-		while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq)))
-			free_buf(buf);
-	}
+	while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq)))
+		free_buf(buf);
 
 	vdev->config->del_vqs(vdev);
 	kfree(portdev->in_vqs);
@@ -1836,7 +1663,6 @@ static int __init init(void)
 			   PTR_ERR(pdrvdata.debugfs_dir));
 	}
 	INIT_LIST_HEAD(&pdrvdata.consoles);
-	INIT_LIST_HEAD(&pdrvdata.portdevs);
 
 	return register_virtio_driver(&virtio_console);
 }
diff --git a/trunk/fs/ceph/Kconfig b/trunk/fs/ceph/Kconfig
index 9eb134ea6eb2..0fcd2640c23f 100644
--- a/trunk/fs/ceph/Kconfig
+++ b/trunk/fs/ceph/Kconfig
@@ -1,11 +1,9 @@
 config CEPH_FS
         tristate "Ceph distributed file system (EXPERIMENTAL)"
 	depends on INET && EXPERIMENTAL
-	select CEPH_LIB
 	select LIBCRC32C
 	select CRYPTO_AES
 	select CRYPTO
-	default n
 	help
 	  Choose Y or M here to include support for mounting the
 	  experimental Ceph distributed file system.  Ceph is an extremely
@@ -16,3 +14,15 @@ config CEPH_FS
 
 	  If unsure, say N.
 
+config CEPH_FS_PRETTYDEBUG
+	bool "Include file:line in ceph debug output"
+	depends on CEPH_FS
+	default n
+	help
+	  If you say Y here, debug output will include a filename and
+	  line to aid debugging.  This icnreases kernel size and slows
+	  execution slightly when debug call sites are enabled (e.g.,
+	  via CONFIG_DYNAMIC_DEBUG).
+
+	  If unsure, say N.
+
diff --git a/trunk/fs/ceph/Makefile b/trunk/fs/ceph/Makefile
index 9e6c4f2e8ff1..278e1172600d 100644
--- a/trunk/fs/ceph/Makefile
+++ b/trunk/fs/ceph/Makefile
@@ -8,8 +8,15 @@ obj-$(CONFIG_CEPH_FS) += ceph.o
 
 ceph-objs := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
 	export.o caps.o snap.o xattr.o \
-	mds_client.o mdsmap.o strings.o ceph_frag.o \
-	debugfs.o
+	messenger.o msgpool.o buffer.o pagelist.o \
+	mds_client.o mdsmap.o \
+	mon_client.o \
+	osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
+	debugfs.o \
+	auth.o auth_none.o \
+	crypto.o armor.o \
+	auth_x.o \
+	ceph_fs.o ceph_strings.o ceph_hash.o ceph_frag.o
 
 else
 #Otherwise we were called directly from the command
diff --git a/trunk/fs/ceph/README b/trunk/fs/ceph/README
new file mode 100644
index 000000000000..18352fab37c0
--- /dev/null
+++ b/trunk/fs/ceph/README
@@ -0,0 +1,20 @@
+#
+# The following files are shared by (and manually synchronized
+# between) the Ceph userland and kernel client.
+#
+# userland                  kernel
+src/include/ceph_fs.h	    fs/ceph/ceph_fs.h
+src/include/ceph_fs.cc	    fs/ceph/ceph_fs.c
+src/include/msgr.h	    fs/ceph/msgr.h
+src/include/rados.h	    fs/ceph/rados.h
+src/include/ceph_strings.cc fs/ceph/ceph_strings.c
+src/include/ceph_frag.h	    fs/ceph/ceph_frag.h
+src/include/ceph_frag.cc    fs/ceph/ceph_frag.c
+src/include/ceph_hash.h	    fs/ceph/ceph_hash.h
+src/include/ceph_hash.cc    fs/ceph/ceph_hash.c
+src/crush/crush.c	    fs/ceph/crush/crush.c
+src/crush/crush.h	    fs/ceph/crush/crush.h
+src/crush/mapper.c	    fs/ceph/crush/mapper.c
+src/crush/mapper.h	    fs/ceph/crush/mapper.h
+src/crush/hash.h	    fs/ceph/crush/hash.h
+src/crush/hash.c	    fs/ceph/crush/hash.c
diff --git a/trunk/fs/ceph/addr.c b/trunk/fs/ceph/addr.c
index 51bcc5ce3230..efbc604001c8 100644
--- a/trunk/fs/ceph/addr.c
+++ b/trunk/fs/ceph/addr.c
@@ -1,4 +1,4 @@
-#include <linux/ceph/ceph_debug.h>
+#include "ceph_debug.h"
 
 #include <linux/backing-dev.h>
 #include <linux/fs.h>
@@ -10,8 +10,7 @@
 #include <linux/task_io_accounting_ops.h>
 
 #include "super.h"
-#include "mds_client.h"
-#include <linux/ceph/osd_client.h>
+#include "osd_client.h"
 
 /*
  * Ceph address space ops.
@@ -194,8 +193,7 @@ static int readpage_nounlock(struct file *filp, struct page *page)
 {
 	struct inode *inode = filp->f_dentry->d_inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_osd_client *osdc = 
-		&ceph_inode_to_client(inode)->client->osdc;
+	struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc;
 	int err = 0;
 	u64 len = PAGE_CACHE_SIZE;
 
@@ -267,8 +265,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
 {
 	struct inode *inode = file->f_dentry->d_inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_osd_client *osdc =
-		&ceph_inode_to_client(inode)->client->osdc;
+	struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc;
 	int rc = 0;
 	struct page **pages;
 	loff_t offset;
@@ -368,7 +365,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 {
 	struct inode *inode;
 	struct ceph_inode_info *ci;
-	struct ceph_fs_client *fsc;
+	struct ceph_client *client;
 	struct ceph_osd_client *osdc;
 	loff_t page_off = page->index << PAGE_CACHE_SHIFT;
 	int len = PAGE_CACHE_SIZE;
@@ -386,8 +383,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	}
 	inode = page->mapping->host;
 	ci = ceph_inode(inode);
-	fsc = ceph_inode_to_client(inode);
-	osdc = &fsc->client->osdc;
+	client = ceph_inode_to_client(inode);
+	osdc = &client->osdc;
 
 	/* verify this is a writeable snap context */
 	snapc = (void *)page->private;
@@ -417,10 +414,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	dout("writepage %p page %p index %lu on %llu~%u snapc %p\n",
 	     inode, page, page->index, page_off, len, snapc);
 
-	writeback_stat = atomic_long_inc_return(&fsc->writeback_count);
+	writeback_stat = atomic_long_inc_return(&client->writeback_count);
 	if (writeback_stat >
-	    CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
-		set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);
+	    CONGESTION_ON_THRESH(client->mount_args->congestion_kb))
+		set_bdi_congested(&client->backing_dev_info, BLK_RW_ASYNC);
 
 	set_page_writeback(page);
 	err = ceph_osdc_writepages(osdc, ceph_vino(inode),
@@ -499,7 +496,7 @@ static void writepages_finish(struct ceph_osd_request *req,
 	struct address_space *mapping = inode->i_mapping;
 	__s32 rc = -EIO;
 	u64 bytes = 0;
-	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+	struct ceph_client *client = ceph_inode_to_client(inode);
 	long writeback_stat;
 	unsigned issued = ceph_caps_issued(ci);
 
@@ -532,10 +529,10 @@ static void writepages_finish(struct ceph_osd_request *req,
 		WARN_ON(!PageUptodate(page));
 
 		writeback_stat =
-			atomic_long_dec_return(&fsc->writeback_count);
+			atomic_long_dec_return(&client->writeback_count);
 		if (writeback_stat <
-		    CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb))
-			clear_bdi_congested(&fsc->backing_dev_info,
+		    CONGESTION_OFF_THRESH(client->mount_args->congestion_kb))
+			clear_bdi_congested(&client->backing_dev_info,
 					    BLK_RW_ASYNC);
 
 		ceph_put_snap_context((void *)page->private);
@@ -572,13 +569,13 @@ static void writepages_finish(struct ceph_osd_request *req,
  * mempool.  we avoid the mempool if we can because req->r_num_pages
  * may be less than the maximum write size.
  */
-static void alloc_page_vec(struct ceph_fs_client *fsc,
+static void alloc_page_vec(struct ceph_client *client,
 			   struct ceph_osd_request *req)
 {
 	req->r_pages = kmalloc(sizeof(struct page *) * req->r_num_pages,
 			       GFP_NOFS);
 	if (!req->r_pages) {
-		req->r_pages = mempool_alloc(fsc->wb_pagevec_pool, GFP_NOFS);
+		req->r_pages = mempool_alloc(client->wb_pagevec_pool, GFP_NOFS);
 		req->r_pages_from_pool = 1;
 		WARN_ON(!req->r_pages);
 	}
@@ -593,7 +590,7 @@ static int ceph_writepages_start(struct address_space *mapping,
 	struct inode *inode = mapping->host;
 	struct backing_dev_info *bdi = mapping->backing_dev_info;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_fs_client *fsc;
+	struct ceph_client *client;
 	pgoff_t index, start, end;
 	int range_whole = 0;
 	int should_loop = 1;
@@ -620,13 +617,13 @@ static int ceph_writepages_start(struct address_space *mapping,
 	     wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
 	     (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
 
-	fsc = ceph_inode_to_client(inode);
-	if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) {
+	client = ceph_inode_to_client(inode);
+	if (client->mount_state == CEPH_MOUNT_SHUTDOWN) {
 		pr_warning("writepage_start %p on forced umount\n", inode);
 		return -EIO; /* we're in a forced umount, don't write! */
 	}
-	if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize)
-		wsize = fsc->mount_options->wsize;
+	if (client->mount_args->wsize && client->mount_args->wsize < wsize)
+		wsize = client->mount_args->wsize;
 	if (wsize < PAGE_CACHE_SIZE)
 		wsize = PAGE_CACHE_SIZE;
 	max_pages_ever = wsize >> PAGE_CACHE_SHIFT;
@@ -772,7 +769,7 @@ static int ceph_writepages_start(struct address_space *mapping,
 				offset = (unsigned long long)page->index
 					<< PAGE_CACHE_SHIFT;
 				len = wsize;
-				req = ceph_osdc_new_request(&fsc->client->osdc,
+				req = ceph_osdc_new_request(&client->osdc,
 					    &ci->i_layout,
 					    ceph_vino(inode),
 					    offset, &len,
@@ -785,7 +782,7 @@ static int ceph_writepages_start(struct address_space *mapping,
 					    &inode->i_mtime, true, 1);
 				max_pages = req->r_num_pages;
 
-				alloc_page_vec(fsc, req);
+				alloc_page_vec(client, req);
 				req->r_callback = writepages_finish;
 				req->r_inode = inode;
 			}
@@ -797,10 +794,10 @@ static int ceph_writepages_start(struct address_space *mapping,
 			     inode, page, page->index);
 
 			writeback_stat =
-			       atomic_long_inc_return(&fsc->writeback_count);
+			       atomic_long_inc_return(&client->writeback_count);
 			if (writeback_stat > CONGESTION_ON_THRESH(
-				    fsc->mount_options->congestion_kb)) {
-				set_bdi_congested(&fsc->backing_dev_info,
+				    client->mount_args->congestion_kb)) {
+				set_bdi_congested(&client->backing_dev_info,
 						  BLK_RW_ASYNC);
 			}
 
@@ -849,7 +846,7 @@ static int ceph_writepages_start(struct address_space *mapping,
 		op->payload_len = cpu_to_le32(len);
 		req->r_request->hdr.data_len = cpu_to_le32(len);
 
-		ceph_osdc_start_request(&fsc->client->osdc, req, true);
+		ceph_osdc_start_request(&client->osdc, req, true);
 		req = NULL;
 
 		/* continue? */
@@ -918,7 +915,7 @@ static int ceph_update_writeable_page(struct file *file,
 {
 	struct inode *inode = file->f_dentry->d_inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+	struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc;
 	loff_t page_off = pos & PAGE_CACHE_MASK;
 	int pos_in_page = pos & ~PAGE_CACHE_MASK;
 	int end_in_page = pos_in_page + len;
@@ -1056,8 +1053,8 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
 			  struct page *page, void *fsdata)
 {
 	struct inode *inode = file->f_dentry->d_inode;
-	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_client *client = ceph_inode_to_client(inode);
+	struct ceph_mds_client *mdsc = &client->mdsc;
 	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
 	int check_cap = 0;
 
@@ -1126,7 +1123,7 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct inode *inode = vma->vm_file->f_dentry->d_inode;
 	struct page *page = vmf->page;
-	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+	struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc;
 	loff_t off = page->index << PAGE_CACHE_SHIFT;
 	loff_t size, len;
 	int ret;
diff --git a/trunk/net/ceph/armor.c b/trunk/fs/ceph/armor.c
similarity index 100%
rename from trunk/net/ceph/armor.c
rename to trunk/fs/ceph/armor.c
diff --git a/trunk/net/ceph/auth.c b/trunk/fs/ceph/auth.c
similarity index 97%
rename from trunk/net/ceph/auth.c
rename to trunk/fs/ceph/auth.c
index 549c1f43e1d5..6d2e30600627 100644
--- a/trunk/net/ceph/auth.c
+++ b/trunk/fs/ceph/auth.c
@@ -1,16 +1,16 @@
-#include <linux/ceph/ceph_debug.h>
+#include "ceph_debug.h"
 
 #include <linux/module.h>
 #include <linux/err.h>
 #include <linux/slab.h>
 
-#include <linux/ceph/types.h>
-#include <linux/ceph/decode.h>
-#include <linux/ceph/libceph.h>
-#include <linux/ceph/messenger.h>
+#include "types.h"
 #include "auth_none.h"
 #include "auth_x.h"
+#include "decode.h"
+#include "super.h"
 
+#include "messenger.h"
 
 /*
  * get protocol handler
diff --git a/trunk/include/linux/ceph/auth.h b/trunk/fs/ceph/auth.h
similarity index 97%
rename from trunk/include/linux/ceph/auth.h
rename to trunk/fs/ceph/auth.h
index 7fff521d7eb5..d38a2fb4a137 100644
--- a/trunk/include/linux/ceph/auth.h
+++ b/trunk/fs/ceph/auth.h
@@ -1,8 +1,8 @@
 #ifndef _FS_CEPH_AUTH_H
 #define _FS_CEPH_AUTH_H
 
-#include <linux/ceph/types.h>
-#include <linux/ceph/buffer.h>
+#include "types.h"
+#include "buffer.h"
 
 /*
  * Abstract interface for communicating with the authenticate module.
diff --git a/trunk/net/ceph/auth_none.c b/trunk/fs/ceph/auth_none.c
similarity index 96%
rename from trunk/net/ceph/auth_none.c
rename to trunk/fs/ceph/auth_none.c
index 214c2bb43d62..ad1dc21286c7 100644
--- a/trunk/net/ceph/auth_none.c
+++ b/trunk/fs/ceph/auth_none.c
@@ -1,15 +1,14 @@
 
-#include <linux/ceph/ceph_debug.h>
+#include "ceph_debug.h"
 
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/slab.h>
 
-#include <linux/ceph/decode.h>
-#include <linux/ceph/auth.h>
-
 #include "auth_none.h"
+#include "auth.h"
+#include "decode.h"
 
 static void reset(struct ceph_auth_client *ac)
 {
diff --git a/trunk/net/ceph/auth_none.h b/trunk/fs/ceph/auth_none.h
similarity index 94%
rename from trunk/net/ceph/auth_none.h
rename to trunk/fs/ceph/auth_none.h
index ed7d088b1bc9..8164df1a08be 100644
--- a/trunk/net/ceph/auth_none.h
+++ b/trunk/fs/ceph/auth_none.h
@@ -2,7 +2,8 @@
 #define _FS_CEPH_AUTH_NONE_H
 
 #include <linux/slab.h>
-#include <linux/ceph/auth.h>
+
+#include "auth.h"
 
 /*
  * null security mode.
diff --git a/trunk/net/ceph/auth_x.c b/trunk/fs/ceph/auth_x.c
similarity index 99%
rename from trunk/net/ceph/auth_x.c
rename to trunk/fs/ceph/auth_x.c
index 7fd5dfcf6e18..a2d002cbdec2 100644
--- a/trunk/net/ceph/auth_x.c
+++ b/trunk/fs/ceph/auth_x.c
@@ -1,17 +1,16 @@
 
-#include <linux/ceph/ceph_debug.h>
+#include "ceph_debug.h"
 
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/slab.h>
 
-#include <linux/ceph/decode.h>
-#include <linux/ceph/auth.h>
-
-#include "crypto.h"
 #include "auth_x.h"
 #include "auth_x_protocol.h"
+#include "crypto.h"
+#include "auth.h"
+#include "decode.h"
 
 #define TEMP_TICKET_BUF_LEN	256
 
diff --git a/trunk/net/ceph/auth_x.h b/trunk/fs/ceph/auth_x.h
similarity index 96%
rename from trunk/net/ceph/auth_x.h
rename to trunk/fs/ceph/auth_x.h
index e02da7a5c5a1..ff6f8180e681 100644
--- a/trunk/net/ceph/auth_x.h
+++ b/trunk/fs/ceph/auth_x.h
@@ -3,9 +3,8 @@
 
 #include <linux/rbtree.h>
 
-#include <linux/ceph/auth.h>
-
 #include "crypto.h"
+#include "auth.h"
 #include "auth_x_protocol.h"
 
 /*
diff --git a/trunk/net/ceph/auth_x_protocol.h b/trunk/fs/ceph/auth_x_protocol.h
similarity index 100%
rename from trunk/net/ceph/auth_x_protocol.h
rename to trunk/fs/ceph/auth_x_protocol.h
diff --git a/trunk/net/ceph/buffer.c b/trunk/fs/ceph/buffer.c
similarity index 86%
rename from trunk/net/ceph/buffer.c
rename to trunk/fs/ceph/buffer.c
index 53d8abfa25d5..cd39f17021de 100644
--- a/trunk/net/ceph/buffer.c
+++ b/trunk/fs/ceph/buffer.c
@@ -1,11 +1,10 @@
 
-#include <linux/ceph/ceph_debug.h>
+#include "ceph_debug.h"
 
-#include <linux/module.h>
 #include <linux/slab.h>
 
-#include <linux/ceph/buffer.h>
-#include <linux/ceph/decode.h>
+#include "buffer.h"
+#include "decode.h"
 
 struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
 {
@@ -33,7 +32,6 @@ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
 	dout("buffer_new %p\n", b);
 	return b;
 }
-EXPORT_SYMBOL(ceph_buffer_new);
 
 void ceph_buffer_release(struct kref *kref)
 {
@@ -48,7 +46,6 @@ void ceph_buffer_release(struct kref *kref)
 	}
 	kfree(b);
 }
-EXPORT_SYMBOL(ceph_buffer_release);
 
 int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end)
 {
diff --git a/trunk/include/linux/ceph/buffer.h b/trunk/fs/ceph/buffer.h
similarity index 100%
rename from trunk/include/linux/ceph/buffer.h
rename to trunk/fs/ceph/buffer.h
diff --git a/trunk/fs/ceph/caps.c b/trunk/fs/ceph/caps.c
index 98ab13e2b71d..5e9da996a151 100644
--- a/trunk/fs/ceph/caps.c
+++ b/trunk/fs/ceph/caps.c
@@ -1,4 +1,4 @@
-#include <linux/ceph/ceph_debug.h>
+#include "ceph_debug.h"
 
 #include <linux/fs.h>
 #include <linux/kernel.h>
@@ -9,9 +9,8 @@
 #include <linux/writeback.h>
 
 #include "super.h"
-#include "mds_client.h"
-#include <linux/ceph/decode.h>
-#include <linux/ceph/messenger.h>
+#include "decode.h"
+#include "messenger.h"
 
 /*
  * Capability management
@@ -288,11 +287,11 @@ void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap)
 	spin_unlock(&mdsc->caps_list_lock);
 }
 
-void ceph_reservation_status(struct ceph_fs_client *fsc,
+void ceph_reservation_status(struct ceph_client *client,
 			     int *total, int *avail, int *used, int *reserved,
 			     int *min)
 {
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_mds_client *mdsc = &client->mdsc;
 
 	if (total)
 		*total = mdsc->caps_total_count;
@@ -400,7 +399,7 @@ static void __insert_cap_node(struct ceph_inode_info *ci,
 static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
 			       struct ceph_inode_info *ci)
 {
-	struct ceph_mount_options *ma = mdsc->fsc->mount_options;
+	struct ceph_mount_args *ma = mdsc->client->mount_args;
 
 	ci->i_hold_caps_min = round_jiffies(jiffies +
 					    ma->caps_wanted_delay_min * HZ);
@@ -516,7 +515,7 @@ int ceph_add_cap(struct inode *inode,
 		 unsigned seq, unsigned mseq, u64 realmino, int flags,
 		 struct ceph_cap_reservation *caps_reservation)
 {
-	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+	struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_cap *new_cap = NULL;
 	struct ceph_cap *cap;
@@ -874,7 +873,7 @@ void __ceph_remove_cap(struct ceph_cap *cap)
 	struct ceph_mds_session *session = cap->session;
 	struct ceph_inode_info *ci = cap->ci;
 	struct ceph_mds_client *mdsc =
-		ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
+		&ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
 	int removed = 0;
 
 	dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
@@ -1211,7 +1210,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci,
 	int mds;
 	struct ceph_cap_snap *capsnap;
 	u32 mseq;
-	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+	struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc;
 	struct ceph_mds_session *session = NULL; /* if session != NULL, we hold
 						    session->s_mutex */
 	u64 next_follows = 0;  /* keep track of how far we've gotten through the
@@ -1337,7 +1336,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
 void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
 {
 	struct ceph_mds_client *mdsc =
-		ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
+		&ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
 	struct inode *inode = &ci->vfs_inode;
 	int was = ci->i_dirty_caps;
 	int dirty = 0;
@@ -1379,7 +1378,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
 static int __mark_caps_flushing(struct inode *inode,
 				 struct ceph_mds_session *session)
 {
-	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+	struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int flushing;
 
@@ -1417,6 +1416,17 @@ static int __mark_caps_flushing(struct inode *inode,
 /*
  * try to invalidate mapping pages without blocking.
  */
+static int mapping_is_empty(struct address_space *mapping)
+{
+	struct page *page = find_get_page(mapping, 0);
+
+	if (!page)
+		return 1;
+
+	put_page(page);
+	return 0;
+}
+
 static int try_nonblocking_invalidate(struct inode *inode)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
@@ -1426,7 +1436,7 @@ static int try_nonblocking_invalidate(struct inode *inode)
 	invalidate_mapping_pages(&inode->i_data, 0, -1);
 	spin_lock(&inode->i_lock);
 
-	if (inode->i_data.nrpages == 0 &&
+	if (mapping_is_empty(&inode->i_data) &&
 	    invalidating_gen == ci->i_rdcache_gen) {
 		/* success. */
 		dout("try_nonblocking_invalidate %p success\n", inode);
@@ -1452,8 +1462,8 @@ static int try_nonblocking_invalidate(struct inode *inode)
 void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 		     struct ceph_mds_session *session)
 {
-	struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode);
+	struct ceph_mds_client *mdsc = &client->mdsc;
 	struct inode *inode = &ci->vfs_inode;
 	struct ceph_cap *cap;
 	int file_wanted, used;
@@ -1523,7 +1533,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 	 */
 	if ((!is_delayed || mdsc->stopping) &&
 	    ci->i_wrbuffer_ref == 0 &&               /* no dirty pages... */
-	    inode->i_data.nrpages &&                 /* have cached pages */
+	    ci->i_rdcache_gen &&                     /* may have cached pages */
 	    (file_wanted == 0 ||                     /* no open files */
 	     (revoking & (CEPH_CAP_FILE_CACHE|
 			  CEPH_CAP_FILE_LAZYIO))) && /*  or revoking cache */
@@ -1696,7 +1706,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session,
 			  unsigned *flush_tid)
 {
-	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+	struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int unlock_session = session ? 0 : 1;
 	int flushing = 0;
@@ -1862,7 +1872,7 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
 				       caps_are_flushed(inode, flush_tid));
 	} else {
 		struct ceph_mds_client *mdsc =
-			ceph_sb_to_client(inode->i_sb)->mdsc;
+			&ceph_sb_to_client(inode->i_sb)->mdsc;
 
 		spin_lock(&inode->i_lock);
 		if (__ceph_caps_dirty(ci))
@@ -2455,7 +2465,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
 	__releases(inode->i_lock)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+	struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
 	unsigned seq = le32_to_cpu(m->seq);
 	int dirty = le32_to_cpu(m->dirty);
 	int cleaned = 0;
@@ -2703,7 +2713,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 		      struct ceph_msg *msg)
 {
 	struct ceph_mds_client *mdsc = session->s_mdsc;
-	struct super_block *sb = mdsc->fsc->sb;
+	struct super_block *sb = mdsc->client->sb;
 	struct inode *inode;
 	struct ceph_cap *cap;
 	struct ceph_mds_caps *h;
diff --git a/trunk/include/linux/ceph/ceph_debug.h b/trunk/fs/ceph/ceph_debug.h
similarity index 86%
rename from trunk/include/linux/ceph/ceph_debug.h
rename to trunk/fs/ceph/ceph_debug.h
index aa2e19182d99..1818c2305610 100644
--- a/trunk/include/linux/ceph/ceph_debug.h
+++ b/trunk/fs/ceph/ceph_debug.h
@@ -3,7 +3,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#ifdef CONFIG_CEPH_LIB_PRETTYDEBUG
+#ifdef CONFIG_CEPH_FS_PRETTYDEBUG
 
 /*
  * wrap pr_debug to include a filename:lineno prefix on each line.
@@ -14,8 +14,7 @@
 # if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
 extern const char *ceph_file_part(const char *s, int len);
 #  define dout(fmt, ...)						\
-	pr_debug("%.*s %12.12s:%-4d : " fmt,				\
-		 8 - (int)sizeof(KBUILD_MODNAME), "    ",		\
+	pr_debug(" %12.12s:%-4d : " fmt,				\
 		 ceph_file_part(__FILE__, sizeof(__FILE__)),		\
 		 __LINE__, ##__VA_ARGS__)
 # else
diff --git a/trunk/fs/ceph/ceph_frag.c b/trunk/fs/ceph/ceph_frag.c
index bdce8b1fbd06..ab6cf35c4091 100644
--- a/trunk/fs/ceph/ceph_frag.c
+++ b/trunk/fs/ceph/ceph_frag.c
@@ -1,8 +1,7 @@
 /*
  * Ceph 'frag' type
  */
-#include <linux/module.h>
-#include <linux/ceph/types.h>
+#include "types.h"
 
 int ceph_frag_compare(__u32 a, __u32 b)
 {
diff --git a/trunk/include/linux/ceph/ceph_frag.h b/trunk/fs/ceph/ceph_frag.h
similarity index 100%
rename from trunk/include/linux/ceph/ceph_frag.h
rename to trunk/fs/ceph/ceph_frag.h
diff --git a/trunk/net/ceph/ceph_fs.c b/trunk/fs/ceph/ceph_fs.c
similarity index 92%
rename from trunk/net/ceph/ceph_fs.c
rename to trunk/fs/ceph/ceph_fs.c
index a3a3a31d3c37..3ac6cc7c1156 100644
--- a/trunk/net/ceph/ceph_fs.c
+++ b/trunk/fs/ceph/ceph_fs.c
@@ -1,8 +1,7 @@
 /*
  * Some non-inline ceph helpers
  */
-#include <linux/module.h>
-#include <linux/ceph/types.h>
+#include "types.h"
 
 /*
  * return true if @layout appears to be valid
@@ -53,7 +52,6 @@ int ceph_flags_to_mode(int flags)
 
 	return mode;
 }
-EXPORT_SYMBOL(ceph_flags_to_mode);
 
 int ceph_caps_for_mode(int mode)
 {
@@ -72,4 +70,3 @@ int ceph_caps_for_mode(int mode)
 
 	return caps;
 }
-EXPORT_SYMBOL(ceph_caps_for_mode);
diff --git a/trunk/include/linux/ceph/ceph_fs.h b/trunk/fs/ceph/ceph_fs.h
similarity index 99%
rename from trunk/include/linux/ceph/ceph_fs.h
rename to trunk/fs/ceph/ceph_fs.h
index c3c74aef289d..d5619ac86711 100644
--- a/trunk/include/linux/ceph/ceph_fs.h
+++ b/trunk/fs/ceph/ceph_fs.h
@@ -299,7 +299,6 @@ enum {
 	CEPH_MDS_OP_SETATTR    = 0x01108,
 	CEPH_MDS_OP_SETFILELOCK= 0x01109,
 	CEPH_MDS_OP_GETFILELOCK= 0x00110,
-	CEPH_MDS_OP_SETDIRLAYOUT=0x0110a,
 
 	CEPH_MDS_OP_MKNOD      = 0x01201,
 	CEPH_MDS_OP_LINK       = 0x01202,
diff --git a/trunk/net/ceph/ceph_hash.c b/trunk/fs/ceph/ceph_hash.c
similarity index 98%
rename from trunk/net/ceph/ceph_hash.c
rename to trunk/fs/ceph/ceph_hash.c
index 815ef8826796..bd570015d147 100644
--- a/trunk/net/ceph/ceph_hash.c
+++ b/trunk/fs/ceph/ceph_hash.c
@@ -1,5 +1,5 @@
 
-#include <linux/ceph/types.h>
+#include "types.h"
 
 /*
  * Robert Jenkin's hash function.
diff --git a/trunk/include/linux/ceph/ceph_hash.h b/trunk/fs/ceph/ceph_hash.h
similarity index 100%
rename from trunk/include/linux/ceph/ceph_hash.h
rename to trunk/fs/ceph/ceph_hash.h
diff --git a/trunk/fs/ceph/strings.c b/trunk/fs/ceph/ceph_strings.c
similarity index 59%
rename from trunk/fs/ceph/strings.c
rename to trunk/fs/ceph/ceph_strings.c
index cd5097d7c804..c6179d3a26a2 100644
--- a/trunk/fs/ceph/strings.c
+++ b/trunk/fs/ceph/ceph_strings.c
@@ -1,9 +1,71 @@
 /*
- * Ceph fs string constants
+ * Ceph string constants
  */
-#include <linux/module.h>
-#include <linux/ceph/types.h>
+#include "types.h"
 
+const char *ceph_entity_type_name(int type)
+{
+	switch (type) {
+	case CEPH_ENTITY_TYPE_MDS: return "mds";
+	case CEPH_ENTITY_TYPE_OSD: return "osd";
+	case CEPH_ENTITY_TYPE_MON: return "mon";
+	case CEPH_ENTITY_TYPE_CLIENT: return "client";
+	case CEPH_ENTITY_TYPE_AUTH: return "auth";
+	default: return "unknown";
+	}
+}
+
+const char *ceph_osd_op_name(int op)
+{
+	switch (op) {
+	case CEPH_OSD_OP_READ: return "read";
+	case CEPH_OSD_OP_STAT: return "stat";
+
+	case CEPH_OSD_OP_MASKTRUNC: return "masktrunc";
+
+	case CEPH_OSD_OP_WRITE: return "write";
+	case CEPH_OSD_OP_DELETE: return "delete";
+	case CEPH_OSD_OP_TRUNCATE: return "truncate";
+	case CEPH_OSD_OP_ZERO: return "zero";
+	case CEPH_OSD_OP_WRITEFULL: return "writefull";
+	case CEPH_OSD_OP_ROLLBACK: return "rollback";
+
+	case CEPH_OSD_OP_APPEND: return "append";
+	case CEPH_OSD_OP_STARTSYNC: return "startsync";
+	case CEPH_OSD_OP_SETTRUNC: return "settrunc";
+	case CEPH_OSD_OP_TRIMTRUNC: return "trimtrunc";
+
+	case CEPH_OSD_OP_TMAPUP: return "tmapup";
+	case CEPH_OSD_OP_TMAPGET: return "tmapget";
+	case CEPH_OSD_OP_TMAPPUT: return "tmapput";
+
+	case CEPH_OSD_OP_GETXATTR: return "getxattr";
+	case CEPH_OSD_OP_GETXATTRS: return "getxattrs";
+	case CEPH_OSD_OP_SETXATTR: return "setxattr";
+	case CEPH_OSD_OP_SETXATTRS: return "setxattrs";
+	case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs";
+	case CEPH_OSD_OP_RMXATTR: return "rmxattr";
+	case CEPH_OSD_OP_CMPXATTR: return "cmpxattr";
+
+	case CEPH_OSD_OP_PULL: return "pull";
+	case CEPH_OSD_OP_PUSH: return "push";
+	case CEPH_OSD_OP_BALANCEREADS: return "balance-reads";
+	case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads";
+	case CEPH_OSD_OP_SCRUB: return "scrub";
+
+	case CEPH_OSD_OP_WRLOCK: return "wrlock";
+	case CEPH_OSD_OP_WRUNLOCK: return "wrunlock";
+	case CEPH_OSD_OP_RDLOCK: return "rdlock";
+	case CEPH_OSD_OP_RDUNLOCK: return "rdunlock";
+	case CEPH_OSD_OP_UPLOCK: return "uplock";
+	case CEPH_OSD_OP_DNLOCK: return "dnlock";
+
+	case CEPH_OSD_OP_CALL: return "call";
+
+	case CEPH_OSD_OP_PGLS: return "pgls";
+	}
+	return "???";
+}
 
 const char *ceph_mds_state_name(int s)
 {
@@ -115,3 +177,17 @@ const char *ceph_snap_op_name(int o)
 	}
 	return "???";
 }
+
+const char *ceph_pool_op_name(int op)
+{
+	switch (op) {
+	case POOL_OP_CREATE: return "create";
+	case POOL_OP_DELETE: return "delete";
+	case POOL_OP_AUID_CHANGE: return "auid change";
+	case POOL_OP_CREATE_SNAP: return "create snap";
+	case POOL_OP_DELETE_SNAP: return "delete snap";
+	case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap";
+	case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap";
+	}
+	return "???";
+}
diff --git a/trunk/net/ceph/crush/crush.c b/trunk/fs/ceph/crush/crush.c
similarity index 99%
rename from trunk/net/ceph/crush/crush.c
rename to trunk/fs/ceph/crush/crush.c
index d6ebb13a18a4..fabd302e5779 100644
--- a/trunk/net/ceph/crush/crush.c
+++ b/trunk/fs/ceph/crush/crush.c
@@ -8,7 +8,7 @@
 # define BUG_ON(x) assert(!(x))
 #endif
 
-#include <linux/crush/crush.h>
+#include "crush.h"
 
 const char *crush_bucket_alg_name(int alg)
 {
diff --git a/trunk/include/linux/crush/crush.h b/trunk/fs/ceph/crush/crush.h
similarity index 100%
rename from trunk/include/linux/crush/crush.h
rename to trunk/fs/ceph/crush/crush.h
diff --git a/trunk/net/ceph/crush/hash.c b/trunk/fs/ceph/crush/hash.c
similarity index 99%
rename from trunk/net/ceph/crush/hash.c
rename to trunk/fs/ceph/crush/hash.c
index 5bb63e37a8a1..5873aed694bf 100644
--- a/trunk/net/ceph/crush/hash.c
+++ b/trunk/fs/ceph/crush/hash.c
@@ -1,6 +1,6 @@
 
 #include <linux/types.h>
-#include <linux/crush/hash.h>
+#include "hash.h"
 
 /*
  * Robert Jenkins' function for mixing 32-bit values
diff --git a/trunk/include/linux/crush/hash.h b/trunk/fs/ceph/crush/hash.h
similarity index 100%
rename from trunk/include/linux/crush/hash.h
rename to trunk/fs/ceph/crush/hash.h
diff --git a/trunk/net/ceph/crush/mapper.c b/trunk/fs/ceph/crush/mapper.c
similarity index 99%
rename from trunk/net/ceph/crush/mapper.c
rename to trunk/fs/ceph/crush/mapper.c
index 42599e31dcad..a4eec133258e 100644
--- a/trunk/net/ceph/crush/mapper.c
+++ b/trunk/fs/ceph/crush/mapper.c
@@ -18,8 +18,8 @@
 # define kfree(x) free(x)
 #endif
 
-#include <linux/crush/crush.h>
-#include <linux/crush/hash.h>
+#include "crush.h"
+#include "hash.h"
 
 /*
  * Implement the core CRUSH mapping algorithm.
diff --git a/trunk/include/linux/crush/mapper.h b/trunk/fs/ceph/crush/mapper.h
similarity index 100%
rename from trunk/include/linux/crush/mapper.h
rename to trunk/fs/ceph/crush/mapper.h
diff --git a/trunk/net/ceph/crypto.c b/trunk/fs/ceph/crypto.c
similarity index 99%
rename from trunk/net/ceph/crypto.c
rename to trunk/fs/ceph/crypto.c
index 7b505b0c983f..a3e627f63293 100644
--- a/trunk/net/ceph/crypto.c
+++ b/trunk/fs/ceph/crypto.c
@@ -1,13 +1,13 @@
 
-#include <linux/ceph/ceph_debug.h>
+#include "ceph_debug.h"
 
 #include <linux/err.h>
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
 #include <crypto/hash.h>
 
-#include <linux/ceph/decode.h>
 #include "crypto.h"
+#include "decode.h"
 
 int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end)
 {
diff --git a/trunk/net/ceph/crypto.h b/trunk/fs/ceph/crypto.h
similarity index 95%
rename from trunk/net/ceph/crypto.h
rename to trunk/fs/ceph/crypto.h
index f9eccace592b..bdf38607323c 100644
--- a/trunk/net/ceph/crypto.h
+++ b/trunk/fs/ceph/crypto.h
@@ -1,8 +1,8 @@
 #ifndef _FS_CEPH_CRYPTO_H
 #define _FS_CEPH_CRYPTO_H
 
-#include <linux/ceph/types.h>
-#include <linux/ceph/buffer.h>
+#include "types.h"
+#include "buffer.h"
 
 /*
  * cryptographic secret
diff --git a/trunk/fs/ceph/debugfs.c b/trunk/fs/ceph/debugfs.c
index 7ae1b3d55b58..6fd8b20a8611 100644
--- a/trunk/fs/ceph/debugfs.c
+++ b/trunk/fs/ceph/debugfs.c
@@ -1,4 +1,4 @@
-#include <linux/ceph/ceph_debug.h>
+#include "ceph_debug.h"
 
 #include <linux/device.h>
 #include <linux/slab.h>
@@ -7,49 +7,143 @@
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 
-#include <linux/ceph/libceph.h>
-#include <linux/ceph/mon_client.h>
-#include <linux/ceph/auth.h>
-#include <linux/ceph/debugfs.h>
-
 #include "super.h"
+#include "mds_client.h"
+#include "mon_client.h"
+#include "auth.h"
 
 #ifdef CONFIG_DEBUG_FS
 
-#include "mds_client.h"
+/*
+ * Implement /sys/kernel/debug/ceph fun
+ *
+ * /sys/kernel/debug/ceph/client*  - an instance of the ceph client
+ *      .../osdmap      - current osdmap
+ *      .../mdsmap      - current mdsmap
+ *      .../monmap      - current monmap
+ *      .../osdc        - active osd requests
+ *      .../mdsc        - active mds requests
+ *      .../monc        - mon client state
+ *      .../dentry_lru  - dump contents of dentry lru
+ *      .../caps        - expose cap (reservation) stats
+ *      .../bdi         - symlink to ../../bdi/something
+ */
+
+static struct dentry *ceph_debugfs_dir;
+
+static int monmap_show(struct seq_file *s, void *p)
+{
+	int i;
+	struct ceph_client *client = s->private;
+
+	if (client->monc.monmap == NULL)
+		return 0;
+
+	seq_printf(s, "epoch %d\n", client->monc.monmap->epoch);
+	for (i = 0; i < client->monc.monmap->num_mon; i++) {
+		struct ceph_entity_inst *inst =
+			&client->monc.monmap->mon_inst[i];
+
+		seq_printf(s, "\t%s%lld\t%s\n",
+			   ENTITY_NAME(inst->name),
+			   pr_addr(&inst->addr.in_addr));
+	}
+	return 0;
+}
 
 static int mdsmap_show(struct seq_file *s, void *p)
 {
 	int i;
-	struct ceph_fs_client *fsc = s->private;
+	struct ceph_client *client = s->private;
 
-	if (fsc->mdsc == NULL || fsc->mdsc->mdsmap == NULL)
+	if (client->mdsc.mdsmap == NULL)
 		return 0;
-	seq_printf(s, "epoch %d\n", fsc->mdsc->mdsmap->m_epoch);
-	seq_printf(s, "root %d\n", fsc->mdsc->mdsmap->m_root);
+	seq_printf(s, "epoch %d\n", client->mdsc.mdsmap->m_epoch);
+	seq_printf(s, "root %d\n", client->mdsc.mdsmap->m_root);
 	seq_printf(s, "session_timeout %d\n",
-		       fsc->mdsc->mdsmap->m_session_timeout);
+		       client->mdsc.mdsmap->m_session_timeout);
 	seq_printf(s, "session_autoclose %d\n",
-		       fsc->mdsc->mdsmap->m_session_autoclose);
-	for (i = 0; i < fsc->mdsc->mdsmap->m_max_mds; i++) {
+		       client->mdsc.mdsmap->m_session_autoclose);
+	for (i = 0; i < client->mdsc.mdsmap->m_max_mds; i++) {
 		struct ceph_entity_addr *addr =
-			&fsc->mdsc->mdsmap->m_info[i].addr;
-		int state = fsc->mdsc->mdsmap->m_info[i].state;
+			&client->mdsc.mdsmap->m_info[i].addr;
+		int state = client->mdsc.mdsmap->m_info[i].state;
 
-		seq_printf(s, "\tmds%d\t%s\t(%s)\n", i,
-			       ceph_pr_addr(&addr->in_addr),
+		seq_printf(s, "\tmds%d\t%s\t(%s)\n", i, pr_addr(&addr->in_addr),
 			       ceph_mds_state_name(state));
 	}
 	return 0;
 }
 
-/*
- * mdsc debugfs
- */
+static int osdmap_show(struct seq_file *s, void *p)
+{
+	int i;
+	struct ceph_client *client = s->private;
+	struct rb_node *n;
+
+	if (client->osdc.osdmap == NULL)
+		return 0;
+	seq_printf(s, "epoch %d\n", client->osdc.osdmap->epoch);
+	seq_printf(s, "flags%s%s\n",
+		   (client->osdc.osdmap->flags & CEPH_OSDMAP_NEARFULL) ?
+		   " NEARFULL" : "",
+		   (client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ?
+		   " FULL" : "");
+	for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) {
+		struct ceph_pg_pool_info *pool =
+			rb_entry(n, struct ceph_pg_pool_info, node);
+		seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n",
+			   pool->id, pool->v.pg_num, pool->pg_num_mask,
+			   pool->v.lpg_num, pool->lpg_num_mask);
+	}
+	for (i = 0; i < client->osdc.osdmap->max_osd; i++) {
+		struct ceph_entity_addr *addr =
+			&client->osdc.osdmap->osd_addr[i];
+		int state = client->osdc.osdmap->osd_state[i];
+		char sb[64];
+
+		seq_printf(s, "\tosd%d\t%s\t%3d%%\t(%s)\n",
+			   i, pr_addr(&addr->in_addr),
+			   ((client->osdc.osdmap->osd_weight[i]*100) >> 16),
+			   ceph_osdmap_state_str(sb, sizeof(sb), state));
+	}
+	return 0;
+}
+
+static int monc_show(struct seq_file *s, void *p)
+{
+	struct ceph_client *client = s->private;
+	struct ceph_mon_generic_request *req;
+	struct ceph_mon_client *monc = &client->monc;
+	struct rb_node *rp;
+
+	mutex_lock(&monc->mutex);
+
+	if (monc->have_mdsmap)
+		seq_printf(s, "have mdsmap %u\n", (unsigned)monc->have_mdsmap);
+	if (monc->have_osdmap)
+		seq_printf(s, "have osdmap %u\n", (unsigned)monc->have_osdmap);
+	if (monc->want_next_osdmap)
+		seq_printf(s, "want next osdmap\n");
+
+	for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) {
+		__u16 op;
+		req = rb_entry(rp, struct ceph_mon_generic_request, node);
+		op = le16_to_cpu(req->request->hdr.type);
+		if (op == CEPH_MSG_STATFS)
+			seq_printf(s, "%lld statfs\n", req->tid);
+		else
+			seq_printf(s, "%lld unknown\n", req->tid);
+	}
+
+	mutex_unlock(&monc->mutex);
+	return 0;
+}
+
 static int mdsc_show(struct seq_file *s, void *p)
 {
-	struct ceph_fs_client *fsc = s->private;
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_client *client = s->private;
+	struct ceph_mds_client *mdsc = &client->mdsc;
 	struct ceph_mds_request *req;
 	struct rb_node *rp;
 	int pathlen;
@@ -120,12 +214,61 @@ static int mdsc_show(struct seq_file *s, void *p)
 	return 0;
 }
 
+static int osdc_show(struct seq_file *s, void *pp)
+{
+	struct ceph_client *client = s->private;
+	struct ceph_osd_client *osdc = &client->osdc;
+	struct rb_node *p;
+
+	mutex_lock(&osdc->request_mutex);
+	for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
+		struct ceph_osd_request *req;
+		struct ceph_osd_request_head *head;
+		struct ceph_osd_op *op;
+		int num_ops;
+		int opcode, olen;
+		int i;
+
+		req = rb_entry(p, struct ceph_osd_request, r_node);
+
+		seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid,
+			   req->r_osd ? req->r_osd->o_osd : -1,
+			   le32_to_cpu(req->r_pgid.pool),
+			   le16_to_cpu(req->r_pgid.ps));
+
+		head = req->r_request->front.iov_base;
+		op = (void *)(head + 1);
+
+		num_ops = le16_to_cpu(head->num_ops);
+		olen = le32_to_cpu(head->object_len);
+		seq_printf(s, "%.*s", olen,
+			   (const char *)(head->ops + num_ops));
+
+		if (req->r_reassert_version.epoch)
+			seq_printf(s, "\t%u'%llu",
+			   (unsigned)le32_to_cpu(req->r_reassert_version.epoch),
+			   le64_to_cpu(req->r_reassert_version.version));
+		else
+			seq_printf(s, "\t");
+
+		for (i = 0; i < num_ops; i++) {
+			opcode = le16_to_cpu(op->op);
+			seq_printf(s, "\t%s", ceph_osd_op_name(opcode));
+			op++;
+		}
+
+		seq_printf(s, "\n");
+	}
+	mutex_unlock(&osdc->request_mutex);
+	return 0;
+}
+
 static int caps_show(struct seq_file *s, void *p)
 {
-	struct ceph_fs_client *fsc = s->private;
+	struct ceph_client *client = s->private;
 	int total, avail, used, reserved, min;
 
-	ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min);
+	ceph_reservation_status(client, &total, &avail, &used, &reserved, &min);
 	seq_printf(s, "total\t\t%d\n"
 		   "avail\t\t%d\n"
 		   "used\t\t%d\n"
@@ -137,8 +280,8 @@ static int caps_show(struct seq_file *s, void *p)
 
 static int dentry_lru_show(struct seq_file *s, void *ptr)
 {
-	struct ceph_fs_client *fsc = s->private;
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_client *client = s->private;
+	struct ceph_mds_client *mdsc = &client->mdsc;
 	struct ceph_dentry_info *di;
 
 	spin_lock(&mdsc->dentry_lru_lock);
@@ -152,124 +295,199 @@ static int dentry_lru_show(struct seq_file *s, void *ptr)
 	return 0;
 }
 
-CEPH_DEFINE_SHOW_FUNC(mdsmap_show)
-CEPH_DEFINE_SHOW_FUNC(mdsc_show)
-CEPH_DEFINE_SHOW_FUNC(caps_show)
-CEPH_DEFINE_SHOW_FUNC(dentry_lru_show)
-
+#define DEFINE_SHOW_FUNC(name)						\
+static int name##_open(struct inode *inode, struct file *file)		\
+{									\
+	struct seq_file *sf;						\
+	int ret;							\
+									\
+	ret = single_open(file, name, NULL);				\
+	sf = file->private_data;					\
+	sf->private = inode->i_private;					\
+	return ret;							\
+}									\
+									\
+static const struct file_operations name##_fops = {			\
+	.open		= name##_open,					\
+	.read		= seq_read,					\
+	.llseek		= seq_lseek,					\
+	.release	= single_release,				\
+};
+
+DEFINE_SHOW_FUNC(monmap_show)
+DEFINE_SHOW_FUNC(mdsmap_show)
+DEFINE_SHOW_FUNC(osdmap_show)
+DEFINE_SHOW_FUNC(monc_show)
+DEFINE_SHOW_FUNC(mdsc_show)
+DEFINE_SHOW_FUNC(osdc_show)
+DEFINE_SHOW_FUNC(dentry_lru_show)
+DEFINE_SHOW_FUNC(caps_show)
 
-/*
- * debugfs
- */
 static int congestion_kb_set(void *data, u64 val)
 {
-	struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
+	struct ceph_client *client = (struct ceph_client *)data;
+
+	if (client)
+		client->mount_args->congestion_kb = (int)val;
 
-	fsc->mount_options->congestion_kb = (int)val;
 	return 0;
 }
 
 static int congestion_kb_get(void *data, u64 *val)
 {
-	struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
+	struct ceph_client *client = (struct ceph_client *)data;
+
+	if (client)
+		*val = (u64)client->mount_args->congestion_kb;
 
-	*val = (u64)fsc->mount_options->congestion_kb;
 	return 0;
 }
 
+
 DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get,
 			congestion_kb_set, "%llu\n");
 
+int __init ceph_debugfs_init(void)
+{
+	ceph_debugfs_dir = debugfs_create_dir("ceph", NULL);
+	if (!ceph_debugfs_dir)
+		return -ENOMEM;
+	return 0;
+}
 
-void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
+void ceph_debugfs_cleanup(void)
 {
-	dout("ceph_fs_debugfs_cleanup\n");
-	debugfs_remove(fsc->debugfs_bdi);
-	debugfs_remove(fsc->debugfs_congestion_kb);
-	debugfs_remove(fsc->debugfs_mdsmap);
-	debugfs_remove(fsc->debugfs_caps);
-	debugfs_remove(fsc->debugfs_mdsc);
-	debugfs_remove(fsc->debugfs_dentry_lru);
+	debugfs_remove(ceph_debugfs_dir);
 }
 
-int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
+int ceph_debugfs_client_init(struct ceph_client *client)
 {
-	char name[100];
-	int err = -ENOMEM;
+	int ret = 0;
+	char name[80];
 
-	dout("ceph_fs_debugfs_init\n");
-	fsc->debugfs_congestion_kb =
-		debugfs_create_file("writeback_congestion_kb",
-				    0600,
-				    fsc->client->debugfs_dir,
-				    fsc,
-				    &congestion_kb_fops);
-	if (!fsc->debugfs_congestion_kb)
+	snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid,
+		 client->monc.auth->global_id);
+
+	client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir);
+	if (!client->debugfs_dir)
 		goto out;
 
-	dout("a\n");
+	client->monc.debugfs_file = debugfs_create_file("monc",
+						      0600,
+						      client->debugfs_dir,
+						      client,
+						      &monc_show_fops);
+	if (!client->monc.debugfs_file)
+		goto out;
+
+	client->mdsc.debugfs_file = debugfs_create_file("mdsc",
+						      0600,
+						      client->debugfs_dir,
+						      client,
+						      &mdsc_show_fops);
+	if (!client->mdsc.debugfs_file)
+		goto out;
 
-	snprintf(name, sizeof(name), "../../bdi/%s",
-		 dev_name(fsc->backing_dev_info.dev));
-	fsc->debugfs_bdi =
-		debugfs_create_symlink("bdi",
-				       fsc->client->debugfs_dir,
-				       name);
-	if (!fsc->debugfs_bdi)
+	client->osdc.debugfs_file = debugfs_create_file("osdc",
+						      0600,
+						      client->debugfs_dir,
+						      client,
+						      &osdc_show_fops);
+	if (!client->osdc.debugfs_file)
 		goto out;
 
-	dout("b\n");
-	fsc->debugfs_mdsmap = debugfs_create_file("mdsmap",
+	client->debugfs_monmap = debugfs_create_file("monmap",
 					0600,
-					fsc->client->debugfs_dir,
-					fsc,
+					client->debugfs_dir,
+					client,
+					&monmap_show_fops);
+	if (!client->debugfs_monmap)
+		goto out;
+
+	client->debugfs_mdsmap = debugfs_create_file("mdsmap",
+					0600,
+					client->debugfs_dir,
+					client,
 					&mdsmap_show_fops);
-	if (!fsc->debugfs_mdsmap)
+	if (!client->debugfs_mdsmap)
+		goto out;
+
+	client->debugfs_osdmap = debugfs_create_file("osdmap",
+					0600,
+					client->debugfs_dir,
+					client,
+					&osdmap_show_fops);
+	if (!client->debugfs_osdmap)
 		goto out;
 
-	dout("ca\n");
-	fsc->debugfs_mdsc = debugfs_create_file("mdsc",
-						0600,
-						fsc->client->debugfs_dir,
-						fsc,
-						&mdsc_show_fops);
-	if (!fsc->debugfs_mdsc)
+	client->debugfs_dentry_lru = debugfs_create_file("dentry_lru",
+					0600,
+					client->debugfs_dir,
+					client,
+					&dentry_lru_show_fops);
+	if (!client->debugfs_dentry_lru)
 		goto out;
 
-	dout("da\n");
-	fsc->debugfs_caps = debugfs_create_file("caps",
+	client->debugfs_caps = debugfs_create_file("caps",
 						   0400,
-						   fsc->client->debugfs_dir,
-						   fsc,
+						   client->debugfs_dir,
+						   client,
 						   &caps_show_fops);
-	if (!fsc->debugfs_caps)
+	if (!client->debugfs_caps)
 		goto out;
 
-	dout("ea\n");
-	fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru",
-					0600,
-					fsc->client->debugfs_dir,
-					fsc,
-					&dentry_lru_show_fops);
-	if (!fsc->debugfs_dentry_lru)
+	client->debugfs_congestion_kb =
+		debugfs_create_file("writeback_congestion_kb",
+				    0600,
+				    client->debugfs_dir,
+				    client,
+				    &congestion_kb_fops);
+	if (!client->debugfs_congestion_kb)
 		goto out;
 
+	sprintf(name, "../../bdi/%s", dev_name(client->sb->s_bdi->dev));
+	client->debugfs_bdi = debugfs_create_symlink("bdi", client->debugfs_dir,
+						     name);
+
 	return 0;
 
 out:
-	ceph_fs_debugfs_cleanup(fsc);
-	return err;
+	ceph_debugfs_client_cleanup(client);
+	return ret;
 }
 
+void ceph_debugfs_client_cleanup(struct ceph_client *client)
+{
+	debugfs_remove(client->debugfs_bdi);
+	debugfs_remove(client->debugfs_caps);
+	debugfs_remove(client->debugfs_dentry_lru);
+	debugfs_remove(client->debugfs_osdmap);
+	debugfs_remove(client->debugfs_mdsmap);
+	debugfs_remove(client->debugfs_monmap);
+	debugfs_remove(client->osdc.debugfs_file);
+	debugfs_remove(client->mdsc.debugfs_file);
+	debugfs_remove(client->monc.debugfs_file);
+	debugfs_remove(client->debugfs_congestion_kb);
+	debugfs_remove(client->debugfs_dir);
+}
 
 #else  /* CONFIG_DEBUG_FS */
 
-int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
+int __init ceph_debugfs_init(void)
+{
+	return 0;
+}
+
+void ceph_debugfs_cleanup(void)
+{
+}
+
+int ceph_debugfs_client_init(struct ceph_client *client)
 {
 	return 0;
 }
 
-void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
+void ceph_debugfs_client_cleanup(struct ceph_client *client)
 {
 }
 
diff --git a/trunk/include/linux/ceph/decode.h b/trunk/fs/ceph/decode.h
similarity index 96%
rename from trunk/include/linux/ceph/decode.h
rename to trunk/fs/ceph/decode.h
index c5b6939fb32a..3d25415afe63 100644
--- a/trunk/include/linux/ceph/decode.h
+++ b/trunk/fs/ceph/decode.h
@@ -191,11 +191,6 @@ static inline void ceph_encode_string(void **p, void *end,
 		ceph_encode_need(p, end, n, bad);		\
 		ceph_encode_copy(p, pv, n);			\
 	} while (0)
-#define ceph_encode_string_safe(p, end, s, n, bad)		\
-	do {							\
-		ceph_encode_need(p, end, n, bad);		\
-		ceph_encode_string(p, end, s, n);		\
-	} while (0)
 
 
 #endif
diff --git a/trunk/fs/ceph/dir.c b/trunk/fs/ceph/dir.c
index e0a2dc6fcafc..a1986eb52045 100644
--- a/trunk/fs/ceph/dir.c
+++ b/trunk/fs/ceph/dir.c
@@ -1,4 +1,4 @@
-#include <linux/ceph/ceph_debug.h>
+#include "ceph_debug.h"
 
 #include <linux/spinlock.h>
 #include <linux/fs_struct.h>
@@ -7,7 +7,6 @@
 #include <linux/sched.h>
 
 #include "super.h"
-#include "mds_client.h"
 
 /*
  * Directory operations: readdir, lookup, create, link, unlink,
@@ -95,7 +94,10 @@ static unsigned fpos_off(loff_t p)
  */
 static int __dcache_readdir(struct file *filp,
 			    void *dirent, filldir_t filldir)
+		__releases(inode->i_lock)
+		__acquires(inode->i_lock)
 {
+	struct inode *inode = filp->f_dentry->d_inode;
 	struct ceph_file_info *fi = filp->private_data;
 	struct dentry *parent = filp->f_dentry;
 	struct inode *dir = parent->d_inode;
@@ -151,6 +153,7 @@ static int __dcache_readdir(struct file *filp,
 
 	atomic_inc(&dentry->d_count);
 	spin_unlock(&dcache_lock);
+	spin_unlock(&inode->i_lock);
 
 	dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos,
 	     dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
@@ -168,30 +171,35 @@ static int __dcache_readdir(struct file *filp,
 		} else {
 			dput(last);
 		}
+		last = NULL;
 	}
+
+	spin_lock(&inode->i_lock);
+	spin_lock(&dcache_lock);
+
 	last = dentry;
 
 	if (err < 0)
-		goto out;
+		goto out_unlock;
 
+	p = p->prev;
 	filp->f_pos++;
 
 	/* make sure a dentry wasn't dropped while we didn't have dcache_lock */
-	if (!ceph_i_test(dir, CEPH_I_COMPLETE)) {
-		dout(" lost I_COMPLETE on %p; falling back to mds\n", dir);
-		err = -EAGAIN;
-		goto out;
-	}
-
-	spin_lock(&dcache_lock);
-	p = p->prev;	/* advance to next dentry */
-	goto more;
+	if ((ceph_inode(dir)->i_ceph_flags & CEPH_I_COMPLETE))
+		goto more;
+	dout(" lost I_COMPLETE on %p; falling back to mds\n", dir);
+	err = -EAGAIN;
 
 out_unlock:
 	spin_unlock(&dcache_lock);
-out:
-	if (last)
+
+	if (last) {
+		spin_unlock(&inode->i_lock);
 		dput(last);
+		spin_lock(&inode->i_lock);
+	}
+
 	return err;
 }
 
@@ -219,15 +227,15 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	struct ceph_file_info *fi = filp->private_data;
 	struct inode *inode = filp->f_dentry->d_inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_client *client = ceph_inode_to_client(inode);
+	struct ceph_mds_client *mdsc = &client->mdsc;
 	unsigned frag = fpos_frag(filp->f_pos);
 	int off = fpos_off(filp->f_pos);
 	int err;
 	u32 ftype;
 	struct ceph_mds_reply_info_parsed *rinfo;
-	const int max_entries = fsc->mount_options->max_readdir;
-	const int max_bytes = fsc->mount_options->max_readdir_bytes;
+	const int max_entries = client->mount_args->max_readdir;
+	const int max_bytes = client->mount_args->max_readdir_bytes;
 
 	dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off);
 	if (fi->at_end)
@@ -259,17 +267,17 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	/* can we use the dcache? */
 	spin_lock(&inode->i_lock);
 	if ((filp->f_pos == 2 || fi->dentry) &&
-	    !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
+	    !ceph_test_opt(client, NOASYNCREADDIR) &&
 	    ceph_snap(inode) != CEPH_SNAPDIR &&
 	    (ci->i_ceph_flags & CEPH_I_COMPLETE) &&
 	    __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
-		spin_unlock(&inode->i_lock);
 		err = __dcache_readdir(filp, dirent, filldir);
-		if (err != -EAGAIN)
+		if (err != -EAGAIN) {
+			spin_unlock(&inode->i_lock);
 			return err;
-	} else {
-		spin_unlock(&inode->i_lock);
+		}
 	}
+	spin_unlock(&inode->i_lock);
 	if (fi->dentry) {
 		err = note_last_dentry(fi, fi->dentry->d_name.name,
 				       fi->dentry->d_name.len);
@@ -479,13 +487,14 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
 struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
 				  struct dentry *dentry, int err)
 {
-	struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
+	struct ceph_client *client = ceph_sb_to_client(dentry->d_sb);
 	struct inode *parent = dentry->d_parent->d_inode;
 
 	/* .snap dir? */
 	if (err == -ENOENT &&
+	    ceph_vino(parent).ino != CEPH_INO_ROOT && /* no .snap in root dir */
 	    strcmp(dentry->d_name.name,
-		   fsc->mount_options->snapdir_name) == 0) {
+		   client->mount_args->snapdir_name) == 0) {
 		struct inode *inode = ceph_get_snapdir(parent);
 		dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n",
 		     dentry, dentry->d_name.len, dentry->d_name.name, inode);
@@ -530,8 +539,8 @@ static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry)
 static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
 				  struct nameidata *nd)
 {
-	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
+	struct ceph_mds_client *mdsc = &client->mdsc;
 	struct ceph_mds_request *req;
 	int op;
 	int err;
@@ -563,7 +572,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
 		spin_lock(&dir->i_lock);
 		dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags);
 		if (strncmp(dentry->d_name.name,
-			    fsc->mount_options->snapdir_name,
+			    client->mount_args->snapdir_name,
 			    dentry->d_name.len) &&
 		    !is_root_ceph_dentry(dir, dentry) &&
 		    (ci->i_ceph_flags & CEPH_I_COMPLETE) &&
@@ -620,8 +629,8 @@ int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry)
 static int ceph_mknod(struct inode *dir, struct dentry *dentry,
 		      int mode, dev_t rdev)
 {
-	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
+	struct ceph_mds_client *mdsc = &client->mdsc;
 	struct ceph_mds_request *req;
 	int err;
 
@@ -676,8 +685,8 @@ static int ceph_create(struct inode *dir, struct dentry *dentry, int mode,
 static int ceph_symlink(struct inode *dir, struct dentry *dentry,
 			    const char *dest)
 {
-	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
+	struct ceph_mds_client *mdsc = &client->mdsc;
 	struct ceph_mds_request *req;
 	int err;
 
@@ -707,8 +716,8 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
 
 static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
-	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
+	struct ceph_mds_client *mdsc = &client->mdsc;
 	struct ceph_mds_request *req;
 	int err = -EROFS;
 	int op;
@@ -749,8 +758,8 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 static int ceph_link(struct dentry *old_dentry, struct inode *dir,
 		     struct dentry *dentry)
 {
-	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
+	struct ceph_mds_client *mdsc = &client->mdsc;
 	struct ceph_mds_request *req;
 	int err;
 
@@ -804,8 +813,8 @@ static int drop_caps_for_unlink(struct inode *inode)
  */
 static int ceph_unlink(struct inode *dir, struct dentry *dentry)
 {
-	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
+	struct ceph_mds_client *mdsc = &client->mdsc;
 	struct inode *inode = dentry->d_inode;
 	struct ceph_mds_request *req;
 	int err = -EROFS;
@@ -845,8 +854,8 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
 static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
 		       struct inode *new_dir, struct dentry *new_dentry)
 {
-	struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_client *client = ceph_sb_to_client(old_dir->i_sb);
+	struct ceph_mds_client *mdsc = &client->mdsc;
 	struct ceph_mds_request *req;
 	int err;
 
@@ -1067,7 +1076,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int left;
 
-	if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
+	if (!ceph_test_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
 		return -EISDIR;
 
 	if (!cf->dir_info) {
@@ -1168,7 +1177,7 @@ void ceph_dentry_lru_add(struct dentry *dn)
 	dout("dentry_lru_add %p %p '%.*s'\n", di, dn,
 	     dn->d_name.len, dn->d_name.name);
 	if (di) {
-		mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
+		mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc;
 		spin_lock(&mdsc->dentry_lru_lock);
 		list_add_tail(&di->lru, &mdsc->dentry_lru);
 		mdsc->num_dentry++;
@@ -1184,7 +1193,7 @@ void ceph_dentry_lru_touch(struct dentry *dn)
 	dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn,
 	     dn->d_name.len, dn->d_name.name, di->offset);
 	if (di) {
-		mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
+		mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc;
 		spin_lock(&mdsc->dentry_lru_lock);
 		list_move_tail(&di->lru, &mdsc->dentry_lru);
 		spin_unlock(&mdsc->dentry_lru_lock);
@@ -1199,7 +1208,7 @@ void ceph_dentry_lru_del(struct dentry *dn)
 	dout("dentry_lru_del %p %p '%.*s'\n", di, dn,
 	     dn->d_name.len, dn->d_name.name);
 	if (di) {
-		mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
+		mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc;
 		spin_lock(&mdsc->dentry_lru_lock);
 		list_del_init(&di->lru);
 		mdsc->num_dentry--;
diff --git a/trunk/fs/ceph/export.c b/trunk/fs/ceph/export.c
index 2297d9426992..e38423e82f2e 100644
--- a/trunk/fs/ceph/export.c
+++ b/trunk/fs/ceph/export.c
@@ -1,11 +1,10 @@
-#include <linux/ceph/ceph_debug.h>
+#include "ceph_debug.h"
 
 #include <linux/exportfs.h>
 #include <linux/slab.h>
 #include <asm/unaligned.h>
 
 #include "super.h"
-#include "mds_client.h"
 
 /*
  * NFS export support
@@ -121,7 +120,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
 static struct dentry *__cfh_to_dentry(struct super_block *sb,
 				      struct ceph_nfs_confh *cfh)
 {
-	struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
+	struct ceph_mds_client *mdsc = &ceph_sb_to_client(sb)->mdsc;
 	struct inode *inode;
 	struct dentry *dentry;
 	struct ceph_vino vino;
diff --git a/trunk/fs/ceph/file.c b/trunk/fs/ceph/file.c
index e77c28cf3690..66e4da6dba22 100644
--- a/trunk/fs/ceph/file.c
+++ b/trunk/fs/ceph/file.c
@@ -1,6 +1,5 @@
-#include <linux/ceph/ceph_debug.h>
+#include "ceph_debug.h"
 
-#include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/file.h>
@@ -39,8 +38,8 @@
 static struct ceph_mds_request *
 prepare_open_request(struct super_block *sb, int flags, int create_mode)
 {
-	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_client *client = ceph_sb_to_client(sb);
+	struct ceph_mds_client *mdsc = &client->mdsc;
 	struct ceph_mds_request *req;
 	int want_auth = USE_ANY_MDS;
 	int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN;
@@ -118,8 +117,8 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
 int ceph_open(struct inode *inode, struct file *file)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_client *client = ceph_sb_to_client(inode->i_sb);
+	struct ceph_mds_client *mdsc = &client->mdsc;
 	struct ceph_mds_request *req;
 	struct ceph_file_info *cf = file->private_data;
 	struct inode *parent_inode = file->f_dentry->d_parent->d_inode;
@@ -217,8 +216,8 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
 				struct nameidata *nd, int mode,
 				int locked_dir)
 {
-	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
+	struct ceph_mds_client *mdsc = &client->mdsc;
 	struct file *file = nd->intent.open.file;
 	struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry);
 	struct ceph_mds_request *req;
@@ -270,6 +269,163 @@ int ceph_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+/*
+ * build a vector of user pages
+ */
+static struct page **get_direct_page_vector(const char __user *data,
+					    int num_pages,
+					    loff_t off, size_t len)
+{
+	struct page **pages;
+	int rc;
+
+	pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS);
+	if (!pages)
+		return ERR_PTR(-ENOMEM);
+
+	down_read(&current->mm->mmap_sem);
+	rc = get_user_pages(current, current->mm, (unsigned long)data,
+			    num_pages, 0, 0, pages, NULL);
+	up_read(&current->mm->mmap_sem);
+	if (rc < 0)
+		goto fail;
+	return pages;
+
+fail:
+	kfree(pages);
+	return ERR_PTR(rc);
+}
+
+static void put_page_vector(struct page **pages, int num_pages)
+{
+	int i;
+
+	for (i = 0; i < num_pages; i++)
+		put_page(pages[i]);
+	kfree(pages);
+}
+
+void ceph_release_page_vector(struct page **pages, int num_pages)
+{
+	int i;
+
+	for (i = 0; i < num_pages; i++)
+		__free_pages(pages[i], 0);
+	kfree(pages);
+}
+
+/*
+ * allocate a vector new pages
+ */
+static struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
+{
+	struct page **pages;
+	int i;
+
+	pages = kmalloc(sizeof(*pages) * num_pages, flags);
+	if (!pages)
+		return ERR_PTR(-ENOMEM);
+	for (i = 0; i < num_pages; i++) {
+		pages[i] = __page_cache_alloc(flags);
+		if (pages[i] == NULL) {
+			ceph_release_page_vector(pages, i);
+			return ERR_PTR(-ENOMEM);
+		}
+	}
+	return pages;
+}
+
+/*
+ * copy user data into a page vector
+ */
+static int copy_user_to_page_vector(struct page **pages,
+				    const char __user *data,
+				    loff_t off, size_t len)
+{
+	int i = 0;
+	int po = off & ~PAGE_CACHE_MASK;
+	int left = len;
+	int l, bad;
+
+	while (left > 0) {
+		l = min_t(int, PAGE_CACHE_SIZE-po, left);
+		bad = copy_from_user(page_address(pages[i]) + po, data, l);
+		if (bad == l)
+			return -EFAULT;
+		data += l - bad;
+		left -= l - bad;
+		po += l - bad;
+		if (po == PAGE_CACHE_SIZE) {
+			po = 0;
+			i++;
+		}
+	}
+	return len;
+}
+
+/*
+ * copy user data from a page vector into a user pointer
+ */
+static int copy_page_vector_to_user(struct page **pages, char __user *data,
+				    loff_t off, size_t len)
+{
+	int i = 0;
+	int po = off & ~PAGE_CACHE_MASK;
+	int left = len;
+	int l, bad;
+
+	while (left > 0) {
+		l = min_t(int, left, PAGE_CACHE_SIZE-po);
+		bad = copy_to_user(data, page_address(pages[i]) + po, l);
+		if (bad == l)
+			return -EFAULT;
+		data += l - bad;
+		left -= l - bad;
+		if (po) {
+			po += l - bad;
+			if (po == PAGE_CACHE_SIZE)
+				po = 0;
+		}
+		i++;
+	}
+	return len;
+}
+
+/*
+ * Zero an extent within a page vector.  Offset is relative to the
+ * start of the first page.
+ */
+static void zero_page_vector_range(int off, int len, struct page **pages)
+{
+	int i = off >> PAGE_CACHE_SHIFT;
+
+	off &= ~PAGE_CACHE_MASK;
+
+	dout("zero_page_vector_page %u~%u\n", off, len);
+
+	/* leading partial page? */
+	if (off) {
+		int end = min((int)PAGE_CACHE_SIZE, off + len);
+		dout("zeroing %d %p head from %d\n", i, pages[i],
+		     (int)off);
+		zero_user_segment(pages[i], off, end);
+		len -= (end - off);
+		i++;
+	}
+	while (len >= PAGE_CACHE_SIZE) {
+		dout("zeroing %d %p len=%d\n", i, pages[i], len);
+		zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
+		len -= PAGE_CACHE_SIZE;
+		i++;
+	}
+	/* trailing partial page? */
+	if (len) {
+		dout("zeroing %d %p tail to %d\n", i, pages[i], (int)len);
+		zero_user_segment(pages[i], 0, len);
+	}
+}
+
+
 /*
  * Read a range of bytes striped over one or more objects.  Iterate over
  * objects we stripe over.  (That's not atomic, but good enough for now.)
@@ -282,7 +438,7 @@ static int striped_read(struct inode *inode,
 			struct page **pages, int num_pages,
 			int *checkeof)
 {
-	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+	struct ceph_client *client = ceph_inode_to_client(inode);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	u64 pos, this_len;
 	int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */
@@ -303,7 +459,7 @@ static int striped_read(struct inode *inode,
 
 more:
 	this_len = left;
-	ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
+	ret = ceph_osdc_readpages(&client->osdc, ceph_vino(inode),
 				  &ci->i_layout, pos, &this_len,
 				  ci->i_truncate_seq,
 				  ci->i_truncate_size,
@@ -321,8 +477,8 @@ static int striped_read(struct inode *inode,
 
 		if (read < pos - off) {
 			dout(" zero gap %llu to %llu\n", off + read, pos);
-			ceph_zero_page_vector_range(page_off + read,
-						    pos - off - read, pages);
+			zero_page_vector_range(page_off + read,
+					       pos - off - read, pages);
 		}
 		pos += ret;
 		read = pos - off;
@@ -339,8 +495,8 @@ static int striped_read(struct inode *inode,
 		/* was original extent fully inside i_size? */
 		if (pos + left <= inode->i_size) {
 			dout("zero tail\n");
-			ceph_zero_page_vector_range(page_off + read, len - read,
-						    pages);
+			zero_page_vector_range(page_off + read, len - read,
+					       pages);
 			read = len;
 			goto out;
 		}
@@ -375,7 +531,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
 	     (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
 
 	if (file->f_flags & O_DIRECT) {
-		pages = ceph_get_direct_page_vector(data, num_pages, off, len);
+		pages = get_direct_page_vector(data, num_pages, off, len);
 
 		/*
 		 * flush any page cache pages in this range.  this
@@ -396,13 +552,13 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
 	ret = striped_read(inode, off, len, pages, num_pages, checkeof);
 
 	if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
-		ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
+		ret = copy_page_vector_to_user(pages, data, off, ret);
 	if (ret >= 0)
 		*poff = off + ret;
 
 done:
 	if (file->f_flags & O_DIRECT)
-		ceph_put_page_vector(pages, num_pages);
+		put_page_vector(pages, num_pages);
 	else
 		ceph_release_page_vector(pages, num_pages);
 	dout("sync_read result %d\n", ret);
@@ -438,7 +594,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
 {
 	struct inode *inode = file->f_dentry->d_inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+	struct ceph_client *client = ceph_inode_to_client(inode);
 	struct ceph_osd_request *req;
 	struct page **pages;
 	int num_pages;
@@ -486,7 +642,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
 	 */
 more:
 	len = left;
-	req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
+	req = ceph_osdc_new_request(&client->osdc, &ci->i_layout,
 				    ceph_vino(inode), pos, &len,
 				    CEPH_OSD_OP_WRITE, flags,
 				    ci->i_snap_realm->cached_context,
@@ -499,7 +655,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
 	num_pages = calc_pages_for(pos, len);
 
 	if (file->f_flags & O_DIRECT) {
-		pages = ceph_get_direct_page_vector(data, num_pages, pos, len);
+		pages = get_direct_page_vector(data, num_pages, pos, len);
 		if (IS_ERR(pages)) {
 			ret = PTR_ERR(pages);
 			goto out;
@@ -517,7 +673,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
 			ret = PTR_ERR(pages);
 			goto out;
 		}
-		ret = ceph_copy_user_to_page_vector(pages, data, pos, len);
+		ret = copy_user_to_page_vector(pages, data, pos, len);
 		if (ret < 0) {
 			ceph_release_page_vector(pages, num_pages);
 			goto out;
@@ -533,7 +689,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
 	req->r_num_pages = num_pages;
 	req->r_inode = inode;
 
-	ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
+	ret = ceph_osdc_start_request(&client->osdc, req, false);
 	if (!ret) {
 		if (req->r_safe_callback) {
 			/*
@@ -545,11 +701,11 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
 			spin_unlock(&ci->i_unsafe_lock);
 			ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR);
 		}
-		ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
+		ret = ceph_osdc_wait_request(&client->osdc, req);
 	}
 
 	if (file->f_flags & O_DIRECT)
-		ceph_put_page_vector(pages, num_pages);
+		put_page_vector(pages, num_pages);
 	else if (file->f_flags & O_SYNC)
 		ceph_release_page_vector(pages, num_pages);
 
@@ -658,8 +814,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	struct ceph_file_info *fi = file->private_data;
 	struct inode *inode = file->f_dentry->d_inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_osd_client *osdc =
-		&ceph_sb_to_client(inode->i_sb)->client->osdc;
+	struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc;
 	loff_t endoff = pos + iov->iov_len;
 	int want, got = 0;
 	int ret, err;
diff --git a/trunk/fs/ceph/inode.c b/trunk/fs/ceph/inode.c
index 1d6a45b5a04c..62377ec37edf 100644
--- a/trunk/fs/ceph/inode.c
+++ b/trunk/fs/ceph/inode.c
@@ -1,4 +1,4 @@
-#include <linux/ceph/ceph_debug.h>
+#include "ceph_debug.h"
 
 #include <linux/module.h>
 #include <linux/fs.h>
@@ -13,8 +13,7 @@
 #include <linux/pagevec.h>
 
 #include "super.h"
-#include "mds_client.h"
-#include <linux/ceph/decode.h>
+#include "decode.h"
 
 /*
  * Ceph inode operations
@@ -385,7 +384,7 @@ void ceph_destroy_inode(struct inode *inode)
 	 */
 	if (ci->i_snap_realm) {
 		struct ceph_mds_client *mdsc =
-			ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
+			&ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
 		struct ceph_snap_realm *realm = ci->i_snap_realm;
 
 		dout(" dropping residual ref to snap realm %p\n", realm);
@@ -686,7 +685,7 @@ static int fill_inode(struct inode *inode,
 		}
 
 		/* it may be better to set st_size in getattr instead? */
-		if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), RBYTES))
+		if (ceph_test_opt(ceph_sb_to_client(inode->i_sb), RBYTES))
 			inode->i_size = ci->i_rbytes;
 		break;
 	default:
@@ -902,7 +901,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 	struct inode *in = NULL;
 	struct ceph_mds_reply_inode *ininfo;
 	struct ceph_vino vino;
-	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
+	struct ceph_client *client = ceph_sb_to_client(sb);
 	int i = 0;
 	int err = 0;
 
@@ -966,7 +965,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 	 */
 	if (rinfo->head->is_dentry && !req->r_aborted &&
 	    (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
-					       fsc->mount_options->snapdir_name,
+					       client->mount_args->snapdir_name,
 					       req->r_dentry->d_name.len))) {
 		/*
 		 * lookup link rename   : null -> possibly existing inode
@@ -1534,7 +1533,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
 	struct inode *parent_inode = dentry->d_parent->d_inode;
 	const unsigned int ia_valid = attr->ia_valid;
 	struct ceph_mds_request *req;
-	struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
+	struct ceph_mds_client *mdsc = &ceph_sb_to_client(dentry->d_sb)->mdsc;
 	int issued;
 	int release = 0, dirtied = 0;
 	int mask = 0;
@@ -1729,8 +1728,8 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
  */
 int ceph_do_getattr(struct inode *inode, int mask)
 {
-	struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_client *client = ceph_sb_to_client(inode->i_sb);
+	struct ceph_mds_client *mdsc = &client->mdsc;
 	struct ceph_mds_request *req;
 	int err;
 
diff --git a/trunk/fs/ceph/ioctl.c b/trunk/fs/ceph/ioctl.c
index 8888c9ba68db..76e307d2aba1 100644
--- a/trunk/fs/ceph/ioctl.c
+++ b/trunk/fs/ceph/ioctl.c
@@ -1,10 +1,8 @@
 #include <linux/in.h>
 
-#include "super.h"
-#include "mds_client.h"
-#include <linux/ceph/ceph_debug.h>
-
 #include "ioctl.h"
+#include "super.h"
+#include "ceph_debug.h"
 
 
 /*
@@ -39,7 +37,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
 {
 	struct inode *inode = file->f_dentry->d_inode;
 	struct inode *parent_inode = file->f_dentry->d_parent->d_inode;
-	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+	struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
 	struct ceph_mds_request *req;
 	struct ceph_ioctl_layout l;
 	int err, i;
@@ -91,68 +89,6 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
 	return err;
 }
 
-/*
- * Set a layout policy on a directory inode. All items in the tree
- * rooted at this inode will inherit this layout on creation,
- * (It doesn't apply retroactively )
- * unless a subdirectory has its own layout policy.
- */
-static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
-{
-	struct inode *inode = file->f_dentry->d_inode;
-	struct ceph_mds_request *req;
-	struct ceph_ioctl_layout l;
-	int err, i;
-	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
-
-	/* copy and validate */
-	if (copy_from_user(&l, arg, sizeof(l)))
-		return -EFAULT;
-
-	if ((l.object_size & ~PAGE_MASK) ||
-	    (l.stripe_unit & ~PAGE_MASK) ||
-	    !l.stripe_unit ||
-	    (l.object_size &&
-	        (unsigned)l.object_size % (unsigned)l.stripe_unit))
-		return -EINVAL;
-
-	/* make sure it's a valid data pool */
-	if (l.data_pool > 0) {
-		mutex_lock(&mdsc->mutex);
-		err = -EINVAL;
-		for (i = 0; i < mdsc->mdsmap->m_num_data_pg_pools; i++)
-			if (mdsc->mdsmap->m_data_pg_pools[i] == l.data_pool) {
-				err = 0;
-				break;
-			}
-		mutex_unlock(&mdsc->mutex);
-		if (err)
-			return err;
-	}
-
-	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETDIRLAYOUT,
-				       USE_AUTH_MDS);
-
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-	req->r_inode = igrab(inode);
-
-	req->r_args.setlayout.layout.fl_stripe_unit =
-			cpu_to_le32(l.stripe_unit);
-	req->r_args.setlayout.layout.fl_stripe_count =
-			cpu_to_le32(l.stripe_count);
-	req->r_args.setlayout.layout.fl_object_size =
-			cpu_to_le32(l.object_size);
-	req->r_args.setlayout.layout.fl_pg_pool =
-			cpu_to_le32(l.data_pool);
-	req->r_args.setlayout.layout.fl_pg_preferred =
-			cpu_to_le32(l.preferred_osd);
-
-	err = ceph_mdsc_do_request(mdsc, inode, req);
-	ceph_mdsc_put_request(req);
-	return err;
-}
-
 /*
  * Return object name, size/offset information, and location (OSD
  * number, network address) for a given file offset.
@@ -162,8 +98,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
 	struct ceph_ioctl_dataloc dl;
 	struct inode *inode = file->f_dentry->d_inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_osd_client *osdc =
-		&ceph_sb_to_client(inode->i_sb)->client->osdc;
+	struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc;
 	u64 len = 1, olen;
 	u64 tmp;
 	struct ceph_object_layout ol;
@@ -239,15 +174,11 @@ long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case CEPH_IOC_SET_LAYOUT:
 		return ceph_ioctl_set_layout(file, (void __user *)arg);
 
-	case CEPH_IOC_SET_LAYOUT_POLICY:
-		return ceph_ioctl_set_layout_policy(file, (void __user *)arg);
-
 	case CEPH_IOC_GET_DATALOC:
 		return ceph_ioctl_get_dataloc(file, (void __user *)arg);
 
 	case CEPH_IOC_LAZYIO:
 		return ceph_ioctl_lazyio(file);
 	}
-
 	return -ENOTTY;
 }
diff --git a/trunk/fs/ceph/ioctl.h b/trunk/fs/ceph/ioctl.h
index a6ce54e94eb5..88451a3b6857 100644
--- a/trunk/fs/ceph/ioctl.h
+++ b/trunk/fs/ceph/ioctl.h
@@ -4,7 +4,7 @@
 #include <linux/ioctl.h>
 #include <linux/types.h>
 
-#define CEPH_IOCTL_MAGIC 0x98
+#define CEPH_IOCTL_MAGIC 0x97
 
 /* just use u64 to align sanely on all archs */
 struct ceph_ioctl_layout {
@@ -17,8 +17,6 @@ struct ceph_ioctl_layout {
 				   struct ceph_ioctl_layout)
 #define CEPH_IOC_SET_LAYOUT _IOW(CEPH_IOCTL_MAGIC, 2,		\
 				   struct ceph_ioctl_layout)
-#define CEPH_IOC_SET_LAYOUT_POLICY _IOW(CEPH_IOCTL_MAGIC, 5,	\
-				   struct ceph_ioctl_layout)
 
 /*
  * Extract identity, address of the OSD and object storing a given
diff --git a/trunk/fs/ceph/locks.c b/trunk/fs/ceph/locks.c
index 40abde93c345..ff4e753aae92 100644
--- a/trunk/fs/ceph/locks.c
+++ b/trunk/fs/ceph/locks.c
@@ -1,11 +1,11 @@
-#include <linux/ceph/ceph_debug.h>
+#include "ceph_debug.h"
 
 #include <linux/file.h>
 #include <linux/namei.h>
 
 #include "super.h"
 #include "mds_client.h"
-#include <linux/ceph/pagelist.h>
+#include "pagelist.h"
 
 /**
  * Implement fcntl and flock locking functions.
@@ -16,7 +16,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
 {
 	struct inode *inode = file->f_dentry->d_inode;
 	struct ceph_mds_client *mdsc =
-		ceph_sb_to_client(inode->i_sb)->mdsc;
+		&ceph_sb_to_client(inode->i_sb)->mdsc;
 	struct ceph_mds_request *req;
 	int err;
 
@@ -181,9 +181,8 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
  * Encode the flock and fcntl locks for the given inode into the pagelist.
  * Format is: #fcntl locks, sequential fcntl locks, #flock locks,
  * sequential flock locks.
- * Must be called with lock_flocks() already held.
- * If we encounter more of a specific lock type than expected,
- * we return the value 1.
+ * Must be called with BLK already held, and the lock numbers should have
+ * been gathered under the same lock holding window.
  */
 int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist,
 		      int num_fcntl_locks, int num_flock_locks)
@@ -191,8 +190,6 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist,
 	struct file_lock *lock;
 	struct ceph_filelock cephlock;
 	int err = 0;
-	int seen_fcntl = 0;
-	int seen_flock = 0;
 
 	dout("encoding %d flock and %d fcntl locks", num_flock_locks,
 	     num_fcntl_locks);
@@ -201,11 +198,6 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist,
 		goto fail;
 	for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
 		if (lock->fl_flags & FL_POSIX) {
-			++seen_fcntl;
-			if (seen_fcntl > num_fcntl_locks) {
-				err = -ENOSPC;
-				goto fail;
-			}
 			err = lock_to_ceph_filelock(lock, &cephlock);
 			if (err)
 				goto fail;
@@ -221,11 +213,6 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist,
 		goto fail;
 	for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
 		if (lock->fl_flags & FL_FLOCK) {
-			++seen_flock;
-			if (seen_flock > num_flock_locks) {
-				err = -ENOSPC;
-				goto fail;
-			}
 			err = lock_to_ceph_filelock(lock, &cephlock);
 			if (err)
 				goto fail;
diff --git a/trunk/fs/ceph/mds_client.c b/trunk/fs/ceph/mds_client.c
index 3142b15940c2..fad95f8f2608 100644
--- a/trunk/fs/ceph/mds_client.c
+++ b/trunk/fs/ceph/mds_client.c
@@ -1,21 +1,17 @@
-#include <linux/ceph/ceph_debug.h>
+#include "ceph_debug.h"
 
-#include <linux/fs.h>
 #include <linux/wait.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
 #include <linux/smp_lock.h>
 
-#include "super.h"
 #include "mds_client.h"
-
-#include <linux/ceph/messenger.h>
-#include <linux/ceph/decode.h>
-#include <linux/ceph/pagelist.h>
-#include <linux/ceph/auth.h>
-#include <linux/ceph/debugfs.h>
+#include "mon_client.h"
+#include "super.h"
+#include "messenger.h"
+#include "decode.h"
+#include "auth.h"
+#include "pagelist.h"
 
 /*
  * A cluster of MDS (metadata server) daemons is responsible for
@@ -290,9 +286,8 @@ void ceph_put_mds_session(struct ceph_mds_session *s)
 	     atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1);
 	if (atomic_dec_and_test(&s->s_ref)) {
 		if (s->s_authorizer)
-		     s->s_mdsc->fsc->client->monc.auth->ops->destroy_authorizer(
-			     s->s_mdsc->fsc->client->monc.auth,
-			     s->s_authorizer);
+			s->s_mdsc->client->monc.auth->ops->destroy_authorizer(
+				s->s_mdsc->client->monc.auth, s->s_authorizer);
 		kfree(s);
 	}
 }
@@ -349,7 +344,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 	s->s_seq = 0;
 	mutex_init(&s->s_mutex);
 
-	ceph_con_init(mdsc->fsc->client->msgr, &s->s_con);
+	ceph_con_init(mdsc->client->msgr, &s->s_con);
 	s->s_con.private = s;
 	s->s_con.ops = &mds_con_ops;
 	s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS;
@@ -604,7 +599,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
 	} else if (req->r_dentry) {
 		struct inode *dir = req->r_dentry->d_parent->d_inode;
 
-		if (dir->i_sb != mdsc->fsc->sb) {
+		if (dir->i_sb != mdsc->client->sb) {
 			/* not this fs! */
 			inode = req->r_dentry->d_inode;
 		} else if (ceph_snap(dir) != CEPH_NOSNAP) {
@@ -889,7 +884,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 	__ceph_remove_cap(cap);
 	if (!__ceph_is_any_real_caps(ci)) {
 		struct ceph_mds_client *mdsc =
-			ceph_sb_to_client(inode->i_sb)->mdsc;
+			&ceph_sb_to_client(inode->i_sb)->mdsc;
 
 		spin_lock(&mdsc->cap_dirty_lock);
 		if (!list_empty(&ci->i_dirty_item)) {
@@ -1151,7 +1146,7 @@ int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
 	struct ceph_msg *msg, *partial = NULL;
 	struct ceph_mds_cap_release *head;
 	int err = -ENOMEM;
-	int extra = mdsc->fsc->mount_options->cap_release_safety;
+	int extra = mdsc->client->mount_args->cap_release_safety;
 	int num;
 
 	dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds,
@@ -2090,7 +2085,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 
 	/* insert trace into our cache */
 	mutex_lock(&req->r_fill_mutex);
-	err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
+	err = ceph_fill_trace(mdsc->client->sb, req, req->r_session);
 	if (err == 0) {
 		if (result == 0 && rinfo->dir_nr)
 			ceph_readdir_prepopulate(req, req->r_session);
@@ -2366,35 +2361,19 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
 
 	if (recon_state->flock) {
 		int num_fcntl_locks, num_flock_locks;
-		struct ceph_pagelist_cursor trunc_point;
-
-		ceph_pagelist_set_cursor(pagelist, &trunc_point);
-		do {
-			lock_flocks();
-			ceph_count_locks(inode, &num_fcntl_locks,
-					 &num_flock_locks);
-			rec.v2.flock_len = (2*sizeof(u32) +
-					    (num_fcntl_locks+num_flock_locks) *
-					    sizeof(struct ceph_filelock));
-			unlock_flocks();
-
-			/* pre-alloc pagelist */
-			ceph_pagelist_truncate(pagelist, &trunc_point);
-			err = ceph_pagelist_append(pagelist, &rec, reclen);
-			if (!err)
-				err = ceph_pagelist_reserve(pagelist,
-							    rec.v2.flock_len);
-
-			/* encode locks */
-			if (!err) {
-				lock_flocks();
-				err = ceph_encode_locks(inode,
-							pagelist,
-							num_fcntl_locks,
-							num_flock_locks);
-				unlock_flocks();
-			}
-		} while (err == -ENOSPC);
+
+		lock_kernel();
+		ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
+		rec.v2.flock_len = (2*sizeof(u32) +
+				    (num_fcntl_locks+num_flock_locks) *
+				    sizeof(struct ceph_filelock));
+
+		err = ceph_pagelist_append(pagelist, &rec, reclen);
+		if (!err)
+			err = ceph_encode_locks(inode, pagelist,
+						num_fcntl_locks,
+						num_flock_locks);
+		unlock_kernel();
 	} else {
 		err = ceph_pagelist_append(pagelist, &rec, reclen);
 	}
@@ -2634,7 +2613,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
 			 struct ceph_mds_session *session,
 			 struct ceph_msg *msg)
 {
-	struct super_block *sb = mdsc->fsc->sb;
+	struct super_block *sb = mdsc->client->sb;
 	struct inode *inode;
 	struct ceph_inode_info *ci;
 	struct dentry *parent, *dentry;
@@ -2912,16 +2891,10 @@ static void delayed_work(struct work_struct *work)
 	schedule_delayed(mdsc);
 }
 
-int ceph_mdsc_init(struct ceph_fs_client *fsc)
 
+int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client)
 {
-	struct ceph_mds_client *mdsc;
-
-	mdsc = kzalloc(sizeof(struct ceph_mds_client), GFP_NOFS);
-	if (!mdsc)
-		return -ENOMEM;
-	mdsc->fsc = fsc;
-	fsc->mdsc = mdsc;
+	mdsc->client = client;
 	mutex_init(&mdsc->mutex);
 	mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS);
 	if (mdsc->mdsmap == NULL)
@@ -2954,7 +2927,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
 	INIT_LIST_HEAD(&mdsc->dentry_lru);
 
 	ceph_caps_init(mdsc);
-	ceph_adjust_min_caps(mdsc, fsc->min_caps);
+	ceph_adjust_min_caps(mdsc, client->min_caps);
 
 	return 0;
 }
@@ -2966,7 +2939,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
 static void wait_requests(struct ceph_mds_client *mdsc)
 {
 	struct ceph_mds_request *req;
-	struct ceph_fs_client *fsc = mdsc->fsc;
+	struct ceph_client *client = mdsc->client;
 
 	mutex_lock(&mdsc->mutex);
 	if (__get_oldest_req(mdsc)) {
@@ -2974,7 +2947,7 @@ static void wait_requests(struct ceph_mds_client *mdsc)
 
 		dout("wait_requests waiting for requests\n");
 		wait_for_completion_timeout(&mdsc->safe_umount_waiters,
-				    fsc->client->options->mount_timeout * HZ);
+				    client->mount_args->mount_timeout * HZ);
 
 		/* tear down remaining requests */
 		mutex_lock(&mdsc->mutex);
@@ -3057,7 +3030,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
 {
 	u64 want_tid, want_flush;
 
-	if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN)
+	if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN)
 		return;
 
 	dout("sync\n");
@@ -3080,7 +3053,7 @@ bool done_closing_sessions(struct ceph_mds_client *mdsc)
 {
 	int i, n = 0;
 
-	if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN)
+	if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN)
 		return true;
 
 	mutex_lock(&mdsc->mutex);
@@ -3098,8 +3071,8 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
 {
 	struct ceph_mds_session *session;
 	int i;
-	struct ceph_fs_client *fsc = mdsc->fsc;
-	unsigned long timeout = fsc->client->options->mount_timeout * HZ;
+	struct ceph_client *client = mdsc->client;
+	unsigned long timeout = client->mount_args->mount_timeout * HZ;
 
 	dout("close_sessions\n");
 
@@ -3146,7 +3119,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
 	dout("stopped\n");
 }
 
-static void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
+void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
 {
 	dout("stop\n");
 	cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */
@@ -3156,15 +3129,6 @@ static void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
 	ceph_caps_finalize(mdsc);
 }
 
-void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
-{
-	struct ceph_mds_client *mdsc = fsc->mdsc;
-
-	ceph_mdsc_stop(mdsc);
-	fsc->mdsc = NULL;
-	kfree(mdsc);
-}
-
 
 /*
  * handle mds map update.
@@ -3181,14 +3145,14 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
 
 	ceph_decode_need(&p, end, sizeof(fsid)+2*sizeof(u32), bad);
 	ceph_decode_copy(&p, &fsid, sizeof(fsid));
-	if (ceph_check_fsid(mdsc->fsc->client, &fsid) < 0)
+	if (ceph_check_fsid(mdsc->client, &fsid) < 0)
 		return;
 	epoch = ceph_decode_32(&p);
 	maplen = ceph_decode_32(&p);
 	dout("handle_map epoch %u len %d\n", epoch, (int)maplen);
 
 	/* do we need it? */
-	ceph_monc_got_mdsmap(&mdsc->fsc->client->monc, epoch);
+	ceph_monc_got_mdsmap(&mdsc->client->monc, epoch);
 	mutex_lock(&mdsc->mutex);
 	if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) {
 		dout("handle_map epoch %u <= our %u\n",
@@ -3212,7 +3176,7 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
 	} else {
 		mdsc->mdsmap = newmap;  /* first mds map */
 	}
-	mdsc->fsc->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size;
+	mdsc->client->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size;
 
 	__wake_requests(mdsc, &mdsc->waiting_for_map);
 
@@ -3313,7 +3277,7 @@ static int get_authorizer(struct ceph_connection *con,
 {
 	struct ceph_mds_session *s = con->private;
 	struct ceph_mds_client *mdsc = s->s_mdsc;
-	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
+	struct ceph_auth_client *ac = mdsc->client->monc.auth;
 	int ret = 0;
 
 	if (force_new && s->s_authorizer) {
@@ -3347,7 +3311,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len)
 {
 	struct ceph_mds_session *s = con->private;
 	struct ceph_mds_client *mdsc = s->s_mdsc;
-	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
+	struct ceph_auth_client *ac = mdsc->client->monc.auth;
 
 	return ac->ops->verify_authorizer_reply(ac, s->s_authorizer, len);
 }
@@ -3356,12 +3320,12 @@ static int invalidate_authorizer(struct ceph_connection *con)
 {
 	struct ceph_mds_session *s = con->private;
 	struct ceph_mds_client *mdsc = s->s_mdsc;
-	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
+	struct ceph_auth_client *ac = mdsc->client->monc.auth;
 
 	if (ac->ops->invalidate_authorizer)
 		ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS);
 
-	return ceph_monc_validate_auth(&mdsc->fsc->client->monc);
+	return ceph_monc_validate_auth(&mdsc->client->monc);
 }
 
 static const struct ceph_connection_operations mds_con_ops = {
@@ -3374,4 +3338,7 @@ static const struct ceph_connection_operations mds_con_ops = {
 	.peer_reset = peer_reset,
 };
 
+
+
+
 /* eof */
diff --git a/trunk/fs/ceph/mds_client.h b/trunk/fs/ceph/mds_client.h
index d66d63c72355..c98267ce6d2a 100644
--- a/trunk/fs/ceph/mds_client.h
+++ b/trunk/fs/ceph/mds_client.h
@@ -8,9 +8,9 @@
 #include <linux/rbtree.h>
 #include <linux/spinlock.h>
 
-#include <linux/ceph/types.h>
-#include <linux/ceph/messenger.h>
-#include <linux/ceph/mdsmap.h>
+#include "types.h"
+#include "messenger.h"
+#include "mdsmap.h"
 
 /*
  * Some lock dependencies:
@@ -26,7 +26,7 @@
  *
  */
 
-struct ceph_fs_client;
+struct ceph_client;
 struct ceph_cap;
 
 /*
@@ -230,7 +230,7 @@ struct ceph_mds_request {
  * mds client state
  */
 struct ceph_mds_client {
-	struct ceph_fs_client  *fsc;
+	struct ceph_client      *client;
 	struct mutex            mutex;         /* all nested structures */
 
 	struct ceph_mdsmap      *mdsmap;
@@ -289,6 +289,11 @@ struct ceph_mds_client {
 	int		caps_avail_count;    /* unused, unreserved */
 	int		caps_min_count;      /* keep at least this many
 						(unreserved) */
+
+#ifdef CONFIG_DEBUG_FS
+	struct dentry 	  *debugfs_file;
+#endif
+
 	spinlock_t	  dentry_lru_lock;
 	struct list_head  dentry_lru;
 	int		  num_dentry;
@@ -311,9 +316,10 @@ extern void ceph_put_mds_session(struct ceph_mds_session *s);
 extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc,
 			     struct ceph_msg *msg, int mds);
 
-extern int ceph_mdsc_init(struct ceph_fs_client *fsc);
+extern int ceph_mdsc_init(struct ceph_mds_client *mdsc,
+			   struct ceph_client *client);
 extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc);
-extern void ceph_mdsc_destroy(struct ceph_fs_client *fsc);
+extern void ceph_mdsc_stop(struct ceph_mds_client *mdsc);
 
 extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc);
 
diff --git a/trunk/fs/ceph/mdsmap.c b/trunk/fs/ceph/mdsmap.c
index 73b7d44e8a35..040be6d1150b 100644
--- a/trunk/fs/ceph/mdsmap.c
+++ b/trunk/fs/ceph/mdsmap.c
@@ -1,4 +1,4 @@
-#include <linux/ceph/ceph_debug.h>
+#include "ceph_debug.h"
 
 #include <linux/bug.h>
 #include <linux/err.h>
@@ -6,9 +6,9 @@
 #include <linux/slab.h>
 #include <linux/types.h>
 
-#include <linux/ceph/mdsmap.h>
-#include <linux/ceph/messenger.h>
-#include <linux/ceph/decode.h>
+#include "mdsmap.h"
+#include "messenger.h"
+#include "decode.h"
 
 #include "super.h"
 
@@ -117,8 +117,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 		}
 
 		dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n",
-		     i+1, n, global_id, mds, inc,
-		     ceph_pr_addr(&addr.in_addr),
+		     i+1, n, global_id, mds, inc, pr_addr(&addr.in_addr),
 		     ceph_mds_state_name(state));
 		if (mds >= 0 && mds < m->m_max_mds && state > 0) {
 			m->m_info[mds].global_id = global_id;
diff --git a/trunk/include/linux/ceph/mdsmap.h b/trunk/fs/ceph/mdsmap.h
similarity index 100%
rename from trunk/include/linux/ceph/mdsmap.h
rename to trunk/fs/ceph/mdsmap.h
diff --git a/trunk/net/ceph/messenger.c b/trunk/fs/ceph/messenger.c
similarity index 89%
rename from trunk/net/ceph/messenger.c
rename to trunk/fs/ceph/messenger.c
index 0e8157ee5d43..2502d76fcec1 100644
--- a/trunk/net/ceph/messenger.c
+++ b/trunk/fs/ceph/messenger.c
@@ -1,4 +1,4 @@
-#include <linux/ceph/ceph_debug.h>
+#include "ceph_debug.h"
 
 #include <linux/crc32c.h>
 #include <linux/ctype.h>
@@ -9,14 +9,12 @@
 #include <linux/slab.h>
 #include <linux/socket.h>
 #include <linux/string.h>
-#include <linux/bio.h>
-#include <linux/blkdev.h>
 #include <net/tcp.h>
 
-#include <linux/ceph/libceph.h>
-#include <linux/ceph/messenger.h>
-#include <linux/ceph/decode.h>
-#include <linux/ceph/pagelist.h>
+#include "super.h"
+#include "messenger.h"
+#include "decode.h"
+#include "pagelist.h"
 
 /*
  * Ceph uses the messenger to exchange ceph_msg messages with other
@@ -50,7 +48,7 @@ static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN];
 static DEFINE_SPINLOCK(addr_str_lock);
 static int last_addr_str;
 
-const char *ceph_pr_addr(const struct sockaddr_storage *ss)
+const char *pr_addr(const struct sockaddr_storage *ss)
 {
 	int i;
 	char *s;
@@ -81,7 +79,6 @@ const char *ceph_pr_addr(const struct sockaddr_storage *ss)
 
 	return s;
 }
-EXPORT_SYMBOL(ceph_pr_addr);
 
 static void encode_my_addr(struct ceph_messenger *msgr)
 {
@@ -94,7 +91,7 @@ static void encode_my_addr(struct ceph_messenger *msgr)
  */
 struct workqueue_struct *ceph_msgr_wq;
 
-int ceph_msgr_init(void)
+int __init ceph_msgr_init(void)
 {
 	ceph_msgr_wq = create_workqueue("ceph-msgr");
 	if (IS_ERR(ceph_msgr_wq)) {
@@ -105,19 +102,16 @@ int ceph_msgr_init(void)
 	}
 	return 0;
 }
-EXPORT_SYMBOL(ceph_msgr_init);
 
 void ceph_msgr_exit(void)
 {
 	destroy_workqueue(ceph_msgr_wq);
 }
-EXPORT_SYMBOL(ceph_msgr_exit);
 
 void ceph_msgr_flush(void)
 {
 	flush_workqueue(ceph_msgr_wq);
 }
-EXPORT_SYMBOL(ceph_msgr_flush);
 
 
 /*
@@ -227,19 +221,19 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con)
 
 	set_sock_callbacks(sock, con);
 
-	dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr));
+	dout("connect %s\n", pr_addr(&con->peer_addr.in_addr));
 
 	ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
 				 O_NONBLOCK);
 	if (ret == -EINPROGRESS) {
 		dout("connect %s EINPROGRESS sk_state = %u\n",
-		     ceph_pr_addr(&con->peer_addr.in_addr),
+		     pr_addr(&con->peer_addr.in_addr),
 		     sock->sk->sk_state);
 		ret = 0;
 	}
 	if (ret < 0) {
 		pr_err("connect %s error %d\n",
-		       ceph_pr_addr(&con->peer_addr.in_addr), ret);
+		       pr_addr(&con->peer_addr.in_addr), ret);
 		sock_release(sock);
 		con->sock = NULL;
 		con->error_msg = "connect error";
@@ -340,8 +334,7 @@ static void reset_connection(struct ceph_connection *con)
  */
 void ceph_con_close(struct ceph_connection *con)
 {
-	dout("con_close %p peer %s\n", con,
-	     ceph_pr_addr(&con->peer_addr.in_addr));
+	dout("con_close %p peer %s\n", con, pr_addr(&con->peer_addr.in_addr));
 	set_bit(CLOSED, &con->state);  /* in case there's queued work */
 	clear_bit(STANDBY, &con->state);  /* avoid connect_seq bump */
 	clear_bit(LOSSYTX, &con->state);  /* so we retry next connect */
@@ -354,21 +347,19 @@ void ceph_con_close(struct ceph_connection *con)
 	mutex_unlock(&con->mutex);
 	queue_con(con);
 }
-EXPORT_SYMBOL(ceph_con_close);
 
 /*
  * Reopen a closed connection, with a new peer address.
  */
 void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr)
 {
-	dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
+	dout("con_open %p %s\n", con, pr_addr(&addr->in_addr));
 	set_bit(OPENING, &con->state);
 	clear_bit(CLOSED, &con->state);
 	memcpy(&con->peer_addr, addr, sizeof(*addr));
 	con->delay = 0;      /* reset backoff memory */
 	queue_con(con);
 }
-EXPORT_SYMBOL(ceph_con_open);
 
 /*
  * return true if this connection ever successfully opened
@@ -415,7 +406,6 @@ void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con)
 	INIT_LIST_HEAD(&con->out_sent);
 	INIT_DELAYED_WORK(&con->work, con_work);
 }
-EXPORT_SYMBOL(ceph_con_init);
 
 
 /*
@@ -539,11 +529,8 @@ static void prepare_write_message(struct ceph_connection *con)
 	if (le32_to_cpu(m->hdr.data_len) > 0) {
 		/* initialize page iterator */
 		con->out_msg_pos.page = 0;
-		if (m->pages)
-			con->out_msg_pos.page_pos =
-				le16_to_cpu(m->hdr.data_off) & ~PAGE_MASK;
-		else
-			con->out_msg_pos.page_pos = 0;
+		con->out_msg_pos.page_pos =
+			le16_to_cpu(m->hdr.data_off) & ~PAGE_MASK;
 		con->out_msg_pos.data_pos = 0;
 		con->out_msg_pos.did_page_crc = 0;
 		con->out_more = 1;  /* data + footer will follow */
@@ -660,7 +647,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr,
 	dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
 	     con->connect_seq, global_seq, proto);
 
-	con->out_connect.features = cpu_to_le64(msgr->supported_features);
+	con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED);
 	con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
 	con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
 	con->out_connect.global_seq = cpu_to_le32(global_seq);
@@ -725,31 +712,6 @@ static int write_partial_kvec(struct ceph_connection *con)
 	return ret;  /* done! */
 }
 
-#ifdef CONFIG_BLOCK
-static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg)
-{
-	if (!bio) {
-		*iter = NULL;
-		*seg = 0;
-		return;
-	}
-	*iter = bio;
-	*seg = bio->bi_idx;
-}
-
-static void iter_bio_next(struct bio **bio_iter, int *seg)
-{
-	if (*bio_iter == NULL)
-		return;
-
-	BUG_ON(*seg >= (*bio_iter)->bi_vcnt);
-
-	(*seg)++;
-	if (*seg == (*bio_iter)->bi_vcnt)
-		init_bio_iter((*bio_iter)->bi_next, bio_iter, seg);
-}
-#endif
-
 /*
  * Write as much message data payload as we can.  If we finish, queue
  * up the footer.
@@ -764,46 +726,21 @@ static int write_partial_msg_pages(struct ceph_connection *con)
 	size_t len;
 	int crc = con->msgr->nocrc;
 	int ret;
-	int total_max_write;
-	int in_trail = 0;
-	size_t trail_len = (msg->trail ? msg->trail->length : 0);
 
 	dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n",
 	     con, con->out_msg, con->out_msg_pos.page, con->out_msg->nr_pages,
 	     con->out_msg_pos.page_pos);
 
-#ifdef CONFIG_BLOCK
-	if (msg->bio && !msg->bio_iter)
-		init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg);
-#endif
-
-	while (data_len > con->out_msg_pos.data_pos) {
+	while (con->out_msg_pos.page < con->out_msg->nr_pages) {
 		struct page *page = NULL;
 		void *kaddr = NULL;
-		int max_write = PAGE_SIZE;
-		int page_shift = 0;
-
-		total_max_write = data_len - trail_len -
-			con->out_msg_pos.data_pos;
 
 		/*
 		 * if we are calculating the data crc (the default), we need
 		 * to map the page.  if our pages[] has been revoked, use the
 		 * zero page.
 		 */
-
-		/* have we reached the trail part of the data? */
-		if (con->out_msg_pos.data_pos >= data_len - trail_len) {
-			in_trail = 1;
-
-			total_max_write = data_len - con->out_msg_pos.data_pos;
-
-			page = list_first_entry(&msg->trail->head,
-						struct page, lru);
-			if (crc)
-				kaddr = kmap(page);
-			max_write = PAGE_SIZE;
-		} else if (msg->pages) {
+		if (msg->pages) {
 			page = msg->pages[con->out_msg_pos.page];
 			if (crc)
 				kaddr = kmap(page);
@@ -812,25 +749,13 @@ static int write_partial_msg_pages(struct ceph_connection *con)
 						struct page, lru);
 			if (crc)
 				kaddr = kmap(page);
-#ifdef CONFIG_BLOCK
-		} else if (msg->bio) {
-			struct bio_vec *bv;
-
-			bv = bio_iovec_idx(msg->bio_iter, msg->bio_seg);
-			page = bv->bv_page;
-			page_shift = bv->bv_offset;
-			if (crc)
-				kaddr = kmap(page) + page_shift;
-			max_write = bv->bv_len;
-#endif
 		} else {
 			page = con->msgr->zero_page;
 			if (crc)
 				kaddr = page_address(con->msgr->zero_page);
 		}
-		len = min_t(int, max_write - con->out_msg_pos.page_pos,
-			    total_max_write);
-
+		len = min((int)(PAGE_SIZE - con->out_msg_pos.page_pos),
+			  (int)(data_len - con->out_msg_pos.data_pos));
 		if (crc && !con->out_msg_pos.did_page_crc) {
 			void *base = kaddr + con->out_msg_pos.page_pos;
 			u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc);
@@ -840,14 +765,13 @@ static int write_partial_msg_pages(struct ceph_connection *con)
 				cpu_to_le32(crc32c(tmpcrc, base, len));
 			con->out_msg_pos.did_page_crc = 1;
 		}
+
 		ret = kernel_sendpage(con->sock, page,
-				      con->out_msg_pos.page_pos + page_shift,
-				      len,
+				      con->out_msg_pos.page_pos, len,
 				      MSG_DONTWAIT | MSG_NOSIGNAL |
 				      MSG_MORE);
 
-		if (crc &&
-		    (msg->pages || msg->pagelist || msg->bio || in_trail))
+		if (crc && (msg->pages || msg->pagelist))
 			kunmap(page);
 
 		if (ret <= 0)
@@ -859,16 +783,9 @@ static int write_partial_msg_pages(struct ceph_connection *con)
 			con->out_msg_pos.page_pos = 0;
 			con->out_msg_pos.page++;
 			con->out_msg_pos.did_page_crc = 0;
-			if (in_trail)
-				list_move_tail(&page->lru,
-					       &msg->trail->head);
-			else if (msg->pagelist)
+			if (msg->pagelist)
 				list_move_tail(&page->lru,
 					       &msg->pagelist->head);
-#ifdef CONFIG_BLOCK
-			else if (msg->bio)
-				iter_bio_next(&msg->bio_iter, &msg->bio_seg);
-#endif
 		}
 	}
 
@@ -1021,7 +938,7 @@ static int verify_hello(struct ceph_connection *con)
 {
 	if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) {
 		pr_err("connect to %s got bad banner\n",
-		       ceph_pr_addr(&con->peer_addr.in_addr));
+		       pr_addr(&con->peer_addr.in_addr));
 		con->error_msg = "protocol error, bad banner";
 		return -1;
 	}
@@ -1124,7 +1041,7 @@ int ceph_parse_ips(const char *c, const char *end,
 
 		addr_set_port(ss, port);
 
-		dout("parse_ips got %s\n", ceph_pr_addr(ss));
+		dout("parse_ips got %s\n", pr_addr(ss));
 
 		if (p == end)
 			break;
@@ -1144,7 +1061,6 @@ int ceph_parse_ips(const char *c, const char *end,
 	pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c);
 	return -EINVAL;
 }
-EXPORT_SYMBOL(ceph_parse_ips);
 
 static int process_banner(struct ceph_connection *con)
 {
@@ -1166,9 +1082,9 @@ static int process_banner(struct ceph_connection *con)
 	    !(addr_is_blank(&con->actual_peer_addr.in_addr) &&
 	      con->actual_peer_addr.nonce == con->peer_addr.nonce)) {
 		pr_warning("wrong peer, want %s/%d, got %s/%d\n",
-			   ceph_pr_addr(&con->peer_addr.in_addr),
+			   pr_addr(&con->peer_addr.in_addr),
 			   (int)le32_to_cpu(con->peer_addr.nonce),
-			   ceph_pr_addr(&con->actual_peer_addr.in_addr),
+			   pr_addr(&con->actual_peer_addr.in_addr),
 			   (int)le32_to_cpu(con->actual_peer_addr.nonce));
 		con->error_msg = "wrong peer at address";
 		return -1;
@@ -1186,7 +1102,7 @@ static int process_banner(struct ceph_connection *con)
 		addr_set_port(&con->msgr->inst.addr.in_addr, port);
 		encode_my_addr(con->msgr);
 		dout("process_banner learned my addr is %s\n",
-		     ceph_pr_addr(&con->msgr->inst.addr.in_addr));
+		     pr_addr(&con->msgr->inst.addr.in_addr));
 	}
 
 	set_bit(NEGOTIATING, &con->state);
@@ -1207,8 +1123,8 @@ static void fail_protocol(struct ceph_connection *con)
 
 static int process_connect(struct ceph_connection *con)
 {
-	u64 sup_feat = con->msgr->supported_features;
-	u64 req_feat = con->msgr->required_features;
+	u64 sup_feat = CEPH_FEATURE_SUPPORTED;
+	u64 req_feat = CEPH_FEATURE_REQUIRED;
 	u64 server_feat = le64_to_cpu(con->in_reply.features);
 
 	dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
@@ -1218,7 +1134,7 @@ static int process_connect(struct ceph_connection *con)
 		pr_err("%s%lld %s feature set mismatch,"
 		       " my %llx < server's %llx, missing %llx\n",
 		       ENTITY_NAME(con->peer_name),
-		       ceph_pr_addr(&con->peer_addr.in_addr),
+		       pr_addr(&con->peer_addr.in_addr),
 		       sup_feat, server_feat, server_feat & ~sup_feat);
 		con->error_msg = "missing required protocol features";
 		fail_protocol(con);
@@ -1228,7 +1144,7 @@ static int process_connect(struct ceph_connection *con)
 		pr_err("%s%lld %s protocol version mismatch,"
 		       " my %d != server's %d\n",
 		       ENTITY_NAME(con->peer_name),
-		       ceph_pr_addr(&con->peer_addr.in_addr),
+		       pr_addr(&con->peer_addr.in_addr),
 		       le32_to_cpu(con->out_connect.protocol_version),
 		       le32_to_cpu(con->in_reply.protocol_version));
 		con->error_msg = "protocol version mismatch";
@@ -1262,7 +1178,7 @@ static int process_connect(struct ceph_connection *con)
 		     le32_to_cpu(con->in_connect.connect_seq));
 		pr_err("%s%lld %s connection reset\n",
 		       ENTITY_NAME(con->peer_name),
-		       ceph_pr_addr(&con->peer_addr.in_addr));
+		       pr_addr(&con->peer_addr.in_addr));
 		reset_connection(con);
 		prepare_write_connect(con->msgr, con, 0);
 		prepare_read_connect(con);
@@ -1307,7 +1223,7 @@ static int process_connect(struct ceph_connection *con)
 			pr_err("%s%lld %s protocol feature mismatch,"
 			       " my required %llx > server's %llx, need %llx\n",
 			       ENTITY_NAME(con->peer_name),
-			       ceph_pr_addr(&con->peer_addr.in_addr),
+			       pr_addr(&con->peer_addr.in_addr),
 			       req_feat, server_feat, req_feat & ~server_feat);
 			con->error_msg = "missing required protocol features";
 			fail_protocol(con);
@@ -1389,7 +1305,8 @@ static int read_partial_message_section(struct ceph_connection *con,
 					struct kvec *section,
 					unsigned int sec_len, u32 *crc)
 {
-	int ret, left;
+	int left;
+	int ret;
 
 	BUG_ON(!section);
 
@@ -1412,83 +1329,13 @@ static int read_partial_message_section(struct ceph_connection *con,
 static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
 				struct ceph_msg_header *hdr,
 				int *skip);
-
-
-static int read_partial_message_pages(struct ceph_connection *con,
-				      struct page **pages,
-				      unsigned data_len, int datacrc)
-{
-	void *p;
-	int ret;
-	int left;
-
-	left = min((int)(data_len - con->in_msg_pos.data_pos),
-		   (int)(PAGE_SIZE - con->in_msg_pos.page_pos));
-	/* (page) data */
-	BUG_ON(pages == NULL);
-	p = kmap(pages[con->in_msg_pos.page]);
-	ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos,
-			       left);
-	if (ret > 0 && datacrc)
-		con->in_data_crc =
-			crc32c(con->in_data_crc,
-				  p + con->in_msg_pos.page_pos, ret);
-	kunmap(pages[con->in_msg_pos.page]);
-	if (ret <= 0)
-		return ret;
-	con->in_msg_pos.data_pos += ret;
-	con->in_msg_pos.page_pos += ret;
-	if (con->in_msg_pos.page_pos == PAGE_SIZE) {
-		con->in_msg_pos.page_pos = 0;
-		con->in_msg_pos.page++;
-	}
-
-	return ret;
-}
-
-#ifdef CONFIG_BLOCK
-static int read_partial_message_bio(struct ceph_connection *con,
-				    struct bio **bio_iter, int *bio_seg,
-				    unsigned data_len, int datacrc)
-{
-	struct bio_vec *bv = bio_iovec_idx(*bio_iter, *bio_seg);
-	void *p;
-	int ret, left;
-
-	if (IS_ERR(bv))
-		return PTR_ERR(bv);
-
-	left = min((int)(data_len - con->in_msg_pos.data_pos),
-		   (int)(bv->bv_len - con->in_msg_pos.page_pos));
-
-	p = kmap(bv->bv_page) + bv->bv_offset;
-
-	ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos,
-			       left);
-	if (ret > 0 && datacrc)
-		con->in_data_crc =
-			crc32c(con->in_data_crc,
-				  p + con->in_msg_pos.page_pos, ret);
-	kunmap(bv->bv_page);
-	if (ret <= 0)
-		return ret;
-	con->in_msg_pos.data_pos += ret;
-	con->in_msg_pos.page_pos += ret;
-	if (con->in_msg_pos.page_pos == bv->bv_len) {
-		con->in_msg_pos.page_pos = 0;
-		iter_bio_next(bio_iter, bio_seg);
-	}
-
-	return ret;
-}
-#endif
-
 /*
  * read (part of) a message.
  */
 static int read_partial_message(struct ceph_connection *con)
 {
 	struct ceph_msg *m = con->in_msg;
+	void *p;
 	int ret;
 	int to, left;
 	unsigned front_len, middle_len, data_len, data_off;
@@ -1534,7 +1381,7 @@ static int read_partial_message(struct ceph_connection *con)
 	if ((s64)seq - (s64)con->in_seq < 1) {
 		pr_info("skipping %s%lld %s seq %lld, expected %lld\n",
 			ENTITY_NAME(con->peer_name),
-			ceph_pr_addr(&con->peer_addr.in_addr),
+			pr_addr(&con->peer_addr.in_addr),
 			seq, con->in_seq + 1);
 		con->in_base_pos = -front_len - middle_len - data_len -
 			sizeof(m->footer);
@@ -1575,10 +1422,7 @@ static int read_partial_message(struct ceph_connection *con)
 			m->middle->vec.iov_len = 0;
 
 		con->in_msg_pos.page = 0;
-		if (m->pages)
-			con->in_msg_pos.page_pos = data_off & ~PAGE_MASK;
-		else
-			con->in_msg_pos.page_pos = 0;
+		con->in_msg_pos.page_pos = data_off & ~PAGE_MASK;
 		con->in_msg_pos.data_pos = 0;
 	}
 
@@ -1596,29 +1440,27 @@ static int read_partial_message(struct ceph_connection *con)
 		if (ret <= 0)
 			return ret;
 	}
-#ifdef CONFIG_BLOCK
-	if (m->bio && !m->bio_iter)
-		init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg);
-#endif
 
 	/* (page) data */
 	while (con->in_msg_pos.data_pos < data_len) {
-		if (m->pages) {
-			ret = read_partial_message_pages(con, m->pages,
-						 data_len, datacrc);
-			if (ret <= 0)
-				return ret;
-#ifdef CONFIG_BLOCK
-		} else if (m->bio) {
-
-			ret = read_partial_message_bio(con,
-						 &m->bio_iter, &m->bio_seg,
-						 data_len, datacrc);
-			if (ret <= 0)
-				return ret;
-#endif
-		} else {
-			BUG_ON(1);
+		left = min((int)(data_len - con->in_msg_pos.data_pos),
+			   (int)(PAGE_SIZE - con->in_msg_pos.page_pos));
+		BUG_ON(m->pages == NULL);
+		p = kmap(m->pages[con->in_msg_pos.page]);
+		ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos,
+				       left);
+		if (ret > 0 && datacrc)
+			con->in_data_crc =
+				crc32c(con->in_data_crc,
+					  p + con->in_msg_pos.page_pos, ret);
+		kunmap(m->pages[con->in_msg_pos.page]);
+		if (ret <= 0)
+			return ret;
+		con->in_msg_pos.data_pos += ret;
+		con->in_msg_pos.page_pos += ret;
+		if (con->in_msg_pos.page_pos == PAGE_SIZE) {
+			con->in_msg_pos.page_pos = 0;
+			con->in_msg_pos.page++;
 		}
 	}
 
@@ -2032,9 +1874,9 @@ static void con_work(struct work_struct *work)
 static void ceph_fault(struct ceph_connection *con)
 {
 	pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
-	       ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
+	       pr_addr(&con->peer_addr.in_addr), con->error_msg);
 	dout("fault %p state %lu to peer %s\n",
-	     con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
+	     con, con->state, pr_addr(&con->peer_addr.in_addr));
 
 	if (test_bit(LOSSYTX, &con->state)) {
 		dout("fault on LOSSYTX channel\n");
@@ -2094,9 +1936,7 @@ static void ceph_fault(struct ceph_connection *con)
 /*
  * create a new messenger instance
  */
-struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr,
-					     u32 supported_features,
-					     u32 required_features)
+struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr)
 {
 	struct ceph_messenger *msgr;
 
@@ -2104,9 +1944,6 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr,
 	if (msgr == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	msgr->supported_features = supported_features;
-	msgr->required_features = required_features;
-
 	spin_lock_init(&msgr->global_seq_lock);
 
 	/* the zero page is needed if a request is "canceled" while the message
@@ -2129,7 +1966,6 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr,
 	dout("messenger_create %p\n", msgr);
 	return msgr;
 }
-EXPORT_SYMBOL(ceph_messenger_create);
 
 void ceph_messenger_destroy(struct ceph_messenger *msgr)
 {
@@ -2139,7 +1975,6 @@ void ceph_messenger_destroy(struct ceph_messenger *msgr)
 	kfree(msgr);
 	dout("destroyed messenger %p\n", msgr);
 }
-EXPORT_SYMBOL(ceph_messenger_destroy);
 
 /*
  * Queue up an outgoing message on the given connection.
@@ -2176,7 +2011,6 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
 	if (test_and_set_bit(WRITE_PENDING, &con->state) == 0)
 		queue_con(con);
 }
-EXPORT_SYMBOL(ceph_con_send);
 
 /*
  * Revoke a message that was previously queued for send
@@ -2242,7 +2076,6 @@ void ceph_con_keepalive(struct ceph_connection *con)
 	    test_and_set_bit(WRITE_PENDING, &con->state) == 0)
 		queue_con(con);
 }
-EXPORT_SYMBOL(ceph_con_keepalive);
 
 
 /*
@@ -2303,10 +2136,6 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags)
 	m->nr_pages = 0;
 	m->pages = NULL;
 	m->pagelist = NULL;
-	m->bio = NULL;
-	m->bio_iter = NULL;
-	m->bio_seg = 0;
-	m->trail = NULL;
 
 	dout("ceph_msg_new %p front %d\n", m, front_len);
 	return m;
@@ -2317,7 +2146,6 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags)
 	pr_err("msg_new can't create type %d front %d\n", type, front_len);
 	return NULL;
 }
-EXPORT_SYMBOL(ceph_msg_new);
 
 /*
  * Allocate "middle" portion of a message, if it is needed and wasn't
@@ -2422,14 +2250,11 @@ void ceph_msg_last_put(struct kref *kref)
 		m->pagelist = NULL;
 	}
 
-	m->trail = NULL;
-
 	if (m->pool)
 		ceph_msgpool_put(m->pool, m);
 	else
 		ceph_msg_kfree(m);
 }
-EXPORT_SYMBOL(ceph_msg_last_put);
 
 void ceph_msg_dump(struct ceph_msg *msg)
 {
@@ -2450,4 +2275,3 @@ void ceph_msg_dump(struct ceph_msg *msg)
 		       DUMP_PREFIX_OFFSET, 16, 1,
 		       &msg->footer, sizeof(msg->footer), true);
 }
-EXPORT_SYMBOL(ceph_msg_dump);
diff --git a/trunk/include/linux/ceph/messenger.h b/trunk/fs/ceph/messenger.h
similarity index 95%
rename from trunk/include/linux/ceph/messenger.h
rename to trunk/fs/ceph/messenger.h
index 5956d62c3057..76fbc957bc13 100644
--- a/trunk/include/linux/ceph/messenger.h
+++ b/trunk/fs/ceph/messenger.h
@@ -65,9 +65,6 @@ struct ceph_messenger {
 	 */
 	u32 global_seq;
 	spinlock_t global_seq_lock;
-
-	u32 supported_features;
-	u32 required_features;
 };
 
 /*
@@ -85,10 +82,6 @@ struct ceph_msg {
 	struct ceph_pagelist *pagelist; /* instead of pages */
 	struct list_head list_head;
 	struct kref kref;
-	struct bio  *bio;		/* instead of pages/pagelist */
-	struct bio  *bio_iter;		/* bio iterator */
-	int bio_seg;			/* current bio segment */
-	struct ceph_pagelist *trail;	/* the trailing part of the data */
 	bool front_is_vmalloc;
 	bool more_to_follow;
 	bool needs_out_seq;
@@ -212,7 +205,7 @@ struct ceph_connection {
 };
 
 
-extern const char *ceph_pr_addr(const struct sockaddr_storage *ss);
+extern const char *pr_addr(const struct sockaddr_storage *ss);
 extern int ceph_parse_ips(const char *c, const char *end,
 			  struct ceph_entity_addr *addr,
 			  int max_count, int *count);
@@ -223,8 +216,7 @@ extern void ceph_msgr_exit(void);
 extern void ceph_msgr_flush(void);
 
 extern struct ceph_messenger *ceph_messenger_create(
-	struct ceph_entity_addr *myaddr,
-	u32 features, u32 required);
+	struct ceph_entity_addr *myaddr);
 extern void ceph_messenger_destroy(struct ceph_messenger *);
 
 extern void ceph_con_init(struct ceph_messenger *msgr,
diff --git a/trunk/net/ceph/mon_client.c b/trunk/fs/ceph/mon_client.c
similarity index 94%
rename from trunk/net/ceph/mon_client.c
rename to trunk/fs/ceph/mon_client.c
index 8a079399174a..b2a5a3e4a671 100644
--- a/trunk/net/ceph/mon_client.c
+++ b/trunk/fs/ceph/mon_client.c
@@ -1,16 +1,14 @@
-#include <linux/ceph/ceph_debug.h>
+#include "ceph_debug.h"
 
-#include <linux/module.h>
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/random.h>
 #include <linux/sched.h>
 
-#include <linux/ceph/mon_client.h>
-#include <linux/ceph/libceph.h>
-#include <linux/ceph/decode.h>
-
-#include <linux/ceph/auth.h>
+#include "mon_client.h"
+#include "super.h"
+#include "auth.h"
+#include "decode.h"
 
 /*
  * Interact with Ceph monitor cluster.  Handle requests for new map
@@ -76,7 +74,7 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
 	     m->num_mon);
 	for (i = 0; i < m->num_mon; i++)
 		dout("monmap_decode  mon%d is %s\n", i,
-		     ceph_pr_addr(&m->mon_inst[i].addr.in_addr));
+		     pr_addr(&m->mon_inst[i].addr.in_addr));
 	return m;
 
 bad:
@@ -193,33 +191,30 @@ static void __send_subscribe(struct ceph_mon_client *monc)
 		struct ceph_msg *msg = monc->m_subscribe;
 		struct ceph_mon_subscribe_item *i;
 		void *p, *end;
-		int num;
 
 		p = msg->front.iov_base;
 		end = p + msg->front_max;
 
-		num = 1 + !!monc->want_next_osdmap + !!monc->want_mdsmap;
-		ceph_encode_32(&p, num);
-
+		dout("__send_subscribe to 'mdsmap' %u+\n",
+		     (unsigned)monc->have_mdsmap);
 		if (monc->want_next_osdmap) {
 			dout("__send_subscribe to 'osdmap' %u\n",
 			     (unsigned)monc->have_osdmap);
+			ceph_encode_32(&p, 3);
 			ceph_encode_string(&p, end, "osdmap", 6);
 			i = p;
 			i->have = cpu_to_le64(monc->have_osdmap);
 			i->onetime = 1;
 			p += sizeof(*i);
 			monc->want_next_osdmap = 2;  /* requested */
+		} else {
+			ceph_encode_32(&p, 2);
 		}
-		if (monc->want_mdsmap) {
-			dout("__send_subscribe to 'mdsmap' %u+\n",
-			     (unsigned)monc->have_mdsmap);
-			ceph_encode_string(&p, end, "mdsmap", 6);
-			i = p;
-			i->have = cpu_to_le64(monc->have_mdsmap);
-			i->onetime = 0;
-			p += sizeof(*i);
-		}
+		ceph_encode_string(&p, end, "mdsmap", 6);
+		i = p;
+		i->have = cpu_to_le64(monc->have_mdsmap);
+		i->onetime = 0;
+		p += sizeof(*i);
 		ceph_encode_string(&p, end, "monmap", 6);
 		i = p;
 		i->have = 0;
@@ -248,8 +243,7 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc,
 	mutex_lock(&monc->mutex);
 	if (monc->hunting) {
 		pr_info("mon%d %s session established\n",
-			monc->cur_mon,
-			ceph_pr_addr(&monc->con->peer_addr.in_addr));
+			monc->cur_mon, pr_addr(&monc->con->peer_addr.in_addr));
 		monc->hunting = false;
 	}
 	dout("handle_subscribe_ack after %d seconds\n", seconds);
@@ -272,7 +266,6 @@ int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 got)
 	mutex_unlock(&monc->mutex);
 	return 0;
 }
-EXPORT_SYMBOL(ceph_monc_got_mdsmap);
 
 int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 got)
 {
@@ -317,7 +310,6 @@ int ceph_monc_open_session(struct ceph_mon_client *monc)
 	mutex_unlock(&monc->mutex);
 	return 0;
 }
-EXPORT_SYMBOL(ceph_monc_open_session);
 
 /*
  * The monitor responds with mount ack indicate mount success.  The
@@ -548,7 +540,6 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf)
 	kref_put(&req->kref, release_generic_request);
 	return err;
 }
-EXPORT_SYMBOL(ceph_monc_do_statfs);
 
 /*
  * pool ops
@@ -660,7 +651,6 @@ int ceph_monc_create_snapid(struct ceph_mon_client *monc,
 				   pool, 0, (char *)snapid, sizeof(*snapid));
 
 }
-EXPORT_SYMBOL(ceph_monc_create_snapid);
 
 int ceph_monc_delete_snapid(struct ceph_mon_client *monc,
 			    u32 pool, u64 snapid)
@@ -718,9 +708,9 @@ static void delayed_work(struct work_struct *work)
  */
 static int build_initial_monmap(struct ceph_mon_client *monc)
 {
-	struct ceph_options *opt = monc->client->options;
-	struct ceph_entity_addr *mon_addr = opt->mon_addr;
-	int num_mon = opt->num_mon;
+	struct ceph_mount_args *args = monc->client->mount_args;
+	struct ceph_entity_addr *mon_addr = args->mon_addr;
+	int num_mon = args->num_mon;
 	int i;
 
 	/* build initial monmap */
@@ -738,6 +728,11 @@ static int build_initial_monmap(struct ceph_mon_client *monc)
 	}
 	monc->monmap->num_mon = num_mon;
 	monc->have_fsid = false;
+
+	/* release addr memory */
+	kfree(args->mon_addr);
+	args->mon_addr = NULL;
+	args->num_mon = 0;
 	return 0;
 }
 
@@ -758,8 +753,8 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
 	monc->con = NULL;
 
 	/* authentication */
-	monc->auth = ceph_auth_init(cl->options->name,
-				    cl->options->secret);
+	monc->auth = ceph_auth_init(cl->mount_args->name,
+				    cl->mount_args->secret);
 	if (IS_ERR(monc->auth))
 		return PTR_ERR(monc->auth);
 	monc->auth->want_keys =
@@ -813,7 +808,6 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
 out:
 	return err;
 }
-EXPORT_SYMBOL(ceph_monc_init);
 
 void ceph_monc_stop(struct ceph_mon_client *monc)
 {
@@ -838,7 +832,6 @@ void ceph_monc_stop(struct ceph_mon_client *monc)
 
 	kfree(monc->monmap);
 }
-EXPORT_SYMBOL(ceph_monc_stop);
 
 static void handle_auth_reply(struct ceph_mon_client *monc,
 			      struct ceph_msg *msg)
@@ -896,7 +889,6 @@ int ceph_monc_validate_auth(struct ceph_mon_client *monc)
 	mutex_unlock(&monc->mutex);
 	return ret;
 }
-EXPORT_SYMBOL(ceph_monc_validate_auth);
 
 /*
  * handle incoming message
@@ -930,16 +922,15 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
 		ceph_monc_handle_map(monc, msg);
 		break;
 
+	case CEPH_MSG_MDS_MAP:
+		ceph_mdsc_handle_map(&monc->client->mdsc, msg);
+		break;
+
 	case CEPH_MSG_OSD_MAP:
 		ceph_osdc_handle_map(&monc->client->osdc, msg);
 		break;
 
 	default:
-		/* can the chained handler handle it? */
-		if (monc->client->extra_mon_dispatch &&
-		    monc->client->extra_mon_dispatch(monc->client, msg) == 0)
-			break;
-			
 		pr_err("received unknown message type %d %s\n", type,
 		       ceph_msg_type_name(type));
 	}
@@ -1003,7 +994,7 @@ static void mon_fault(struct ceph_connection *con)
 	if (monc->con && !monc->hunting)
 		pr_info("mon%d %s session lost, "
 			"hunting for new mon\n", monc->cur_mon,
-			ceph_pr_addr(&monc->con->peer_addr.in_addr));
+			pr_addr(&monc->con->peer_addr.in_addr));
 
 	__close_session(monc);
 	if (!monc->hunting) {
diff --git a/trunk/include/linux/ceph/mon_client.h b/trunk/fs/ceph/mon_client.h
similarity index 99%
rename from trunk/include/linux/ceph/mon_client.h
rename to trunk/fs/ceph/mon_client.h
index 545f85917780..8e396f2c0963 100644
--- a/trunk/include/linux/ceph/mon_client.h
+++ b/trunk/fs/ceph/mon_client.h
@@ -79,7 +79,6 @@ struct ceph_mon_client {
 	u64 last_tid;
 
 	/* mds/osd map */
-	int want_mdsmap;
 	int want_next_osdmap; /* 1 = want, 2 = want+asked */
 	u32 have_osdmap, have_mdsmap;
 
diff --git a/trunk/net/ceph/msgpool.c b/trunk/fs/ceph/msgpool.c
similarity index 95%
rename from trunk/net/ceph/msgpool.c
rename to trunk/fs/ceph/msgpool.c
index d5f2d97ac05c..dd65a6438131 100644
--- a/trunk/net/ceph/msgpool.c
+++ b/trunk/fs/ceph/msgpool.c
@@ -1,11 +1,11 @@
-#include <linux/ceph/ceph_debug.h>
+#include "ceph_debug.h"
 
 #include <linux/err.h>
 #include <linux/sched.h>
 #include <linux/types.h>
 #include <linux/vmalloc.h>
 
-#include <linux/ceph/msgpool.h>
+#include "msgpool.h"
 
 static void *alloc_fn(gfp_t gfp_mask, void *arg)
 {
diff --git a/trunk/include/linux/ceph/msgpool.h b/trunk/fs/ceph/msgpool.h
similarity index 100%
rename from trunk/include/linux/ceph/msgpool.h
rename to trunk/fs/ceph/msgpool.h
diff --git a/trunk/include/linux/ceph/msgr.h b/trunk/fs/ceph/msgr.h
similarity index 100%
rename from trunk/include/linux/ceph/msgr.h
rename to trunk/fs/ceph/msgr.h
diff --git a/trunk/net/ceph/osd_client.c b/trunk/fs/ceph/osd_client.c
similarity index 84%
rename from trunk/net/ceph/osd_client.c
rename to trunk/fs/ceph/osd_client.c
index 79391994b3ed..3b5571b8ce22 100644
--- a/trunk/net/ceph/osd_client.c
+++ b/trunk/fs/ceph/osd_client.c
@@ -1,22 +1,17 @@
-#include <linux/ceph/ceph_debug.h>
+#include "ceph_debug.h"
 
-#include <linux/module.h>
 #include <linux/err.h>
 #include <linux/highmem.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
-#ifdef CONFIG_BLOCK
-#include <linux/bio.h>
-#endif
 
-#include <linux/ceph/libceph.h>
-#include <linux/ceph/osd_client.h>
-#include <linux/ceph/messenger.h>
-#include <linux/ceph/decode.h>
-#include <linux/ceph/auth.h>
-#include <linux/ceph/pagelist.h>
+#include "super.h"
+#include "osd_client.h"
+#include "messenger.h"
+#include "decode.h"
+#include "auth.h"
 
 #define OSD_OP_FRONT_LEN	4096
 #define OSD_OPREPLY_FRONT_LEN	512
@@ -27,59 +22,6 @@ static int __kick_requests(struct ceph_osd_client *osdc,
 
 static void kick_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd);
 
-static int op_needs_trail(int op)
-{
-	switch (op) {
-	case CEPH_OSD_OP_GETXATTR:
-	case CEPH_OSD_OP_SETXATTR:
-	case CEPH_OSD_OP_CMPXATTR:
-	case CEPH_OSD_OP_CALL:
-		return 1;
-	default:
-		return 0;
-	}
-}
-
-static int op_has_extent(int op)
-{
-	return (op == CEPH_OSD_OP_READ ||
-		op == CEPH_OSD_OP_WRITE);
-}
-
-void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
-			struct ceph_file_layout *layout,
-			u64 snapid,
-			u64 off, u64 *plen, u64 *bno,
-			struct ceph_osd_request *req,
-			struct ceph_osd_req_op *op)
-{
-	struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
-	u64 orig_len = *plen;
-	u64 objoff, objlen;    /* extent in object */
-
-	reqhead->snapid = cpu_to_le64(snapid);
-
-	/* object extent? */
-	ceph_calc_file_object_mapping(layout, off, plen, bno,
-				      &objoff, &objlen);
-	if (*plen < orig_len)
-		dout(" skipping last %llu, final file extent %llu~%llu\n",
-		     orig_len - *plen, off, *plen);
-
-	if (op_has_extent(op->op)) {
-		op->extent.offset = objoff;
-		op->extent.length = objlen;
-	}
-	req->r_num_pages = calc_pages_for(off, *plen);
-	if (op->op == CEPH_OSD_OP_WRITE)
-		op->payload_len = *plen;
-
-	dout("calc_layout bno=%llx %llu~%llu (%d pages)\n",
-	     *bno, objoff, objlen, req->r_num_pages);
-
-}
-EXPORT_SYMBOL(ceph_calc_raw_layout);
-
 /*
  * Implement client access to distributed object storage cluster.
  *
@@ -106,19 +48,34 @@ EXPORT_SYMBOL(ceph_calc_raw_layout);
  * fill osd op in request message.
  */
 static void calc_layout(struct ceph_osd_client *osdc,
-			struct ceph_vino vino,
-			struct ceph_file_layout *layout,
+			struct ceph_vino vino, struct ceph_file_layout *layout,
 			u64 off, u64 *plen,
-			struct ceph_osd_request *req,
-			struct ceph_osd_req_op *op)
+			struct ceph_osd_request *req)
 {
+	struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
+	struct ceph_osd_op *op = (void *)(reqhead + 1);
+	u64 orig_len = *plen;
+	u64 objoff, objlen;    /* extent in object */
 	u64 bno;
 
-	ceph_calc_raw_layout(osdc, layout, vino.snap, off,
-			     plen, &bno, req, op);
+	reqhead->snapid = cpu_to_le64(vino.snap);
+
+	/* object extent? */
+	ceph_calc_file_object_mapping(layout, off, plen, &bno,
+				      &objoff, &objlen);
+	if (*plen < orig_len)
+		dout(" skipping last %llu, final file extent %llu~%llu\n",
+		     orig_len - *plen, off, *plen);
 
 	sprintf(req->r_oid, "%llx.%08llx", vino.ino, bno);
 	req->r_oid_len = strlen(req->r_oid);
+
+	op->extent.offset = cpu_to_le64(objoff);
+	op->extent.length = cpu_to_le64(objlen);
+	req->r_num_pages = calc_pages_for(off, *plen);
+
+	dout("calc_layout %s (%d) %llu~%llu (%d pages)\n",
+	     req->r_oid, req->r_oid_len, objoff, objlen, req->r_num_pages);
 }
 
 /*
@@ -144,66 +101,56 @@ void ceph_osdc_release_request(struct kref *kref)
 	if (req->r_own_pages)
 		ceph_release_page_vector(req->r_pages,
 					 req->r_num_pages);
-#ifdef CONFIG_BLOCK
-	if (req->r_bio)
-		bio_put(req->r_bio);
-#endif
 	ceph_put_snap_context(req->r_snapc);
-	if (req->r_trail) {
-		ceph_pagelist_release(req->r_trail);
-		kfree(req->r_trail);
-	}
 	if (req->r_mempool)
 		mempool_free(req, req->r_osdc->req_mempool);
 	else
 		kfree(req);
 }
-EXPORT_SYMBOL(ceph_osdc_release_request);
-
-static int get_num_ops(struct ceph_osd_req_op *ops, int *needs_trail)
-{
-	int i = 0;
-
-	if (needs_trail)
-		*needs_trail = 0;
-	while (ops[i].op) {
-		if (needs_trail && op_needs_trail(ops[i].op))
-			*needs_trail = 1;
-		i++;
-	}
-
-	return i;
-}
 
-struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
-					       int flags,
+/*
+ * build new request AND message, calculate layout, and adjust file
+ * extent as needed.
+ *
+ * if the file was recently truncated, we include information about its
+ * old and new size so that the object can be updated appropriately.  (we
+ * avoid synchronously deleting truncated objects because it's slow.)
+ *
+ * if @do_sync, include a 'startsync' command so that the osd will flush
+ * data quickly.
+ */
+struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
+					       struct ceph_file_layout *layout,
+					       struct ceph_vino vino,
+					       u64 off, u64 *plen,
+					       int opcode, int flags,
 					       struct ceph_snap_context *snapc,
-					       struct ceph_osd_req_op *ops,
-					       bool use_mempool,
-					       gfp_t gfp_flags,
-					       struct page **pages,
-					       struct bio *bio)
+					       int do_sync,
+					       u32 truncate_seq,
+					       u64 truncate_size,
+					       struct timespec *mtime,
+					       bool use_mempool, int num_reply)
 {
 	struct ceph_osd_request *req;
 	struct ceph_msg *msg;
-	int needs_trail;
-	int num_op = get_num_ops(ops, &needs_trail);
-	size_t msg_size = sizeof(struct ceph_osd_request_head);
-
-	msg_size += num_op*sizeof(struct ceph_osd_op);
+	struct ceph_osd_request_head *head;
+	struct ceph_osd_op *op;
+	void *p;
+	int num_op = 1 + do_sync;
+	size_t msg_size = sizeof(*head) + num_op*sizeof(*op);
+	int i;
 
 	if (use_mempool) {
-		req = mempool_alloc(osdc->req_mempool, gfp_flags);
+		req = mempool_alloc(osdc->req_mempool, GFP_NOFS);
 		memset(req, 0, sizeof(*req));
 	} else {
-		req = kzalloc(sizeof(*req), gfp_flags);
+		req = kzalloc(sizeof(*req), GFP_NOFS);
 	}
 	if (req == NULL)
 		return NULL;
 
 	req->r_osdc = osdc;
 	req->r_mempool = use_mempool;
-
 	kref_init(&req->r_kref);
 	init_completion(&req->r_completion);
 	init_completion(&req->r_safe_completion);
@@ -217,22 +164,13 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 		msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0);
 	else
 		msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY,
-				   OSD_OPREPLY_FRONT_LEN, gfp_flags);
+				   OSD_OPREPLY_FRONT_LEN, GFP_NOFS);
 	if (!msg) {
 		ceph_osdc_put_request(req);
 		return NULL;
 	}
 	req->r_reply = msg;
 
-	/* allocate space for the trailing data */
-	if (needs_trail) {
-		req->r_trail = kmalloc(sizeof(struct ceph_pagelist), gfp_flags);
-		if (!req->r_trail) {
-			ceph_osdc_put_request(req);
-			return NULL;
-		}
-		ceph_pagelist_init(req->r_trail);
-	}
 	/* create request message; allow space for oid */
 	msg_size += 40;
 	if (snapc)
@@ -240,115 +178,18 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 	if (use_mempool)
 		msg = ceph_msgpool_get(&osdc->msgpool_op, 0);
 	else
-		msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp_flags);
+		msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, GFP_NOFS);
 	if (!msg) {
 		ceph_osdc_put_request(req);
 		return NULL;
 	}
-
 	msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP);
 	memset(msg->front.iov_base, 0, msg->front.iov_len);
-
-	req->r_request = msg;
-	req->r_pages = pages;
-#ifdef CONFIG_BLOCK
-	if (bio) {
-		req->r_bio = bio;
-		bio_get(req->r_bio);
-	}
-#endif
-
-	return req;
-}
-EXPORT_SYMBOL(ceph_osdc_alloc_request);
-
-static void osd_req_encode_op(struct ceph_osd_request *req,
-			      struct ceph_osd_op *dst,
-			      struct ceph_osd_req_op *src)
-{
-	dst->op = cpu_to_le16(src->op);
-
-	switch (dst->op) {
-	case CEPH_OSD_OP_READ:
-	case CEPH_OSD_OP_WRITE:
-		dst->extent.offset =
-			cpu_to_le64(src->extent.offset);
-		dst->extent.length =
-			cpu_to_le64(src->extent.length);
-		dst->extent.truncate_size =
-			cpu_to_le64(src->extent.truncate_size);
-		dst->extent.truncate_seq =
-			cpu_to_le32(src->extent.truncate_seq);
-		break;
-
-	case CEPH_OSD_OP_GETXATTR:
-	case CEPH_OSD_OP_SETXATTR:
-	case CEPH_OSD_OP_CMPXATTR:
-		BUG_ON(!req->r_trail);
-
-		dst->xattr.name_len = cpu_to_le32(src->xattr.name_len);
-		dst->xattr.value_len = cpu_to_le32(src->xattr.value_len);
-		dst->xattr.cmp_op = src->xattr.cmp_op;
-		dst->xattr.cmp_mode = src->xattr.cmp_mode;
-		ceph_pagelist_append(req->r_trail, src->xattr.name,
-				     src->xattr.name_len);
-		ceph_pagelist_append(req->r_trail, src->xattr.val,
-				     src->xattr.value_len);
-		break;
-	case CEPH_OSD_OP_CALL:
-		BUG_ON(!req->r_trail);
-
-		dst->cls.class_len = src->cls.class_len;
-		dst->cls.method_len = src->cls.method_len;
-		dst->cls.indata_len = cpu_to_le32(src->cls.indata_len);
-
-		ceph_pagelist_append(req->r_trail, src->cls.class_name,
-				     src->cls.class_len);
-		ceph_pagelist_append(req->r_trail, src->cls.method_name,
-				     src->cls.method_len);
-		ceph_pagelist_append(req->r_trail, src->cls.indata,
-				     src->cls.indata_len);
-		break;
-	case CEPH_OSD_OP_ROLLBACK:
-		dst->snap.snapid = cpu_to_le64(src->snap.snapid);
-		break;
-	case CEPH_OSD_OP_STARTSYNC:
-		break;
-	default:
-		pr_err("unrecognized osd opcode %d\n", dst->op);
-		WARN_ON(1);
-		break;
-	}
-	dst->payload_len = cpu_to_le32(src->payload_len);
-}
-
-/*
- * build new request AND message
- *
- */
-void ceph_osdc_build_request(struct ceph_osd_request *req,
-			     u64 off, u64 *plen,
-			     struct ceph_osd_req_op *src_ops,
-			     struct ceph_snap_context *snapc,
-			     struct timespec *mtime,
-			     const char *oid,
-			     int oid_len)
-{
-	struct ceph_msg *msg = req->r_request;
-	struct ceph_osd_request_head *head;
-	struct ceph_osd_req_op *src_op;
-	struct ceph_osd_op *op;
-	void *p;
-	int num_op = get_num_ops(src_ops, NULL);
-	size_t msg_size = sizeof(*head) + num_op*sizeof(*op);
-	int flags = req->r_flags;
-	u64 data_len = 0;
-	int i;
-
 	head = msg->front.iov_base;
 	op = (void *)(head + 1);
 	p = (void *)(op + num_op);
 
+	req->r_request = msg;
 	req->r_snapc = ceph_get_snap_context(snapc);
 
 	head->client_inc = cpu_to_le32(1); /* always, for now. */
@@ -356,23 +197,29 @@ void ceph_osdc_build_request(struct ceph_osd_request *req,
 	if (flags & CEPH_OSD_FLAG_WRITE)
 		ceph_encode_timespec(&head->mtime, mtime);
 	head->num_ops = cpu_to_le16(num_op);
+	op->op = cpu_to_le16(opcode);
 
+	/* calculate max write size */
+	calc_layout(osdc, vino, layout, off, plen, req);
+	req->r_file_layout = *layout;  /* keep a copy */
 
-	/* fill in oid */
-	head->object_len = cpu_to_le32(oid_len);
-	memcpy(p, oid, oid_len);
-	p += oid_len;
-
-	src_op = src_ops;
-	while (src_op->op) {
-		osd_req_encode_op(req, op, src_op);
-		src_op++;
-		op++;
+	if (flags & CEPH_OSD_FLAG_WRITE) {
+		req->r_request->hdr.data_off = cpu_to_le16(off);
+		req->r_request->hdr.data_len = cpu_to_le32(*plen);
+		op->payload_len = cpu_to_le32(*plen);
 	}
+	op->extent.truncate_size = cpu_to_le64(truncate_size);
+	op->extent.truncate_seq = cpu_to_le32(truncate_seq);
 
-	if (req->r_trail)
-		data_len += req->r_trail->length;
+	/* fill in oid */
+	head->object_len = cpu_to_le32(req->r_oid_len);
+	memcpy(p, req->r_oid, req->r_oid_len);
+	p += req->r_oid_len;
 
+	if (do_sync) {
+		op++;
+		op->op = cpu_to_le16(CEPH_OSD_OP_STARTSYNC);
+	}
 	if (snapc) {
 		head->snap_seq = cpu_to_le64(snapc->seq);
 		head->num_snaps = cpu_to_le32(snapc->num_snaps);
@@ -382,79 +229,12 @@ void ceph_osdc_build_request(struct ceph_osd_request *req,
 		}
 	}
 
-	if (flags & CEPH_OSD_FLAG_WRITE) {
-		req->r_request->hdr.data_off = cpu_to_le16(off);
-		req->r_request->hdr.data_len = cpu_to_le32(*plen + data_len);
-	} else if (data_len) {
-		req->r_request->hdr.data_off = 0;
-		req->r_request->hdr.data_len = cpu_to_le32(data_len);
-	}
-
 	BUG_ON(p > msg->front.iov_base + msg->front.iov_len);
 	msg_size = p - msg->front.iov_base;
 	msg->front.iov_len = msg_size;
 	msg->hdr.front_len = cpu_to_le32(msg_size);
-	return;
-}
-EXPORT_SYMBOL(ceph_osdc_build_request);
-
-/*
- * build new request AND message, calculate layout, and adjust file
- * extent as needed.
- *
- * if the file was recently truncated, we include information about its
- * old and new size so that the object can be updated appropriately.  (we
- * avoid synchronously deleting truncated objects because it's slow.)
- *
- * if @do_sync, include a 'startsync' command so that the osd will flush
- * data quickly.
- */
-struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
-					       struct ceph_file_layout *layout,
-					       struct ceph_vino vino,
-					       u64 off, u64 *plen,
-					       int opcode, int flags,
-					       struct ceph_snap_context *snapc,
-					       int do_sync,
-					       u32 truncate_seq,
-					       u64 truncate_size,
-					       struct timespec *mtime,
-					       bool use_mempool, int num_reply)
-{
-	struct ceph_osd_req_op ops[3];
-	struct ceph_osd_request *req;
-
-	ops[0].op = opcode;
-	ops[0].extent.truncate_seq = truncate_seq;
-	ops[0].extent.truncate_size = truncate_size;
-	ops[0].payload_len = 0;
-
-	if (do_sync) {
-		ops[1].op = CEPH_OSD_OP_STARTSYNC;
-		ops[1].payload_len = 0;
-		ops[2].op = 0;
-	} else
-		ops[1].op = 0;
-
-	req = ceph_osdc_alloc_request(osdc, flags,
-					 snapc, ops,
-					 use_mempool,
-					 GFP_NOFS, NULL, NULL);
-	if (IS_ERR(req))
-		return req;
-
-	/* calculate max write size */
-	calc_layout(osdc, vino, layout, off, plen, req, ops);
-	req->r_file_layout = *layout;  /* keep a copy */
-
-	ceph_osdc_build_request(req, off, plen, ops,
-				snapc,
-				mtime,
-				req->r_oid, req->r_oid_len);
-
 	return req;
 }
-EXPORT_SYMBOL(ceph_osdc_new_request);
 
 /*
  * We keep osd requests in an rbtree, sorted by ->r_tid.
@@ -609,7 +389,7 @@ static void __move_osd_to_lru(struct ceph_osd_client *osdc,
 	dout("__move_osd_to_lru %p\n", osd);
 	BUG_ON(!list_empty(&osd->o_osd_lru));
 	list_add_tail(&osd->o_osd_lru, &osdc->osd_lru);
-	osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ;
+	osd->lru_ttl = jiffies + osdc->client->mount_args->osd_idle_ttl * HZ;
 }
 
 static void __remove_osd_from_lru(struct ceph_osd *osd)
@@ -703,7 +483,7 @@ static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o)
 static void __schedule_osd_timeout(struct ceph_osd_client *osdc)
 {
 	schedule_delayed_work(&osdc->timeout_work,
-			osdc->client->options->osd_keepalive_timeout * HZ);
+			osdc->client->mount_args->osd_keepalive_timeout * HZ);
 }
 
 static void __cancel_osd_timeout(struct ceph_osd_client *osdc)
@@ -904,9 +684,9 @@ static void handle_timeout(struct work_struct *work)
 		container_of(work, struct ceph_osd_client, timeout_work.work);
 	struct ceph_osd_request *req, *last_req = NULL;
 	struct ceph_osd *osd;
-	unsigned long timeout = osdc->client->options->osd_timeout * HZ;
+	unsigned long timeout = osdc->client->mount_args->osd_timeout * HZ;
 	unsigned long keepalive =
-		osdc->client->options->osd_keepalive_timeout * HZ;
+		osdc->client->mount_args->osd_keepalive_timeout * HZ;
 	unsigned long last_stamp = 0;
 	struct rb_node *p;
 	struct list_head slow_osds;
@@ -993,7 +773,7 @@ static void handle_osds_timeout(struct work_struct *work)
 		container_of(work, struct ceph_osd_client,
 			     osds_timeout_work.work);
 	unsigned long delay =
-		osdc->client->options->osd_idle_ttl * HZ >> 2;
+		osdc->client->mount_args->osd_idle_ttl * HZ >> 2;
 
 	dout("osds timeout\n");
 	down_read(&osdc->map_sem);
@@ -1324,10 +1104,6 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
 
 	req->r_request->pages = req->r_pages;
 	req->r_request->nr_pages = req->r_num_pages;
-#ifdef CONFIG_BLOCK
-	req->r_request->bio = req->r_bio;
-#endif
-	req->r_request->trail = req->r_trail;
 
 	register_request(osdc, req);
 
@@ -1355,7 +1131,6 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
 	up_read(&osdc->map_sem);
 	return rc;
 }
-EXPORT_SYMBOL(ceph_osdc_start_request);
 
 /*
  * wait for a request to complete
@@ -1378,7 +1153,6 @@ int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
 	dout("wait_request tid %llu result %d\n", req->r_tid, req->r_result);
 	return req->r_result;
 }
-EXPORT_SYMBOL(ceph_osdc_wait_request);
 
 /*
  * sync - wait for all in-flight requests to flush.  avoid starvation.
@@ -1412,7 +1186,6 @@ void ceph_osdc_sync(struct ceph_osd_client *osdc)
 	mutex_unlock(&osdc->request_mutex);
 	dout("sync done (thru tid %llu)\n", last_tid);
 }
-EXPORT_SYMBOL(ceph_osdc_sync);
 
 /*
  * init, shutdown
@@ -1438,7 +1211,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
 	INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout);
 
 	schedule_delayed_work(&osdc->osds_timeout_work,
-	   round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ));
+	   round_jiffies_relative(osdc->client->mount_args->osd_idle_ttl * HZ));
 
 	err = -ENOMEM;
 	osdc->req_mempool = mempool_create_kmalloc_pool(10,
@@ -1464,7 +1237,6 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
 out:
 	return err;
 }
-EXPORT_SYMBOL(ceph_osdc_init);
 
 void ceph_osdc_stop(struct ceph_osd_client *osdc)
 {
@@ -1479,7 +1251,6 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc)
 	ceph_msgpool_destroy(&osdc->msgpool_op);
 	ceph_msgpool_destroy(&osdc->msgpool_op_reply);
 }
-EXPORT_SYMBOL(ceph_osdc_stop);
 
 /*
  * Read some contiguous pages.  If we cross a stripe boundary, shorten
@@ -1517,7 +1288,6 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
 	dout("readpages result %d\n", rc);
 	return rc;
 }
-EXPORT_SYMBOL(ceph_osdc_readpages);
 
 /*
  * do a synchronous write on N pages
@@ -1560,7 +1330,6 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
 	dout("writepages result %d\n", rc);
 	return rc;
 }
-EXPORT_SYMBOL(ceph_osdc_writepages);
 
 /*
  * handle incoming message
@@ -1651,9 +1420,6 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
 		}
 		m->pages = req->r_pages;
 		m->nr_pages = req->r_num_pages;
-#ifdef CONFIG_BLOCK
-		m->bio = req->r_bio;
-#endif
 	}
 	*skip = 0;
 	req->r_con_filling_msg = ceph_con_get(con);
diff --git a/trunk/include/linux/ceph/osd_client.h b/trunk/fs/ceph/osd_client.h
similarity index 76%
rename from trunk/include/linux/ceph/osd_client.h
rename to trunk/fs/ceph/osd_client.h
index 6c91fb032c39..ce776989ef6a 100644
--- a/trunk/include/linux/ceph/osd_client.h
+++ b/trunk/fs/ceph/osd_client.h
@@ -15,7 +15,6 @@ struct ceph_snap_context;
 struct ceph_osd_request;
 struct ceph_osd_client;
 struct ceph_authorizer;
-struct ceph_pagelist;
 
 /*
  * completion callback for async writepages
@@ -69,7 +68,6 @@ struct ceph_osd_request {
 	struct list_head  r_unsafe_item;
 
 	struct inode *r_inode;         	      /* for use by callbacks */
-	void *r_priv;			      /* ditto */
 
 	char              r_oid[40];          /* object name */
 	int               r_oid_len;
@@ -82,11 +80,6 @@ struct ceph_osd_request {
 	struct page     **r_pages;            /* pages for data payload */
 	int               r_pages_from_pool;
 	int               r_own_pages;        /* if true, i own page list */
-#ifdef CONFIG_BLOCK
-	struct bio       *r_bio;	      /* instead of pages */
-#endif
-
-	struct ceph_pagelist *r_trail;	      /* trailing part of the data */
 };
 
 struct ceph_osd_client {
@@ -117,42 +110,6 @@ struct ceph_osd_client {
 	struct ceph_msgpool	msgpool_op_reply;
 };
 
-struct ceph_osd_req_op {
-	u16 op;           /* CEPH_OSD_OP_* */
-	u32 flags;        /* CEPH_OSD_FLAG_* */
-	union {
-		struct {
-			u64 offset, length;
-			u64 truncate_size;
-			u32 truncate_seq;
-		} extent;
-		struct {
-			const char *name;
-			u32 name_len;
-			const char  *val;
-			u32 value_len;
-			__u8 cmp_op;       /* CEPH_OSD_CMPXATTR_OP_* */
-			__u8 cmp_mode;     /* CEPH_OSD_CMPXATTR_MODE_* */
-		} xattr;
-		struct {
-			const char *class_name;
-			__u8 class_len;
-			const char *method_name;
-			__u8 method_len;
-			__u8 argc;
-			const char *indata;
-			u32 indata_len;
-		} cls;
-		struct {
-			u64 cookie, count;
-		} pgls;
-	        struct {
-		        u64 snapid;
-	        } snap;
-	};
-	u32 payload_len;
-};
-
 extern int ceph_osdc_init(struct ceph_osd_client *osdc,
 			  struct ceph_client *client);
 extern void ceph_osdc_stop(struct ceph_osd_client *osdc);
@@ -162,30 +119,6 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
 extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
 				 struct ceph_msg *msg);
 
-extern void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
-			struct ceph_file_layout *layout,
-			u64 snapid,
-			u64 off, u64 *plen, u64 *bno,
-			struct ceph_osd_request *req,
-			struct ceph_osd_req_op *op);
-
-extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
-					       int flags,
-					       struct ceph_snap_context *snapc,
-					       struct ceph_osd_req_op *ops,
-					       bool use_mempool,
-					       gfp_t gfp_flags,
-					       struct page **pages,
-					       struct bio *bio);
-
-extern void ceph_osdc_build_request(struct ceph_osd_request *req,
-				    u64 off, u64 *plen,
-				    struct ceph_osd_req_op *src_ops,
-				    struct ceph_snap_context *snapc,
-				    struct timespec *mtime,
-				    const char *oid,
-				    int oid_len);
-
 extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
 				      struct ceph_file_layout *layout,
 				      struct ceph_vino vino,
diff --git a/trunk/net/ceph/osdmap.c b/trunk/fs/ceph/osdmap.c
similarity index 97%
rename from trunk/net/ceph/osdmap.c
rename to trunk/fs/ceph/osdmap.c
index d73f3f6efa36..e31f118f1392 100644
--- a/trunk/net/ceph/osdmap.c
+++ b/trunk/fs/ceph/osdmap.c
@@ -1,15 +1,14 @@
 
-#include <linux/ceph/ceph_debug.h>
+#include "ceph_debug.h"
 
-#include <linux/module.h>
 #include <linux/slab.h>
 #include <asm/div64.h>
 
-#include <linux/ceph/libceph.h>
-#include <linux/ceph/osdmap.h>
-#include <linux/ceph/decode.h>
-#include <linux/crush/hash.h>
-#include <linux/crush/mapper.h>
+#include "super.h"
+#include "osdmap.h"
+#include "crush/hash.h"
+#include "crush/mapper.h"
+#include "decode.h"
 
 char *ceph_osdmap_state_str(char *str, int len, int state)
 {
@@ -418,20 +417,6 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id)
 	return NULL;
 }
 
-int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name)
-{
-	struct rb_node *rbp;
-
-	for (rbp = rb_first(&map->pg_pools); rbp; rbp = rb_next(rbp)) {
-		struct ceph_pg_pool_info *pi =
-			rb_entry(rbp, struct ceph_pg_pool_info, node);
-		if (pi->name && strcmp(pi->name, name) == 0)
-			return pi->id;
-	}
-	return -ENOENT;
-}
-EXPORT_SYMBOL(ceph_pg_poolid_by_name);
-
 static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
 {
 	rb_erase(&pi->node, root);
@@ -981,7 +966,6 @@ void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
 
 	dout(" obj extent %llu~%llu\n", *oxoff, *oxlen);
 }
-EXPORT_SYMBOL(ceph_calc_file_object_mapping);
 
 /*
  * calculate an object layout (i.e. pgid) from an oid,
@@ -1027,7 +1011,6 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
 	ol->ol_stripe_unit = fl->fl_object_stripe_unit;
 	return 0;
 }
-EXPORT_SYMBOL(ceph_calc_object_layout);
 
 /*
  * Calculate raw osd vector for the given pgid.  Return pointer to osd
@@ -1125,4 +1108,3 @@ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
 			return osds[i];
 	return -1;
 }
-EXPORT_SYMBOL(ceph_calc_pg_primary);
diff --git a/trunk/include/linux/ceph/osdmap.h b/trunk/fs/ceph/osdmap.h
similarity index 97%
rename from trunk/include/linux/ceph/osdmap.h
rename to trunk/fs/ceph/osdmap.h
index ba4c205cbb01..970b547e510d 100644
--- a/trunk/include/linux/ceph/osdmap.h
+++ b/trunk/fs/ceph/osdmap.h
@@ -4,7 +4,7 @@
 #include <linux/rbtree.h>
 #include "types.h"
 #include "ceph_fs.h"
-#include <linux/crush/crush.h>
+#include "crush/crush.h"
 
 /*
  * The osd map describes the current membership of the osd cluster and
@@ -125,6 +125,4 @@ extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
 				struct ceph_pg pgid);
 
-extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
-
 #endif
diff --git a/trunk/fs/ceph/pagelist.c b/trunk/fs/ceph/pagelist.c
new file mode 100644
index 000000000000..46a368b6dce5
--- /dev/null
+++ b/trunk/fs/ceph/pagelist.c
@@ -0,0 +1,63 @@
+
+#include <linux/gfp.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+
+#include "pagelist.h"
+
+static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
+{
+	struct page *page = list_entry(pl->head.prev, struct page,
+				       lru);
+	kunmap(page);
+}
+
+int ceph_pagelist_release(struct ceph_pagelist *pl)
+{
+	if (pl->mapped_tail)
+		ceph_pagelist_unmap_tail(pl);
+
+	while (!list_empty(&pl->head)) {
+		struct page *page = list_first_entry(&pl->head, struct page,
+						     lru);
+		list_del(&page->lru);
+		__free_page(page);
+	}
+	return 0;
+}
+
+static int ceph_pagelist_addpage(struct ceph_pagelist *pl)
+{
+	struct page *page = __page_cache_alloc(GFP_NOFS);
+	if (!page)
+		return -ENOMEM;
+	pl->room += PAGE_SIZE;
+	list_add_tail(&page->lru, &pl->head);
+	if (pl->mapped_tail)
+		ceph_pagelist_unmap_tail(pl);
+	pl->mapped_tail = kmap(page);
+	return 0;
+}
+
+int ceph_pagelist_append(struct ceph_pagelist *pl, void *buf, size_t len)
+{
+	while (pl->room < len) {
+		size_t bit = pl->room;
+		int ret;
+
+		memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK),
+		       buf, bit);
+		pl->length += bit;
+		pl->room -= bit;
+		buf += bit;
+		len -= bit;
+		ret = ceph_pagelist_addpage(pl);
+		if (ret)
+			return ret;
+	}
+
+	memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), buf, len);
+	pl->length += len;
+	pl->room -= len;
+	return 0;
+}
diff --git a/trunk/include/linux/ceph/pagelist.h b/trunk/fs/ceph/pagelist.h
similarity index 62%
rename from trunk/include/linux/ceph/pagelist.h
rename to trunk/fs/ceph/pagelist.h
index 9660d6b0a35d..e8a4187e1087 100644
--- a/trunk/include/linux/ceph/pagelist.h
+++ b/trunk/fs/ceph/pagelist.h
@@ -8,14 +8,6 @@ struct ceph_pagelist {
 	void *mapped_tail;
 	size_t length;
 	size_t room;
-	struct list_head free_list;
-	size_t num_pages_free;
-};
-
-struct ceph_pagelist_cursor {
-	struct ceph_pagelist *pl;   /* pagelist, for error checking */
-	struct list_head *page_lru; /* page in list */
-	size_t room;		    /* room remaining to reset to */
 };
 
 static inline void ceph_pagelist_init(struct ceph_pagelist *pl)
@@ -24,23 +16,10 @@ static inline void ceph_pagelist_init(struct ceph_pagelist *pl)
 	pl->mapped_tail = NULL;
 	pl->length = 0;
 	pl->room = 0;
-	INIT_LIST_HEAD(&pl->free_list);
-	pl->num_pages_free = 0;
 }
-
 extern int ceph_pagelist_release(struct ceph_pagelist *pl);
 
-extern int ceph_pagelist_append(struct ceph_pagelist *pl, const void *d, size_t l);
-
-extern int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space);
-
-extern int ceph_pagelist_free_reserve(struct ceph_pagelist *pl);
-
-extern void ceph_pagelist_set_cursor(struct ceph_pagelist *pl,
-				     struct ceph_pagelist_cursor *c);
-
-extern int ceph_pagelist_truncate(struct ceph_pagelist *pl,
-				  struct ceph_pagelist_cursor *c);
+extern int ceph_pagelist_append(struct ceph_pagelist *pl, void *d, size_t l);
 
 static inline int ceph_pagelist_encode_64(struct ceph_pagelist *pl, u64 v)
 {
diff --git a/trunk/include/linux/ceph/rados.h b/trunk/fs/ceph/rados.h
similarity index 100%
rename from trunk/include/linux/ceph/rados.h
rename to trunk/fs/ceph/rados.h
diff --git a/trunk/fs/ceph/snap.c b/trunk/fs/ceph/snap.c
index 39c243acd062..190b6c4a6f2b 100644
--- a/trunk/fs/ceph/snap.c
+++ b/trunk/fs/ceph/snap.c
@@ -1,12 +1,10 @@
-#include <linux/ceph/ceph_debug.h>
+#include "ceph_debug.h"
 
 #include <linux/sort.h>
 #include <linux/slab.h>
 
 #include "super.h"
-#include "mds_client.h"
-
-#include <linux/ceph/decode.h>
+#include "decode.h"
 
 /*
  * Snapshots in ceph are driven in large part by cooperation from the
@@ -528,7 +526,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
 			    struct ceph_cap_snap *capsnap)
 {
 	struct inode *inode = &ci->vfs_inode;
-	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+	struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
 
 	BUG_ON(capsnap->writing);
 	capsnap->size = inode->i_size;
@@ -749,7 +747,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
 		      struct ceph_mds_session *session,
 		      struct ceph_msg *msg)
 {
-	struct super_block *sb = mdsc->fsc->sb;
+	struct super_block *sb = mdsc->client->sb;
 	int mds = session->s_mds;
 	u64 split;
 	int op;
diff --git a/trunk/fs/ceph/super.c b/trunk/fs/ceph/super.c
index d6e0e0421891..9922628532b2 100644
--- a/trunk/fs/ceph/super.c
+++ b/trunk/fs/ceph/super.c
@@ -1,5 +1,5 @@
 
-#include <linux/ceph/ceph_debug.h>
+#include "ceph_debug.h"
 
 #include <linux/backing-dev.h>
 #include <linux/ctype.h>
@@ -15,13 +15,10 @@
 #include <linux/statfs.h>
 #include <linux/string.h>
 
+#include "decode.h"
 #include "super.h"
-#include "mds_client.h"
-
-#include <linux/ceph/decode.h>
-#include <linux/ceph/mon_client.h>
-#include <linux/ceph/auth.h>
-#include <linux/ceph/debugfs.h>
+#include "mon_client.h"
+#include "auth.h"
 
 /*
  * Ceph superblock operations
@@ -29,22 +26,36 @@
  * Handle the basics of mounting, unmounting.
  */
 
+
+/*
+ * find filename portion of a path (/foo/bar/baz -> baz)
+ */
+const char *ceph_file_part(const char *s, int len)
+{
+	const char *e = s + len;
+
+	while (e != s && *(e-1) != '/')
+		e--;
+	return e;
+}
+
+
 /*
  * super ops
  */
 static void ceph_put_super(struct super_block *s)
 {
-	struct ceph_fs_client *fsc = ceph_sb_to_client(s);
+	struct ceph_client *client = ceph_sb_to_client(s);
 
 	dout("put_super\n");
-	ceph_mdsc_close_sessions(fsc->mdsc);
+	ceph_mdsc_close_sessions(&client->mdsc);
 
 	/*
 	 * ensure we release the bdi before put_anon_super releases
 	 * the device name.
 	 */
-	if (s->s_bdi == &fsc->backing_dev_info) {
-		bdi_unregister(&fsc->backing_dev_info);
+	if (s->s_bdi == &client->backing_dev_info) {
+		bdi_unregister(&client->backing_dev_info);
 		s->s_bdi = NULL;
 	}
 
@@ -53,14 +64,14 @@ static void ceph_put_super(struct super_block *s)
 
 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
-	struct ceph_fs_client *fsc = ceph_inode_to_client(dentry->d_inode);
-	struct ceph_monmap *monmap = fsc->client->monc.monmap;
+	struct ceph_client *client = ceph_inode_to_client(dentry->d_inode);
+	struct ceph_monmap *monmap = client->monc.monmap;
 	struct ceph_statfs st;
 	u64 fsid;
 	int err;
 
 	dout("statfs\n");
-	err = ceph_monc_do_statfs(&fsc->client->monc, &st);
+	err = ceph_monc_do_statfs(&client->monc, &st);
 	if (err < 0)
 		return err;
 
@@ -93,28 +104,238 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
 
 static int ceph_sync_fs(struct super_block *sb, int wait)
 {
-	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
+	struct ceph_client *client = ceph_sb_to_client(sb);
 
 	if (!wait) {
 		dout("sync_fs (non-blocking)\n");
-		ceph_flush_dirty_caps(fsc->mdsc);
+		ceph_flush_dirty_caps(&client->mdsc);
 		dout("sync_fs (non-blocking) done\n");
 		return 0;
 	}
 
 	dout("sync_fs (blocking)\n");
-	ceph_osdc_sync(&fsc->client->osdc);
-	ceph_mdsc_sync(fsc->mdsc);
+	ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc);
+	ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc);
 	dout("sync_fs (blocking) done\n");
 	return 0;
 }
 
+static int default_congestion_kb(void)
+{
+	int congestion_kb;
+
+	/*
+	 * Copied from NFS
+	 *
+	 * congestion size, scale with available memory.
+	 *
+	 *  64MB:    8192k
+	 * 128MB:   11585k
+	 * 256MB:   16384k
+	 * 512MB:   23170k
+	 *   1GB:   32768k
+	 *   2GB:   46340k
+	 *   4GB:   65536k
+	 *   8GB:   92681k
+	 *  16GB:  131072k
+	 *
+	 * This allows larger machines to have larger/more transfers.
+	 * Limit the default to 256M
+	 */
+	congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
+	if (congestion_kb > 256*1024)
+		congestion_kb = 256*1024;
+
+	return congestion_kb;
+}
+
+/**
+ * ceph_show_options - Show mount options in /proc/mounts
+ * @m: seq_file to write to
+ * @mnt: mount descriptor
+ */
+static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+	struct ceph_client *client = ceph_sb_to_client(mnt->mnt_sb);
+	struct ceph_mount_args *args = client->mount_args;
+
+	if (args->flags & CEPH_OPT_FSID)
+		seq_printf(m, ",fsid=%pU", &args->fsid);
+	if (args->flags & CEPH_OPT_NOSHARE)
+		seq_puts(m, ",noshare");
+	if (args->flags & CEPH_OPT_DIRSTAT)
+		seq_puts(m, ",dirstat");
+	if ((args->flags & CEPH_OPT_RBYTES) == 0)
+		seq_puts(m, ",norbytes");
+	if (args->flags & CEPH_OPT_NOCRC)
+		seq_puts(m, ",nocrc");
+	if (args->flags & CEPH_OPT_NOASYNCREADDIR)
+		seq_puts(m, ",noasyncreaddir");
+
+	if (args->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
+		seq_printf(m, ",mount_timeout=%d", args->mount_timeout);
+	if (args->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
+		seq_printf(m, ",osd_idle_ttl=%d", args->osd_idle_ttl);
+	if (args->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT)
+		seq_printf(m, ",osdtimeout=%d", args->osd_timeout);
+	if (args->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
+		seq_printf(m, ",osdkeepalivetimeout=%d",
+			 args->osd_keepalive_timeout);
+	if (args->wsize)
+		seq_printf(m, ",wsize=%d", args->wsize);
+	if (args->rsize != CEPH_MOUNT_RSIZE_DEFAULT)
+		seq_printf(m, ",rsize=%d", args->rsize);
+	if (args->congestion_kb != default_congestion_kb())
+		seq_printf(m, ",write_congestion_kb=%d", args->congestion_kb);
+	if (args->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
+		seq_printf(m, ",caps_wanted_delay_min=%d",
+			 args->caps_wanted_delay_min);
+	if (args->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
+		seq_printf(m, ",caps_wanted_delay_max=%d",
+			   args->caps_wanted_delay_max);
+	if (args->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT)
+		seq_printf(m, ",cap_release_safety=%d",
+			   args->cap_release_safety);
+	if (args->max_readdir != CEPH_MAX_READDIR_DEFAULT)
+		seq_printf(m, ",readdir_max_entries=%d", args->max_readdir);
+	if (args->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
+		seq_printf(m, ",readdir_max_bytes=%d", args->max_readdir_bytes);
+	if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
+		seq_printf(m, ",snapdirname=%s", args->snapdir_name);
+	if (args->name)
+		seq_printf(m, ",name=%s", args->name);
+	if (args->secret)
+		seq_puts(m, ",secret=<hidden>");
+	return 0;
+}
+
+/*
+ * caches
+ */
+struct kmem_cache *ceph_inode_cachep;
+struct kmem_cache *ceph_cap_cachep;
+struct kmem_cache *ceph_dentry_cachep;
+struct kmem_cache *ceph_file_cachep;
+
+static void ceph_inode_init_once(void *foo)
+{
+	struct ceph_inode_info *ci = foo;
+	inode_init_once(&ci->vfs_inode);
+}
+
+static int __init init_caches(void)
+{
+	ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
+				      sizeof(struct ceph_inode_info),
+				      __alignof__(struct ceph_inode_info),
+				      (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
+				      ceph_inode_init_once);
+	if (ceph_inode_cachep == NULL)
+		return -ENOMEM;
+
+	ceph_cap_cachep = KMEM_CACHE(ceph_cap,
+				     SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
+	if (ceph_cap_cachep == NULL)
+		goto bad_cap;
+
+	ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
+					SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
+	if (ceph_dentry_cachep == NULL)
+		goto bad_dentry;
+
+	ceph_file_cachep = KMEM_CACHE(ceph_file_info,
+				      SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
+	if (ceph_file_cachep == NULL)
+		goto bad_file;
+
+	return 0;
+
+bad_file:
+	kmem_cache_destroy(ceph_dentry_cachep);
+bad_dentry:
+	kmem_cache_destroy(ceph_cap_cachep);
+bad_cap:
+	kmem_cache_destroy(ceph_inode_cachep);
+	return -ENOMEM;
+}
+
+static void destroy_caches(void)
+{
+	kmem_cache_destroy(ceph_inode_cachep);
+	kmem_cache_destroy(ceph_cap_cachep);
+	kmem_cache_destroy(ceph_dentry_cachep);
+	kmem_cache_destroy(ceph_file_cachep);
+}
+
+
+/*
+ * ceph_umount_begin - initiate forced umount.  Tear down down the
+ * mount, skipping steps that may hang while waiting for server(s).
+ */
+static void ceph_umount_begin(struct super_block *sb)
+{
+	struct ceph_client *client = ceph_sb_to_client(sb);
+
+	dout("ceph_umount_begin - starting forced umount\n");
+	if (!client)
+		return;
+	client->mount_state = CEPH_MOUNT_SHUTDOWN;
+	return;
+}
+
+static const struct super_operations ceph_super_ops = {
+	.alloc_inode	= ceph_alloc_inode,
+	.destroy_inode	= ceph_destroy_inode,
+	.write_inode    = ceph_write_inode,
+	.sync_fs        = ceph_sync_fs,
+	.put_super	= ceph_put_super,
+	.show_options   = ceph_show_options,
+	.statfs		= ceph_statfs,
+	.umount_begin   = ceph_umount_begin,
+};
+
+
+const char *ceph_msg_type_name(int type)
+{
+	switch (type) {
+	case CEPH_MSG_SHUTDOWN: return "shutdown";
+	case CEPH_MSG_PING: return "ping";
+	case CEPH_MSG_AUTH: return "auth";
+	case CEPH_MSG_AUTH_REPLY: return "auth_reply";
+	case CEPH_MSG_MON_MAP: return "mon_map";
+	case CEPH_MSG_MON_GET_MAP: return "mon_get_map";
+	case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe";
+	case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack";
+	case CEPH_MSG_STATFS: return "statfs";
+	case CEPH_MSG_STATFS_REPLY: return "statfs_reply";
+	case CEPH_MSG_MDS_MAP: return "mds_map";
+	case CEPH_MSG_CLIENT_SESSION: return "client_session";
+	case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect";
+	case CEPH_MSG_CLIENT_REQUEST: return "client_request";
+	case CEPH_MSG_CLIENT_REQUEST_FORWARD: return "client_request_forward";
+	case CEPH_MSG_CLIENT_REPLY: return "client_reply";
+	case CEPH_MSG_CLIENT_CAPS: return "client_caps";
+	case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release";
+	case CEPH_MSG_CLIENT_SNAP: return "client_snap";
+	case CEPH_MSG_CLIENT_LEASE: return "client_lease";
+	case CEPH_MSG_OSD_MAP: return "osd_map";
+	case CEPH_MSG_OSD_OP: return "osd_op";
+	case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
+	default: return "unknown";
+	}
+}
+
+
 /*
  * mount options
  */
 enum {
 	Opt_wsize,
 	Opt_rsize,
+	Opt_osdtimeout,
+	Opt_osdkeepalivetimeout,
+	Opt_mount_timeout,
+	Opt_osd_idle_ttl,
 	Opt_caps_wanted_delay_min,
 	Opt_caps_wanted_delay_max,
 	Opt_cap_release_safety,
@@ -123,19 +344,29 @@ enum {
 	Opt_congestion_kb,
 	Opt_last_int,
 	/* int args above */
+	Opt_fsid,
 	Opt_snapdirname,
+	Opt_name,
+	Opt_secret,
 	Opt_last_string,
 	/* string args above */
+	Opt_ip,
+	Opt_noshare,
 	Opt_dirstat,
 	Opt_nodirstat,
 	Opt_rbytes,
 	Opt_norbytes,
+	Opt_nocrc,
 	Opt_noasyncreaddir,
 };
 
-static match_table_t fsopt_tokens = {
+static match_table_t arg_tokens = {
 	{Opt_wsize, "wsize=%d"},
 	{Opt_rsize, "rsize=%d"},
+	{Opt_osdtimeout, "osdtimeout=%d"},
+	{Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
+	{Opt_mount_timeout, "mount_timeout=%d"},
+	{Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
 	{Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
 	{Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
 	{Opt_cap_release_safety, "cap_release_safety=%d"},
@@ -143,459 +374,403 @@ static match_table_t fsopt_tokens = {
 	{Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
 	{Opt_congestion_kb, "write_congestion_kb=%d"},
 	/* int args above */
+	{Opt_fsid, "fsid=%s"},
 	{Opt_snapdirname, "snapdirname=%s"},
+	{Opt_name, "name=%s"},
+	{Opt_secret, "secret=%s"},
 	/* string args above */
+	{Opt_ip, "ip=%s"},
+	{Opt_noshare, "noshare"},
 	{Opt_dirstat, "dirstat"},
 	{Opt_nodirstat, "nodirstat"},
 	{Opt_rbytes, "rbytes"},
 	{Opt_norbytes, "norbytes"},
+	{Opt_nocrc, "nocrc"},
 	{Opt_noasyncreaddir, "noasyncreaddir"},
 	{-1, NULL}
 };
 
-static int parse_fsopt_token(char *c, void *private)
+static int parse_fsid(const char *str, struct ceph_fsid *fsid)
 {
-	struct ceph_mount_options *fsopt = private;
-	substring_t argstr[MAX_OPT_ARGS];
-	int token, intval, ret;
-
-	token = match_token((char *)c, fsopt_tokens, argstr);
-	if (token < 0)
-		return -EINVAL;
-
-	if (token < Opt_last_int) {
-		ret = match_int(&argstr[0], &intval);
-		if (ret < 0) {
-			pr_err("bad mount option arg (not int) "
-			       "at '%s'\n", c);
-			return ret;
+	int i = 0;
+	char tmp[3];
+	int err = -EINVAL;
+	int d;
+
+	dout("parse_fsid '%s'\n", str);
+	tmp[2] = 0;
+	while (*str && i < 16) {
+		if (ispunct(*str)) {
+			str++;
+			continue;
 		}
-		dout("got int token %d val %d\n", token, intval);
-	} else if (token > Opt_last_int && token < Opt_last_string) {
-		dout("got string token %d val %s\n", token,
-		     argstr[0].from);
-	} else {
-		dout("got token %d\n", token);
-	}
-
-	switch (token) {
-	case Opt_snapdirname:
-		kfree(fsopt->snapdir_name);
-		fsopt->snapdir_name = kstrndup(argstr[0].from,
-					       argstr[0].to-argstr[0].from,
-					       GFP_KERNEL);
-		if (!fsopt->snapdir_name)
-			return -ENOMEM;
-		break;
-
-		/* misc */
-	case Opt_wsize:
-		fsopt->wsize = intval;
-		break;
-	case Opt_rsize:
-		fsopt->rsize = intval;
-		break;
-	case Opt_caps_wanted_delay_min:
-		fsopt->caps_wanted_delay_min = intval;
-		break;
-	case Opt_caps_wanted_delay_max:
-		fsopt->caps_wanted_delay_max = intval;
-		break;
-	case Opt_readdir_max_entries:
-		fsopt->max_readdir = intval;
-		break;
-	case Opt_readdir_max_bytes:
-		fsopt->max_readdir_bytes = intval;
-		break;
-	case Opt_congestion_kb:
-		fsopt->congestion_kb = intval;
-		break;
-	case Opt_dirstat:
-		fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT;
-		break;
-	case Opt_nodirstat:
-		fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT;
-		break;
-	case Opt_rbytes:
-		fsopt->flags |= CEPH_MOUNT_OPT_RBYTES;
-		break;
-	case Opt_norbytes:
-		fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES;
-		break;
-	case Opt_noasyncreaddir:
-		fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
-		break;
-	default:
-		BUG_ON(token);
+		if (!isxdigit(str[0]) || !isxdigit(str[1]))
+			break;
+		tmp[0] = str[0];
+		tmp[1] = str[1];
+		if (sscanf(tmp, "%x", &d) < 1)
+			break;
+		fsid->fsid[i] = d & 0xff;
+		i++;
+		str += 2;
 	}
-	return 0;
-}
-
-static void destroy_mount_options(struct ceph_mount_options *args)
-{
-	dout("destroy_mount_options %p\n", args);
-	kfree(args->snapdir_name);
-	kfree(args);
-}
-
-static int strcmp_null(const char *s1, const char *s2)
-{
-	if (!s1 && !s2)
-		return 0;
-	if (s1 && !s2)
-		return -1;
-	if (!s1 && s2)
-		return 1;
-	return strcmp(s1, s2);
-}
-
-static int compare_mount_options(struct ceph_mount_options *new_fsopt,
-				 struct ceph_options *new_opt,
-				 struct ceph_fs_client *fsc)
-{
-	struct ceph_mount_options *fsopt1 = new_fsopt;
-	struct ceph_mount_options *fsopt2 = fsc->mount_options;
-	int ofs = offsetof(struct ceph_mount_options, snapdir_name);
-	int ret;
-
-	ret = memcmp(fsopt1, fsopt2, ofs);
-	if (ret)
-		return ret;
-
-	ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name);
-	if (ret)
-		return ret;
 
-	return ceph_compare_options(new_opt, fsc->client);
+	if (i == 16)
+		err = 0;
+	dout("parse_fsid ret %d got fsid %pU", err, fsid);
+	return err;
 }
 
-static int parse_mount_options(struct ceph_mount_options **pfsopt,
-			       struct ceph_options **popt,
-			       int flags, char *options,
-			       const char *dev_name,
-			       const char **path)
+static struct ceph_mount_args *parse_mount_args(int flags, char *options,
+						const char *dev_name,
+						const char **path)
 {
-	struct ceph_mount_options *fsopt;
-	const char *dev_name_end;
+	struct ceph_mount_args *args;
+	const char *c;
 	int err = -ENOMEM;
+	substring_t argstr[MAX_OPT_ARGS];
 
-	fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL);
-	if (!fsopt)
-		return -ENOMEM;
+	args = kzalloc(sizeof(*args), GFP_KERNEL);
+	if (!args)
+		return ERR_PTR(-ENOMEM);
+	args->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*args->mon_addr),
+				 GFP_KERNEL);
+	if (!args->mon_addr)
+		goto out;
+
+	dout("parse_mount_args %p, dev_name '%s'\n", args, dev_name);
+
+	/* start with defaults */
+	args->sb_flags = flags;
+	args->flags = CEPH_OPT_DEFAULT;
+	args->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT;
+	args->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
+	args->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */
+	args->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;   /* seconds */
+	args->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
+	args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
+	args->rsize = CEPH_MOUNT_RSIZE_DEFAULT;
+	args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
+	args->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
+	args->max_readdir = CEPH_MAX_READDIR_DEFAULT;
+	args->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
+	args->congestion_kb = default_congestion_kb();
+
+	/* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */
+	err = -EINVAL;
+	if (!dev_name)
+		goto out;
+	*path = strstr(dev_name, ":/");
+	if (*path == NULL) {
+		pr_err("device name is missing path (no :/ in %s)\n",
+		       dev_name);
+		goto out;
+	}
 
-	dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name);
-
-        fsopt->sb_flags = flags;
-        fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
-
-        fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT;
-        fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
-        fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
-        fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
-        fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
-        fsopt->congestion_kb = default_congestion_kb();
-	
-        /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */
-        err = -EINVAL;
-        if (!dev_name)
-                goto out;
-        *path = strstr(dev_name, ":/");
-        if (*path == NULL) {
-                pr_err("device name is missing path (no :/ in %s)\n",
-                       dev_name);
-                goto out;
-        }
-	dev_name_end = *path;
-	dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
+	/* get mon ip(s) */
+	err = ceph_parse_ips(dev_name, *path, args->mon_addr,
+			     CEPH_MAX_MON, &args->num_mon);
+	if (err < 0)
+		goto out;
 
 	/* path on server */
 	*path += 2;
 	dout("server path '%s'\n", *path);
 
-	err = ceph_parse_options(popt, options, dev_name, dev_name_end,
-				 parse_fsopt_token, (void *)fsopt);
-	if (err)
-		goto out;
-
-	/* success */
-	*pfsopt = fsopt;
-	return 0;
+	/* parse mount options */
+	while ((c = strsep(&options, ",")) != NULL) {
+		int token, intval, ret;
+		if (!*c)
+			continue;
+		err = -EINVAL;
+		token = match_token((char *)c, arg_tokens, argstr);
+		if (token < 0) {
+			pr_err("bad mount option at '%s'\n", c);
+			goto out;
+		}
+		if (token < Opt_last_int) {
+			ret = match_int(&argstr[0], &intval);
+			if (ret < 0) {
+				pr_err("bad mount option arg (not int) "
+				       "at '%s'\n", c);
+				continue;
+			}
+			dout("got int token %d val %d\n", token, intval);
+		} else if (token > Opt_last_int && token < Opt_last_string) {
+			dout("got string token %d val %s\n", token,
+			     argstr[0].from);
+		} else {
+			dout("got token %d\n", token);
+		}
+		switch (token) {
+		case Opt_ip:
+			err = ceph_parse_ips(argstr[0].from,
+					     argstr[0].to,
+					     &args->my_addr,
+					     1, NULL);
+			if (err < 0)
+				goto out;
+			args->flags |= CEPH_OPT_MYIP;
+			break;
+
+		case Opt_fsid:
+			err = parse_fsid(argstr[0].from, &args->fsid);
+			if (err == 0)
+				args->flags |= CEPH_OPT_FSID;
+			break;
+		case Opt_snapdirname:
+			kfree(args->snapdir_name);
+			args->snapdir_name = kstrndup(argstr[0].from,
+					      argstr[0].to-argstr[0].from,
+					      GFP_KERNEL);
+			break;
+		case Opt_name:
+			args->name = kstrndup(argstr[0].from,
+					      argstr[0].to-argstr[0].from,
+					      GFP_KERNEL);
+			break;
+		case Opt_secret:
+			args->secret = kstrndup(argstr[0].from,
+						argstr[0].to-argstr[0].from,
+						GFP_KERNEL);
+			break;
+
+			/* misc */
+		case Opt_wsize:
+			args->wsize = intval;
+			break;
+		case Opt_rsize:
+			args->rsize = intval;
+			break;
+		case Opt_osdtimeout:
+			args->osd_timeout = intval;
+			break;
+		case Opt_osdkeepalivetimeout:
+			args->osd_keepalive_timeout = intval;
+			break;
+		case Opt_osd_idle_ttl:
+			args->osd_idle_ttl = intval;
+			break;
+		case Opt_mount_timeout:
+			args->mount_timeout = intval;
+			break;
+		case Opt_caps_wanted_delay_min:
+			args->caps_wanted_delay_min = intval;
+			break;
+		case Opt_caps_wanted_delay_max:
+			args->caps_wanted_delay_max = intval;
+			break;
+		case Opt_readdir_max_entries:
+			args->max_readdir = intval;
+			break;
+		case Opt_readdir_max_bytes:
+			args->max_readdir_bytes = intval;
+			break;
+		case Opt_congestion_kb:
+			args->congestion_kb = intval;
+			break;
+
+		case Opt_noshare:
+			args->flags |= CEPH_OPT_NOSHARE;
+			break;
+
+		case Opt_dirstat:
+			args->flags |= CEPH_OPT_DIRSTAT;
+			break;
+		case Opt_nodirstat:
+			args->flags &= ~CEPH_OPT_DIRSTAT;
+			break;
+		case Opt_rbytes:
+			args->flags |= CEPH_OPT_RBYTES;
+			break;
+		case Opt_norbytes:
+			args->flags &= ~CEPH_OPT_RBYTES;
+			break;
+		case Opt_nocrc:
+			args->flags |= CEPH_OPT_NOCRC;
+			break;
+		case Opt_noasyncreaddir:
+			args->flags |= CEPH_OPT_NOASYNCREADDIR;
+			break;
+
+		default:
+			BUG_ON(token);
+		}
+	}
+	return args;
 
 out:
-	destroy_mount_options(fsopt);
-	return err;
-}
-
-/**
- * ceph_show_options - Show mount options in /proc/mounts
- * @m: seq_file to write to
- * @mnt: mount descriptor
- */
-static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
-{
-	struct ceph_fs_client *fsc = ceph_sb_to_client(mnt->mnt_sb);
-	struct ceph_mount_options *fsopt = fsc->mount_options;
-	struct ceph_options *opt = fsc->client->options;
-
-	if (opt->flags & CEPH_OPT_FSID)
-		seq_printf(m, ",fsid=%pU", &opt->fsid);
-	if (opt->flags & CEPH_OPT_NOSHARE)
-		seq_puts(m, ",noshare");
-	if (opt->flags & CEPH_OPT_NOCRC)
-		seq_puts(m, ",nocrc");
-
-	if (opt->name)
-		seq_printf(m, ",name=%s", opt->name);
-	if (opt->secret)
-		seq_puts(m, ",secret=<hidden>");
-
-	if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
-		seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
-	if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
-		seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
-	if (opt->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT)
-		seq_printf(m, ",osdtimeout=%d", opt->osd_timeout);
-	if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
-		seq_printf(m, ",osdkeepalivetimeout=%d",
-			   opt->osd_keepalive_timeout);
-
-	if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
-		seq_puts(m, ",dirstat");
-	if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES) == 0)
-		seq_puts(m, ",norbytes");
-	if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
-		seq_puts(m, ",noasyncreaddir");
-
-	if (fsopt->wsize)
-		seq_printf(m, ",wsize=%d", fsopt->wsize);
-	if (fsopt->rsize != CEPH_MOUNT_RSIZE_DEFAULT)
-		seq_printf(m, ",rsize=%d", fsopt->rsize);
-	if (fsopt->congestion_kb != default_congestion_kb())
-		seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
-	if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
-		seq_printf(m, ",caps_wanted_delay_min=%d",
-			 fsopt->caps_wanted_delay_min);
-	if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
-		seq_printf(m, ",caps_wanted_delay_max=%d",
-			   fsopt->caps_wanted_delay_max);
-	if (fsopt->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT)
-		seq_printf(m, ",cap_release_safety=%d",
-			   fsopt->cap_release_safety);
-	if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
-		seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir);
-	if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
-		seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
-	if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
-		seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name);
-	return 0;
+	kfree(args->mon_addr);
+	kfree(args);
+	return ERR_PTR(err);
 }
 
-/*
- * handle any mon messages the standard library doesn't understand.
- * return error if we don't either.
- */
-static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
+static void destroy_mount_args(struct ceph_mount_args *args)
 {
-	struct ceph_fs_client *fsc = client->private;
-	int type = le16_to_cpu(msg->hdr.type);
-
-	switch (type) {
-	case CEPH_MSG_MDS_MAP:
-		ceph_mdsc_handle_map(fsc->mdsc, msg);
-		return 0;
-
-	default:
-		return -1;
-	}
+	dout("destroy_mount_args %p\n", args);
+	kfree(args->snapdir_name);
+	args->snapdir_name = NULL;
+	kfree(args->name);
+	args->name = NULL;
+	kfree(args->secret);
+	args->secret = NULL;
+	kfree(args);
 }
 
 /*
- * create a new fs client
+ * create a fresh client instance
  */
-struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
-					struct ceph_options *opt)
+static struct ceph_client *ceph_create_client(struct ceph_mount_args *args)
 {
-	struct ceph_fs_client *fsc;
+	struct ceph_client *client;
 	int err = -ENOMEM;
 
-	fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
-	if (!fsc)
+	client = kzalloc(sizeof(*client), GFP_KERNEL);
+	if (client == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	fsc->client = ceph_create_client(opt, fsc);
-	if (IS_ERR(fsc->client)) {
-		err = PTR_ERR(fsc->client);
-		goto fail;
-	}
-	fsc->client->extra_mon_dispatch = extra_mon_dispatch;
-	fsc->client->supported_features |= CEPH_FEATURE_FLOCK;
-	fsc->client->monc.want_mdsmap = 1;
+	mutex_init(&client->mount_mutex);
+
+	init_waitqueue_head(&client->auth_wq);
 
-	fsc->mount_options = fsopt;
+	client->sb = NULL;
+	client->mount_state = CEPH_MOUNT_MOUNTING;
+	client->mount_args = args;
 
-	fsc->sb = NULL;
-	fsc->mount_state = CEPH_MOUNT_MOUNTING;
+	client->msgr = NULL;
 
-	atomic_long_set(&fsc->writeback_count, 0);
+	client->auth_err = 0;
+	atomic_long_set(&client->writeback_count, 0);
 
-	err = bdi_init(&fsc->backing_dev_info);
+	err = bdi_init(&client->backing_dev_info);
 	if (err < 0)
-		goto fail_client;
+		goto fail;
 
 	err = -ENOMEM;
-	fsc->wb_wq = create_workqueue("ceph-writeback");
-	if (fsc->wb_wq == NULL)
+	client->wb_wq = create_workqueue("ceph-writeback");
+	if (client->wb_wq == NULL)
 		goto fail_bdi;
-	fsc->pg_inv_wq = create_singlethread_workqueue("ceph-pg-invalid");
-	if (fsc->pg_inv_wq == NULL)
+	client->pg_inv_wq = create_singlethread_workqueue("ceph-pg-invalid");
+	if (client->pg_inv_wq == NULL)
 		goto fail_wb_wq;
-	fsc->trunc_wq = create_singlethread_workqueue("ceph-trunc");
-	if (fsc->trunc_wq == NULL)
+	client->trunc_wq = create_singlethread_workqueue("ceph-trunc");
+	if (client->trunc_wq == NULL)
 		goto fail_pg_inv_wq;
 
 	/* set up mempools */
 	err = -ENOMEM;
-	fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10,
-			      fsc->mount_options->wsize >> PAGE_CACHE_SHIFT);
-	if (!fsc->wb_pagevec_pool)
+	client->wb_pagevec_pool = mempool_create_kmalloc_pool(10,
+			      client->mount_args->wsize >> PAGE_CACHE_SHIFT);
+	if (!client->wb_pagevec_pool)
 		goto fail_trunc_wq;
 
 	/* caps */
-	fsc->min_caps = fsopt->max_readdir;
-
-	return fsc;
+	client->min_caps = args->max_readdir;
 
+	/* subsystems */
+	err = ceph_monc_init(&client->monc, client);
+	if (err < 0)
+		goto fail_mempool;
+	err = ceph_osdc_init(&client->osdc, client);
+	if (err < 0)
+		goto fail_monc;
+	err = ceph_mdsc_init(&client->mdsc, client);
+	if (err < 0)
+		goto fail_osdc;
+	return client;
+
+fail_osdc:
+	ceph_osdc_stop(&client->osdc);
+fail_monc:
+	ceph_monc_stop(&client->monc);
+fail_mempool:
+	mempool_destroy(client->wb_pagevec_pool);
 fail_trunc_wq:
-	destroy_workqueue(fsc->trunc_wq);
+	destroy_workqueue(client->trunc_wq);
 fail_pg_inv_wq:
-	destroy_workqueue(fsc->pg_inv_wq);
+	destroy_workqueue(client->pg_inv_wq);
 fail_wb_wq:
-	destroy_workqueue(fsc->wb_wq);
+	destroy_workqueue(client->wb_wq);
 fail_bdi:
-	bdi_destroy(&fsc->backing_dev_info);
-fail_client:
-	ceph_destroy_client(fsc->client);
+	bdi_destroy(&client->backing_dev_info);
 fail:
-	kfree(fsc);
+	kfree(client);
 	return ERR_PTR(err);
 }
 
-void destroy_fs_client(struct ceph_fs_client *fsc)
+static void ceph_destroy_client(struct ceph_client *client)
 {
-	dout("destroy_fs_client %p\n", fsc);
+	dout("destroy_client %p\n", client);
 
-	destroy_workqueue(fsc->wb_wq);
-	destroy_workqueue(fsc->pg_inv_wq);
-	destroy_workqueue(fsc->trunc_wq);
+	/* unmount */
+	ceph_mdsc_stop(&client->mdsc);
+	ceph_osdc_stop(&client->osdc);
 
-	bdi_destroy(&fsc->backing_dev_info);
+	/*
+	 * make sure mds and osd connections close out before destroying
+	 * the auth module, which is needed to free those connections'
+	 * ceph_authorizers.
+	 */
+	ceph_msgr_flush();
+
+	ceph_monc_stop(&client->monc);
 
-	mempool_destroy(fsc->wb_pagevec_pool);
+	ceph_debugfs_client_cleanup(client);
+	destroy_workqueue(client->wb_wq);
+	destroy_workqueue(client->pg_inv_wq);
+	destroy_workqueue(client->trunc_wq);
 
-	destroy_mount_options(fsc->mount_options);
+	bdi_destroy(&client->backing_dev_info);
 
-	ceph_fs_debugfs_cleanup(fsc);
+	if (client->msgr)
+		ceph_messenger_destroy(client->msgr);
+	mempool_destroy(client->wb_pagevec_pool);
 
-	ceph_destroy_client(fsc->client);
+	destroy_mount_args(client->mount_args);
 
-	kfree(fsc);
-	dout("destroy_fs_client %p done\n", fsc);
+	kfree(client);
+	dout("destroy_client %p done\n", client);
 }
 
 /*
- * caches
+ * Initially learn our fsid, or verify an fsid matches.
  */
-struct kmem_cache *ceph_inode_cachep;
-struct kmem_cache *ceph_cap_cachep;
-struct kmem_cache *ceph_dentry_cachep;
-struct kmem_cache *ceph_file_cachep;
-
-static void ceph_inode_init_once(void *foo)
+int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid)
 {
-	struct ceph_inode_info *ci = foo;
-	inode_init_once(&ci->vfs_inode);
-}
-
-static int __init init_caches(void)
-{
-	ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
-				      sizeof(struct ceph_inode_info),
-				      __alignof__(struct ceph_inode_info),
-				      (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
-				      ceph_inode_init_once);
-	if (ceph_inode_cachep == NULL)
-		return -ENOMEM;
-
-	ceph_cap_cachep = KMEM_CACHE(ceph_cap,
-				     SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
-	if (ceph_cap_cachep == NULL)
-		goto bad_cap;
-
-	ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
-					SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
-	if (ceph_dentry_cachep == NULL)
-		goto bad_dentry;
-
-	ceph_file_cachep = KMEM_CACHE(ceph_file_info,
-				      SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
-	if (ceph_file_cachep == NULL)
-		goto bad_file;
-
+	if (client->have_fsid) {
+		if (ceph_fsid_compare(&client->fsid, fsid)) {
+			pr_err("bad fsid, had %pU got %pU",
+			       &client->fsid, fsid);
+			return -1;
+		}
+	} else {
+		pr_info("client%lld fsid %pU\n", client->monc.auth->global_id,
+			fsid);
+		memcpy(&client->fsid, fsid, sizeof(*fsid));
+		ceph_debugfs_client_init(client);
+		client->have_fsid = true;
+	}
 	return 0;
-
-bad_file:
-	kmem_cache_destroy(ceph_dentry_cachep);
-bad_dentry:
-	kmem_cache_destroy(ceph_cap_cachep);
-bad_cap:
-	kmem_cache_destroy(ceph_inode_cachep);
-	return -ENOMEM;
 }
 
-static void destroy_caches(void)
-{
-	kmem_cache_destroy(ceph_inode_cachep);
-	kmem_cache_destroy(ceph_cap_cachep);
-	kmem_cache_destroy(ceph_dentry_cachep);
-	kmem_cache_destroy(ceph_file_cachep);
-}
-
-
 /*
- * ceph_umount_begin - initiate forced umount.  Tear down down the
- * mount, skipping steps that may hang while waiting for server(s).
+ * true if we have the mon map (and have thus joined the cluster)
  */
-static void ceph_umount_begin(struct super_block *sb)
+static int have_mon_and_osd_map(struct ceph_client *client)
 {
-	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
-
-	dout("ceph_umount_begin - starting forced umount\n");
-	if (!fsc)
-		return;
-	fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
-	return;
+	return client->monc.monmap && client->monc.monmap->epoch &&
+	       client->osdc.osdmap && client->osdc.osdmap->epoch;
 }
 
-static const struct super_operations ceph_super_ops = {
-	.alloc_inode	= ceph_alloc_inode,
-	.destroy_inode	= ceph_destroy_inode,
-	.write_inode    = ceph_write_inode,
-	.sync_fs        = ceph_sync_fs,
-	.put_super	= ceph_put_super,
-	.show_options   = ceph_show_options,
-	.statfs		= ceph_statfs,
-	.umount_begin   = ceph_umount_begin,
-};
-
 /*
  * Bootstrap mount by opening the root directory.  Note the mount
  * @started time from caller, and time out if this takes too long.
  */
-static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
+static struct dentry *open_root_dentry(struct ceph_client *client,
 				       const char *path,
 				       unsigned long started)
 {
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_mds_client *mdsc = &client->mdsc;
 	struct ceph_mds_request *req = NULL;
 	int err;
 	struct dentry *root;
@@ -609,14 +784,14 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
 	req->r_ino1.ino = CEPH_INO_ROOT;
 	req->r_ino1.snap = CEPH_NOSNAP;
 	req->r_started = started;
-	req->r_timeout = fsc->client->options->mount_timeout * HZ;
+	req->r_timeout = client->mount_args->mount_timeout * HZ;
 	req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
 	req->r_num_caps = 2;
 	err = ceph_mdsc_do_request(mdsc, NULL, req);
 	if (err == 0) {
 		dout("open_root_inode success\n");
 		if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT &&
-		    fsc->sb->s_root == NULL)
+		    client->sb->s_root == NULL)
 			root = d_alloc_root(req->r_target_inode);
 		else
 			root = d_obtain_alias(req->r_target_inode);
@@ -629,86 +804,105 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
 	return root;
 }
 
-
-
-
 /*
  * mount: join the ceph cluster, and open root directory.
  */
-static int ceph_mount(struct ceph_fs_client *fsc, struct vfsmount *mnt,
+static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt,
 		      const char *path)
 {
+	struct ceph_entity_addr *myaddr = NULL;
 	int err;
+	unsigned long timeout = client->mount_args->mount_timeout * HZ;
 	unsigned long started = jiffies;  /* note the start time */
 	struct dentry *root;
-	int first = 0;   /* first vfsmount for this super_block */
 
 	dout("mount start\n");
-	mutex_lock(&fsc->client->mount_mutex);
+	mutex_lock(&client->mount_mutex);
+
+	/* initialize the messenger */
+	if (client->msgr == NULL) {
+		if (ceph_test_opt(client, MYIP))
+			myaddr = &client->mount_args->my_addr;
+		client->msgr = ceph_messenger_create(myaddr);
+		if (IS_ERR(client->msgr)) {
+			err = PTR_ERR(client->msgr);
+			client->msgr = NULL;
+			goto out;
+		}
+		client->msgr->nocrc = ceph_test_opt(client, NOCRC);
+	}
 
-	err = __ceph_open_session(fsc->client, started);
+	/* open session, and wait for mon, mds, and osd maps */
+	err = ceph_monc_open_session(&client->monc);
 	if (err < 0)
 		goto out;
 
+	while (!have_mon_and_osd_map(client)) {
+		err = -EIO;
+		if (timeout && time_after_eq(jiffies, started + timeout))
+			goto out;
+
+		/* wait */
+		dout("mount waiting for mon_map\n");
+		err = wait_event_interruptible_timeout(client->auth_wq,
+		       have_mon_and_osd_map(client) || (client->auth_err < 0),
+		       timeout);
+		if (err == -EINTR || err == -ERESTARTSYS)
+			goto out;
+		if (client->auth_err < 0) {
+			err = client->auth_err;
+			goto out;
+		}
+	}
+
 	dout("mount opening root\n");
-	root = open_root_dentry(fsc, "", started);
+	root = open_root_dentry(client, "", started);
 	if (IS_ERR(root)) {
 		err = PTR_ERR(root);
 		goto out;
 	}
-	if (fsc->sb->s_root) {
+	if (client->sb->s_root)
 		dput(root);
-	} else {
-		fsc->sb->s_root = root;
-		first = 1;
-
-		err = ceph_fs_debugfs_init(fsc);
-		if (err < 0)
-			goto fail;
-	}
+	else
+		client->sb->s_root = root;
 
 	if (path[0] == 0) {
 		dget(root);
 	} else {
 		dout("mount opening base mountpoint\n");
-		root = open_root_dentry(fsc, path, started);
+		root = open_root_dentry(client, path, started);
 		if (IS_ERR(root)) {
 			err = PTR_ERR(root);
-			goto fail;
+			dput(client->sb->s_root);
+			client->sb->s_root = NULL;
+			goto out;
 		}
 	}
 
 	mnt->mnt_root = root;
-	mnt->mnt_sb = fsc->sb;
+	mnt->mnt_sb = client->sb;
 
-	fsc->mount_state = CEPH_MOUNT_MOUNTED;
+	client->mount_state = CEPH_MOUNT_MOUNTED;
 	dout("mount success\n");
 	err = 0;
 
 out:
-	mutex_unlock(&fsc->client->mount_mutex);
+	mutex_unlock(&client->mount_mutex);
 	return err;
-
-fail:
-	if (first) {
-		dput(fsc->sb->s_root);
-		fsc->sb->s_root = NULL;
-	}
-	goto out;
 }
 
 static int ceph_set_super(struct super_block *s, void *data)
 {
-	struct ceph_fs_client *fsc = data;
+	struct ceph_client *client = data;
 	int ret;
 
 	dout("set_super %p data %p\n", s, data);
 
-	s->s_flags = fsc->mount_options->sb_flags;
+	s->s_flags = client->mount_args->sb_flags;
 	s->s_maxbytes = 1ULL << 40;  /* temp value until we get mdsmap */
 
-	s->s_fs_info = fsc;
-	fsc->sb = s;
+	s->s_fs_info = client;
+	client->sb = s;
 
 	s->s_op = &ceph_super_ops;
 	s->s_export_op = &ceph_export_ops;
@@ -723,7 +917,7 @@ static int ceph_set_super(struct super_block *s, void *data)
 
 fail:
 	s->s_fs_info = NULL;
-	fsc->sb = NULL;
+	client->sb = NULL;
 	return ret;
 }
 
@@ -732,23 +926,30 @@ static int ceph_set_super(struct super_block *s, void *data)
  */
 static int ceph_compare_super(struct super_block *sb, void *data)
 {
-	struct ceph_fs_client *new = data;
-	struct ceph_mount_options *fsopt = new->mount_options;
-	struct ceph_options *opt = new->client->options;
-	struct ceph_fs_client *other = ceph_sb_to_client(sb);
+	struct ceph_client *new = data;
+	struct ceph_mount_args *args = new->mount_args;
+	struct ceph_client *other = ceph_sb_to_client(sb);
+	int i;
 
 	dout("ceph_compare_super %p\n", sb);
-
-	if (compare_mount_options(fsopt, opt, other)) {
-		dout("monitor(s)/mount options don't match\n");
-		return 0;
-	}
-	if ((opt->flags & CEPH_OPT_FSID) &&
-	    ceph_fsid_compare(&opt->fsid, &other->client->fsid)) {
-		dout("fsid doesn't match\n");
-		return 0;
+	if (args->flags & CEPH_OPT_FSID) {
+		if (ceph_fsid_compare(&args->fsid, &other->fsid)) {
+			dout("fsid doesn't match\n");
+			return 0;
+		}
+	} else {
+		/* do we share (a) monitor? */
+		for (i = 0; i < new->monc.monmap->num_mon; i++)
+			if (ceph_monmap_contains(other->monc.monmap,
+					 &new->monc.monmap->mon_inst[i].addr))
+				break;
+		if (i == new->monc.monmap->num_mon) {
+			dout("mon ip not part of monmap\n");
+			return 0;
+		}
+		dout("mon ip matches existing sb %p\n", sb);
 	}
-	if (fsopt->sb_flags != other->mount_options->sb_flags) {
+	if (args->sb_flags != other->mount_args->sb_flags) {
 		dout("flags differ\n");
 		return 0;
 	}
@@ -760,20 +961,19 @@ static int ceph_compare_super(struct super_block *sb, void *data)
  */
 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
 
-static int ceph_register_bdi(struct super_block *sb,
-			     struct ceph_fs_client *fsc)
+static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client)
 {
 	int err;
 
 	/* set ra_pages based on rsize mount option? */
-	if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
-		fsc->backing_dev_info.ra_pages =
-			(fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
+	if (client->mount_args->rsize >= PAGE_CACHE_SIZE)
+		client->backing_dev_info.ra_pages =
+			(client->mount_args->rsize + PAGE_CACHE_SIZE - 1)
 			>> PAGE_SHIFT;
-	err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d",
+	err = bdi_register(&client->backing_dev_info, NULL, "ceph-%d",
 			   atomic_long_inc_return(&bdi_seq));
 	if (!err)
-		sb->s_bdi = &fsc->backing_dev_info;
+		sb->s_bdi = &client->backing_dev_info;
 	return err;
 }
 
@@ -782,52 +982,46 @@ static int ceph_get_sb(struct file_system_type *fs_type,
 		       struct vfsmount *mnt)
 {
 	struct super_block *sb;
-	struct ceph_fs_client *fsc;
+	struct ceph_client *client;
 	int err;
 	int (*compare_super)(struct super_block *, void *) = ceph_compare_super;
 	const char *path = NULL;
-	struct ceph_mount_options *fsopt = NULL;
-	struct ceph_options *opt = NULL;
+	struct ceph_mount_args *args;
 
 	dout("ceph_get_sb\n");
-	err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path);
-	if (err < 0)
+	args = parse_mount_args(flags, data, dev_name, &path);
+	if (IS_ERR(args)) {
+		err = PTR_ERR(args);
 		goto out_final;
+	}
 
 	/* create client (which we may/may not use) */
-	fsc = create_fs_client(fsopt, opt);
-	if (IS_ERR(fsc)) {
-		err = PTR_ERR(fsc);
-		kfree(fsopt);
-		kfree(opt);
+	client = ceph_create_client(args);
+	if (IS_ERR(client)) {
+		err = PTR_ERR(client);
 		goto out_final;
 	}
 
-	err = ceph_mdsc_init(fsc);
-	if (err < 0)
-		goto out;
-
-	if (ceph_test_opt(fsc->client, NOSHARE))
+	if (client->mount_args->flags & CEPH_OPT_NOSHARE)
 		compare_super = NULL;
-	sb = sget(fs_type, compare_super, ceph_set_super, fsc);
+	sb = sget(fs_type, compare_super, ceph_set_super, client);
 	if (IS_ERR(sb)) {
 		err = PTR_ERR(sb);
 		goto out;
 	}
 
-	if (ceph_sb_to_client(sb) != fsc) {
-		ceph_mdsc_destroy(fsc);
-		destroy_fs_client(fsc);
-		fsc = ceph_sb_to_client(sb);
-		dout("get_sb got existing client %p\n", fsc);
+	if (ceph_sb_to_client(sb) != client) {
+		ceph_destroy_client(client);
+		client = ceph_sb_to_client(sb);
+		dout("get_sb got existing client %p\n", client);
 	} else {
-		dout("get_sb using new client %p\n", fsc);
-		err = ceph_register_bdi(sb, fsc);
+		dout("get_sb using new client %p\n", client);
+		err = ceph_register_bdi(sb, client);
 		if (err < 0)
 			goto out_splat;
 	}
 
-	err = ceph_mount(fsc, mnt, path);
+	err = ceph_mount(client, mnt, path);
 	if (err < 0)
 		goto out_splat;
 	dout("root %p inode %p ino %llx.%llx\n", mnt->mnt_root,
@@ -835,13 +1029,12 @@ static int ceph_get_sb(struct file_system_type *fs_type,
 	return 0;
 
 out_splat:
-	ceph_mdsc_close_sessions(fsc->mdsc);
+	ceph_mdsc_close_sessions(&client->mdsc);
 	deactivate_locked_super(sb);
 	goto out_final;
 
 out:
-	ceph_mdsc_destroy(fsc);
-	destroy_fs_client(fsc);
+	ceph_destroy_client(client);
 out_final:
 	dout("ceph_get_sb fail %d\n", err);
 	return err;
@@ -849,12 +1042,11 @@ static int ceph_get_sb(struct file_system_type *fs_type,
 
 static void ceph_kill_sb(struct super_block *s)
 {
-	struct ceph_fs_client *fsc = ceph_sb_to_client(s);
+	struct ceph_client *client = ceph_sb_to_client(s);
 	dout("kill_sb %p\n", s);
-	ceph_mdsc_pre_umount(fsc->mdsc);
+	ceph_mdsc_pre_umount(&client->mdsc);
 	kill_anon_super(s);    /* will call put_super after sb is r/o */
-	ceph_mdsc_destroy(fsc);
-	destroy_fs_client(fsc);
+	ceph_destroy_client(client);
 }
 
 static struct file_system_type ceph_fs_type = {
@@ -870,20 +1062,36 @@ static struct file_system_type ceph_fs_type = {
 
 static int __init init_ceph(void)
 {
-	int ret = init_caches();
-	if (ret)
+	int ret = 0;
+
+	ret = ceph_debugfs_init();
+	if (ret < 0)
 		goto out;
 
+	ret = ceph_msgr_init();
+	if (ret < 0)
+		goto out_debugfs;
+
+	ret = init_caches();
+	if (ret)
+		goto out_msgr;
+
 	ret = register_filesystem(&ceph_fs_type);
 	if (ret)
 		goto out_icache;
 
-	pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
-
+	pr_info("loaded (mon/mds/osd proto %d/%d/%d, osdmap %d/%d %d/%d)\n",
+		CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL,
+		CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT,
+		CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT);
 	return 0;
 
 out_icache:
 	destroy_caches();
+out_msgr:
+	ceph_msgr_exit();
+out_debugfs:
+	ceph_debugfs_cleanup();
 out:
 	return ret;
 }
@@ -893,6 +1101,8 @@ static void __exit exit_ceph(void)
 	dout("exit_ceph\n");
 	unregister_filesystem(&ceph_fs_type);
 	destroy_caches();
+	ceph_msgr_exit();
+	ceph_debugfs_cleanup();
 }
 
 module_init(init_ceph);
diff --git a/trunk/fs/ceph/super.h b/trunk/fs/ceph/super.h
index 1886294e12f7..b87638e84c4b 100644
--- a/trunk/fs/ceph/super.h
+++ b/trunk/fs/ceph/super.h
@@ -1,7 +1,7 @@
 #ifndef _FS_CEPH_SUPER_H
 #define _FS_CEPH_SUPER_H
 
-#include <linux/ceph/ceph_debug.h>
+#include "ceph_debug.h"
 
 #include <asm/unaligned.h>
 #include <linux/backing-dev.h>
@@ -14,7 +14,13 @@
 #include <linux/writeback.h>
 #include <linux/slab.h>
 
-#include <linux/ceph/libceph.h>
+#include "types.h"
+#include "messenger.h"
+#include "msgpool.h"
+#include "mon_client.h"
+#include "mds_client.h"
+#include "osd_client.h"
+#include "ceph_fs.h"
 
 /* f_type in struct statfs */
 #define CEPH_SUPER_MAGIC 0x00c36400
@@ -24,25 +30,42 @@
 #define CEPH_BLOCK_SHIFT   20  /* 1 MB */
 #define CEPH_BLOCK         (1 << CEPH_BLOCK_SHIFT)
 
-#define CEPH_MOUNT_OPT_DIRSTAT         (1<<4) /* `cat dirname` for stats */
-#define CEPH_MOUNT_OPT_RBYTES          (1<<5) /* dir st_bytes = rbytes */
-#define CEPH_MOUNT_OPT_NOASYNCREADDIR  (1<<7) /* no dcache readdir */
+/*
+ * Supported features
+ */
+#define CEPH_FEATURE_SUPPORTED CEPH_FEATURE_NOSRCADDR | CEPH_FEATURE_FLOCK
+#define CEPH_FEATURE_REQUIRED  CEPH_FEATURE_NOSRCADDR
 
-#define CEPH_MOUNT_OPT_DEFAULT    (CEPH_MOUNT_OPT_RBYTES)
+/*
+ * mount options
+ */
+#define CEPH_OPT_FSID             (1<<0)
+#define CEPH_OPT_NOSHARE          (1<<1) /* don't share client with other sbs */
+#define CEPH_OPT_MYIP             (1<<2) /* specified my ip */
+#define CEPH_OPT_DIRSTAT          (1<<4) /* funky `cat dirname` for stats */
+#define CEPH_OPT_RBYTES           (1<<5) /* dir st_bytes = rbytes */
+#define CEPH_OPT_NOCRC            (1<<6) /* no data crc on writes */
+#define CEPH_OPT_NOASYNCREADDIR   (1<<7) /* no dcache readdir */
 
-#define ceph_set_mount_opt(fsc, opt) \
-	(fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt;
-#define ceph_test_mount_opt(fsc, opt) \
-	(!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt))
+#define CEPH_OPT_DEFAULT   (CEPH_OPT_RBYTES)
 
-#define CEPH_MAX_READDIR_DEFAULT        1024
-#define CEPH_MAX_READDIR_BYTES_DEFAULT  (512*1024)
-#define CEPH_SNAPDIRNAME_DEFAULT        ".snap"
+#define ceph_set_opt(client, opt) \
+	(client)->mount_args->flags |= CEPH_OPT_##opt;
+#define ceph_test_opt(client, opt) \
+	(!!((client)->mount_args->flags & CEPH_OPT_##opt))
 
-struct ceph_mount_options {
-	int flags;
-	int sb_flags;
 
+struct ceph_mount_args {
+	int sb_flags;
+	int flags;
+	struct ceph_fsid fsid;
+	struct ceph_entity_addr my_addr;
+	int num_mon;
+	struct ceph_entity_addr *mon_addr;
+	int mount_timeout;
+	int osd_idle_ttl;
+	int osd_timeout;
+	int osd_keepalive_timeout;
 	int wsize;
 	int rsize;            /* max readahead */
 	int congestion_kb;    /* max writeback in flight */
@@ -50,25 +73,82 @@ struct ceph_mount_options {
 	int cap_release_safety;
 	int max_readdir;       /* max readdir result (entires) */
 	int max_readdir_bytes; /* max readdir result (bytes) */
-
-	/*
-	 * everything above this point can be memcmp'd; everything below
-	 * is handled in compare_mount_options()
-	 */
-
 	char *snapdir_name;   /* default ".snap" */
+	char *name;
+	char *secret;
 };
 
-struct ceph_fs_client {
-	struct super_block *sb;
+/*
+ * defaults
+ */
+#define CEPH_MOUNT_TIMEOUT_DEFAULT  60
+#define CEPH_OSD_TIMEOUT_DEFAULT    60  /* seconds */
+#define CEPH_OSD_KEEPALIVE_DEFAULT  5
+#define CEPH_OSD_IDLE_TTL_DEFAULT    60
+#define CEPH_MOUNT_RSIZE_DEFAULT    (512*1024) /* readahead */
+#define CEPH_MAX_READDIR_DEFAULT    1024
+#define CEPH_MAX_READDIR_BYTES_DEFAULT    (512*1024)
+
+#define CEPH_MSG_MAX_FRONT_LEN	(16*1024*1024)
+#define CEPH_MSG_MAX_DATA_LEN	(16*1024*1024)
+
+#define CEPH_SNAPDIRNAME_DEFAULT ".snap"
+#define CEPH_AUTH_NAME_DEFAULT   "guest"
+/*
+ * Delay telling the MDS we no longer want caps, in case we reopen
+ * the file.  Delay a minimum amount of time, even if we send a cap
+ * message for some other reason.  Otherwise, take the oppotunity to
+ * update the mds to avoid sending another message later.
+ */
+#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT      5  /* cap release delay */
+#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT     60  /* cap release delay */
+
+#define CEPH_CAP_RELEASE_SAFETY_DEFAULT        (CEPH_CAPS_PER_RELEASE * 4)
+
+/* mount state */
+enum {
+	CEPH_MOUNT_MOUNTING,
+	CEPH_MOUNT_MOUNTED,
+	CEPH_MOUNT_UNMOUNTING,
+	CEPH_MOUNT_UNMOUNTED,
+	CEPH_MOUNT_SHUTDOWN,
+};
 
-	struct ceph_mount_options *mount_options;
-	struct ceph_client *client;
+/*
+ * subtract jiffies
+ */
+static inline unsigned long time_sub(unsigned long a, unsigned long b)
+{
+	BUG_ON(time_after(b, a));
+	return (long)a - (long)b;
+}
+
+/*
+ * per-filesystem client state
+ *
+ * possibly shared by multiple mount points, if they are
+ * mounting the same ceph filesystem/cluster.
+ */
+struct ceph_client {
+	struct ceph_fsid fsid;
+	bool have_fsid;
+
+	struct mutex mount_mutex;       /* serialize mount attempts */
+	struct ceph_mount_args *mount_args;
+
+	struct super_block *sb;
 
 	unsigned long mount_state;
+	wait_queue_head_t auth_wq;
+
+	int auth_err;
+
 	int min_caps;                  /* min caps i added */
 
-	struct ceph_mds_client *mdsc;
+	struct ceph_messenger *msgr;   /* messenger instance */
+	struct ceph_mon_client monc;
+	struct ceph_mds_client mdsc;
+	struct ceph_osd_client osdc;
 
 	/* writeback */
 	mempool_t *wb_pagevec_pool;
@@ -80,14 +160,14 @@ struct ceph_fs_client {
 	struct backing_dev_info backing_dev_info;
 
 #ifdef CONFIG_DEBUG_FS
-	struct dentry *debugfs_dentry_lru, *debugfs_caps;
+	struct dentry *debugfs_monmap;
+	struct dentry *debugfs_mdsmap, *debugfs_osdmap;
+	struct dentry *debugfs_dir, *debugfs_dentry_lru, *debugfs_caps;
 	struct dentry *debugfs_congestion_kb;
 	struct dentry *debugfs_bdi;
-	struct dentry *debugfs_mdsc, *debugfs_mdsmap;
 #endif
 };
 
-
 /*
  * File i/o capability.  This tracks shared state with the metadata
  * server that allows us to cache or writeback attributes or to read
@@ -195,20 +275,6 @@ struct ceph_inode_xattr {
 	int should_free_val;
 };
 
-/*
- * Ceph dentry state
- */
-struct ceph_dentry_info {
-	struct ceph_mds_session *lease_session;
-	u32 lease_gen, lease_shared_gen;
-	u32 lease_seq;
-	unsigned long lease_renew_after, lease_renew_from;
-	struct list_head lru;
-	struct dentry *dentry;
-	u64 time;
-	u64 offset;
-};
-
 struct ceph_inode_xattrs_info {
 	/*
 	 * (still encoded) xattr blob. we avoid the overhead of parsing
@@ -230,6 +296,11 @@ struct ceph_inode_xattrs_info {
 /*
  * Ceph inode.
  */
+#define CEPH_I_COMPLETE  1  /* we have complete directory cached */
+#define CEPH_I_NODELAY   4  /* do not delay cap release */
+#define CEPH_I_FLUSH     8  /* do not delay flush of dirty metadata */
+#define CEPH_I_NOFLUSH  16  /* do not flush dirty caps */
+
 struct ceph_inode_info {
 	struct ceph_vino i_vino;   /* ceph ino + snap */
 
@@ -320,63 +391,6 @@ static inline struct ceph_inode_info *ceph_inode(struct inode *inode)
 	return container_of(inode, struct ceph_inode_info, vfs_inode);
 }
 
-static inline struct ceph_vino ceph_vino(struct inode *inode)
-{
-	return ceph_inode(inode)->i_vino;
-}
-
-/*
- * ino_t is <64 bits on many architectures, blech.
- *
- * don't include snap in ino hash, at least for now.
- */
-static inline ino_t ceph_vino_to_ino(struct ceph_vino vino)
-{
-	ino_t ino = (ino_t)vino.ino;  /* ^ (vino.snap << 20); */
-#if BITS_PER_LONG == 32
-	ino ^= vino.ino >> (sizeof(u64)-sizeof(ino_t)) * 8;
-	if (!ino)
-		ino = 1;
-#endif
-	return ino;
-}
-
-/* for printf-style formatting */
-#define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap
-
-static inline u64 ceph_ino(struct inode *inode)
-{
-	return ceph_inode(inode)->i_vino.ino;
-}
-static inline u64 ceph_snap(struct inode *inode)
-{
-	return ceph_inode(inode)->i_vino.snap;
-}
-
-static inline int ceph_ino_compare(struct inode *inode, void *data)
-{
-	struct ceph_vino *pvino = (struct ceph_vino *)data;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	return ci->i_vino.ino == pvino->ino &&
-		ci->i_vino.snap == pvino->snap;
-}
-
-static inline struct inode *ceph_find_inode(struct super_block *sb,
-					    struct ceph_vino vino)
-{
-	ino_t t = ceph_vino_to_ino(vino);
-	return ilookup5(sb, t, ceph_ino_compare, &vino);
-}
-
-
-/*
- * Ceph inode.
- */
-#define CEPH_I_COMPLETE  1  /* we have complete directory cached */
-#define CEPH_I_NODELAY   4  /* do not delay cap release */
-#define CEPH_I_FLUSH     8  /* do not delay flush of dirty metadata */
-#define CEPH_I_NOFLUSH  16  /* do not flush dirty caps */
-
 static inline void ceph_i_clear(struct inode *inode, unsigned mask)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
@@ -400,9 +414,8 @@ static inline bool ceph_i_test(struct inode *inode, unsigned mask)
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	bool r;
 
-	spin_lock(&inode->i_lock);
+	smp_mb();
 	r = (ci->i_ceph_flags & mask) == mask;
-	spin_unlock(&inode->i_lock);
 	return r;
 }
 
@@ -419,6 +432,20 @@ extern u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
 			    struct ceph_inode_frag *pfrag,
 			    int *found);
 
+/*
+ * Ceph dentry state
+ */
+struct ceph_dentry_info {
+	struct ceph_mds_session *lease_session;
+	u32 lease_gen, lease_shared_gen;
+	u32 lease_seq;
+	unsigned long lease_renew_after, lease_renew_from;
+	struct list_head lru;
+	struct dentry *dentry;
+	u64 time;
+	u64 offset;
+};
+
 static inline struct ceph_dentry_info *ceph_dentry(struct dentry *dentry)
 {
 	return (struct ceph_dentry_info *)dentry->d_fsdata;
@@ -429,6 +456,22 @@ static inline loff_t ceph_make_fpos(unsigned frag, unsigned off)
 	return ((loff_t)frag << 32) | (loff_t)off;
 }
 
+/*
+ * ino_t is <64 bits on many architectures, blech.
+ *
+ * don't include snap in ino hash, at least for now.
+ */
+static inline ino_t ceph_vino_to_ino(struct ceph_vino vino)
+{
+	ino_t ino = (ino_t)vino.ino;  /* ^ (vino.snap << 20); */
+#if BITS_PER_LONG == 32
+	ino ^= vino.ino >> (sizeof(u64)-sizeof(ino_t)) * 8;
+	if (!ino)
+		ino = 1;
+#endif
+	return ino;
+}
+
 static inline int ceph_set_ino_cb(struct inode *inode, void *data)
 {
 	ceph_inode(inode)->i_vino = *(struct ceph_vino *)data;
@@ -436,6 +479,39 @@ static inline int ceph_set_ino_cb(struct inode *inode, void *data)
 	return 0;
 }
 
+static inline struct ceph_vino ceph_vino(struct inode *inode)
+{
+	return ceph_inode(inode)->i_vino;
+}
+
+/* for printf-style formatting */
+#define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap
+
+static inline u64 ceph_ino(struct inode *inode)
+{
+	return ceph_inode(inode)->i_vino.ino;
+}
+static inline u64 ceph_snap(struct inode *inode)
+{
+	return ceph_inode(inode)->i_vino.snap;
+}
+
+static inline int ceph_ino_compare(struct inode *inode, void *data)
+{
+	struct ceph_vino *pvino = (struct ceph_vino *)data;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	return ci->i_vino.ino == pvino->ino &&
+		ci->i_vino.snap == pvino->snap;
+}
+
+static inline struct inode *ceph_find_inode(struct super_block *sb,
+					    struct ceph_vino vino)
+{
+	ino_t t = ceph_vino_to_ino(vino);
+	return ilookup5(sb, t, ceph_ino_compare, &vino);
+}
+
+
 /*
  * caps helpers
  */
@@ -500,18 +576,18 @@ extern int ceph_reserve_caps(struct ceph_mds_client *mdsc,
 			     struct ceph_cap_reservation *ctx, int need);
 extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
 			       struct ceph_cap_reservation *ctx);
-extern void ceph_reservation_status(struct ceph_fs_client *client,
+extern void ceph_reservation_status(struct ceph_client *client,
 				    int *total, int *avail, int *used,
 				    int *reserved, int *min);
 
-static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode)
+static inline struct ceph_client *ceph_inode_to_client(struct inode *inode)
 {
-	return (struct ceph_fs_client *)inode->i_sb->s_fs_info;
+	return (struct ceph_client *)inode->i_sb->s_fs_info;
 }
 
-static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb)
+static inline struct ceph_client *ceph_sb_to_client(struct super_block *sb)
 {
-	return (struct ceph_fs_client *)sb->s_fs_info;
+	return (struct ceph_client *)sb->s_fs_info;
 }
 
 
@@ -540,6 +616,51 @@ struct ceph_file_info {
 
 
 
+/*
+ * snapshots
+ */
+
+/*
+ * A "snap context" is the set of existing snapshots when we
+ * write data.  It is used by the OSD to guide its COW behavior.
+ *
+ * The ceph_snap_context is refcounted, and attached to each dirty
+ * page, indicating which context the dirty data belonged when it was
+ * dirtied.
+ */
+struct ceph_snap_context {
+	atomic_t nref;
+	u64 seq;
+	int num_snaps;
+	u64 snaps[];
+};
+
+static inline struct ceph_snap_context *
+ceph_get_snap_context(struct ceph_snap_context *sc)
+{
+	/*
+	printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref),
+	       atomic_read(&sc->nref)+1);
+	*/
+	if (sc)
+		atomic_inc(&sc->nref);
+	return sc;
+}
+
+static inline void ceph_put_snap_context(struct ceph_snap_context *sc)
+{
+	if (!sc)
+		return;
+	/*
+	printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref),
+	       atomic_read(&sc->nref)-1);
+	*/
+	if (atomic_dec_and_test(&sc->nref)) {
+		/*printk(" deleting snap_context %p\n", sc);*/
+		kfree(sc);
+	}
+}
+
 /*
  * A "snap realm" describes a subset of the file hierarchy sharing
  * the same set of snapshots that apply to it.  The realms themselves
@@ -578,33 +699,16 @@ struct ceph_snap_realm {
 	spinlock_t inodes_with_caps_lock;
 };
 
-static inline int default_congestion_kb(void)
-{
-	int congestion_kb;
 
-	/*
-	 * Copied from NFS
-	 *
-	 * congestion size, scale with available memory.
-	 *
-	 *  64MB:    8192k
-	 * 128MB:   11585k
-	 * 256MB:   16384k
-	 * 512MB:   23170k
-	 *   1GB:   32768k
-	 *   2GB:   46340k
-	 *   4GB:   65536k
-	 *   8GB:   92681k
-	 *  16GB:  131072k
-	 *
-	 * This allows larger machines to have larger/more transfers.
-	 * Limit the default to 256M
-	 */
-	congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
-	if (congestion_kb > 256*1024)
-		congestion_kb = 256*1024;
-
-	return congestion_kb;
+
+/*
+ * calculate the number of pages a given length and offset map onto,
+ * if we align the data.
+ */
+static inline int calc_pages_for(u64 off, u64 len)
+{
+	return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) -
+		(off >> PAGE_CACHE_SHIFT);
 }
 
 
@@ -637,6 +741,16 @@ static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci)
 			   ci_item)->writing;
 }
 
+
+/* super.c */
+extern struct kmem_cache *ceph_inode_cachep;
+extern struct kmem_cache *ceph_cap_cachep;
+extern struct kmem_cache *ceph_dentry_cachep;
+extern struct kmem_cache *ceph_file_cachep;
+
+extern const char *ceph_msg_type_name(int type);
+extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
+
 /* inode.c */
 extern const struct inode_operations ceph_file_iops;
 
@@ -743,18 +857,12 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
 /* file.c */
 extern const struct file_operations ceph_file_fops;
 extern const struct address_space_operations ceph_aops;
-extern int ceph_copy_to_page_vector(struct page **pages,
-				    const char *data,
-				    loff_t off, size_t len);
-extern int ceph_copy_from_page_vector(struct page **pages,
-				    char *data,
-				    loff_t off, size_t len);
-extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
 extern int ceph_open(struct inode *inode, struct file *file);
 extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
 				       struct nameidata *nd, int mode,
 				       int locked_dir);
 extern int ceph_release(struct inode *inode, struct file *filp);
+extern void ceph_release_page_vector(struct page **pages, int num_pages);
 
 /* dir.c */
 extern const struct file_operations ceph_dir_fops;
@@ -784,6 +892,12 @@ extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 /* export.c */
 extern const struct export_operations ceph_export_ops;
 
+/* debugfs.c */
+extern int ceph_debugfs_init(void);
+extern void ceph_debugfs_cleanup(void);
+extern int ceph_debugfs_client_init(struct ceph_client *client);
+extern void ceph_debugfs_client_cleanup(struct ceph_client *client);
+
 /* locks.c */
 extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl);
 extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl);
@@ -800,8 +914,4 @@ static inline struct inode *get_dentry_parent_inode(struct dentry *dentry)
 	return NULL;
 }
 
-/* debugfs.c */
-extern int ceph_fs_debugfs_init(struct ceph_fs_client *client);
-extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client);
-
 #endif /* _FS_CEPH_SUPER_H */
diff --git a/trunk/include/linux/ceph/types.h b/trunk/fs/ceph/types.h
similarity index 100%
rename from trunk/include/linux/ceph/types.h
rename to trunk/fs/ceph/types.h
diff --git a/trunk/fs/ceph/xattr.c b/trunk/fs/ceph/xattr.c
index 6e12a6ba5f79..9578af610b73 100644
--- a/trunk/fs/ceph/xattr.c
+++ b/trunk/fs/ceph/xattr.c
@@ -1,9 +1,6 @@
-#include <linux/ceph/ceph_debug.h>
-
+#include "ceph_debug.h"
 #include "super.h"
-#include "mds_client.h"
-
-#include <linux/ceph/decode.h>
+#include "decode.h"
 
 #include <linux/xattr.h>
 #include <linux/slab.h>
@@ -623,12 +620,12 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
 static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
 			      const char *value, size_t size, int flags)
 {
-	struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
+	struct ceph_client *client = ceph_sb_to_client(dentry->d_sb);
 	struct inode *inode = dentry->d_inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct inode *parent_inode = dentry->d_parent->d_inode;
 	struct ceph_mds_request *req;
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_mds_client *mdsc = &client->mdsc;
 	int err;
 	int i, nr_pages;
 	struct page **pages = NULL;
@@ -716,9 +713,10 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
 
 	/* preallocate memory for xattr name, value, index node */
 	err = -ENOMEM;
-	newname = kmemdup(name, name_len + 1, GFP_NOFS);
+	newname = kmalloc(name_len + 1, GFP_NOFS);
 	if (!newname)
 		goto out;
+	memcpy(newname, name, name_len + 1);
 
 	if (val_len) {
 		newval = kmalloc(val_len + 1, GFP_NOFS);
@@ -779,8 +777,8 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
 
 static int ceph_send_removexattr(struct dentry *dentry, const char *name)
 {
-	struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_client *client = ceph_sb_to_client(dentry->d_sb);
+	struct ceph_mds_client *mdsc = &client->mdsc;
 	struct inode *inode = dentry->d_inode;
 	struct inode *parent_inode = dentry->d_parent->d_inode;
 	struct ceph_mds_request *req;
diff --git a/trunk/fs/gfs2/Kconfig b/trunk/fs/gfs2/Kconfig
index c465ae066c62..cc9665522148 100644
--- a/trunk/fs/gfs2/Kconfig
+++ b/trunk/fs/gfs2/Kconfig
@@ -1,6 +1,6 @@
 config GFS2_FS
 	tristate "GFS2 file system support"
-	depends on (64BIT || LBDAF)
+	depends on EXPERIMENTAL && (64BIT || LBDAF)
 	select DLM if GFS2_FS_LOCKING_DLM
 	select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
 	select SYSFS if GFS2_FS_LOCKING_DLM
diff --git a/trunk/fs/gfs2/aops.c b/trunk/fs/gfs2/aops.c
index 6b24afb96aae..194fe16d8418 100644
--- a/trunk/fs/gfs2/aops.c
+++ b/trunk/fs/gfs2/aops.c
@@ -36,8 +36,8 @@
 #include "glops.h"
 
 
-void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
-			    unsigned int from, unsigned int to)
+static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
+				   unsigned int from, unsigned int to)
 {
 	struct buffer_head *head = page_buffers(page);
 	unsigned int bsize = head->b_size;
@@ -615,7 +615,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
 	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
 	int alloc_required;
 	int error = 0;
-	struct gfs2_alloc *al = NULL;
+	struct gfs2_alloc *al;
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
 	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
 	unsigned to = from + len;
@@ -663,8 +663,6 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
 		rblocks += RES_STATFS + RES_QUOTA;
 	if (&ip->i_inode == sdp->sd_rindex)
 		rblocks += 2 * RES_STATFS;
-	if (alloc_required)
-		rblocks += gfs2_rg_blocks(al);
 
 	error = gfs2_trans_begin(sdp, rblocks,
 				 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
@@ -698,11 +696,13 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
 
 	page_cache_release(page);
 
-	gfs2_trans_end(sdp);
+	/*
+	 * XXX(truncate): the call below should probably be replaced with
+	 * a call to the gfs2-specific truncate blocks helper to actually
+	 * release disk blocks..
+	 */
 	if (pos + len > ip->i_inode.i_size)
-		gfs2_trim_blocks(&ip->i_inode);
-	goto out_trans_fail;
-
+		truncate_setsize(&ip->i_inode, ip->i_inode.i_size);
 out_endtrans:
 	gfs2_trans_end(sdp);
 out_trans_fail:
@@ -802,8 +802,10 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
 	page_cache_release(page);
 
 	if (copied) {
-		if (inode->i_size < to)
+		if (inode->i_size < to) {
 			i_size_write(inode, to);
+			ip->i_disksize = inode->i_size;
+		}
 		gfs2_dinode_out(ip, di);
 		mark_inode_dirty(inode);
 	}
@@ -874,6 +876,8 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
 
 	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
 	if (ret > 0) {
+		if (inode->i_size > ip->i_disksize)
+			ip->i_disksize = inode->i_size;
 		gfs2_dinode_out(ip, dibh->b_data);
 		mark_inode_dirty(inode);
 	}
diff --git a/trunk/fs/gfs2/bmap.c b/trunk/fs/gfs2/bmap.c
index 5476c066d4ee..6f482809d1a3 100644
--- a/trunk/fs/gfs2/bmap.c
+++ b/trunk/fs/gfs2/bmap.c
@@ -50,7 +50,7 @@ struct strip_mine {
  * @ip: the inode
  * @dibh: the dinode buffer
  * @block: the block number that was allocated
- * @page: The (optional) page. This is looked up if @page is NULL
+ * @private: any locked page held by the caller process
  *
  * Returns: errno
  */
@@ -109,7 +109,8 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
 /**
  * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
  * @ip: The GFS2 inode to unstuff
- * @page: The (optional) page. This is looked up if the @page is NULL
+ * @unstuffer: the routine that handles unstuffing a non-zero length file
+ * @private: private data for the unstuffer
  *
  * This routine unstuffs a dinode and returns it to a "normal" state such
  * that the height can be grown in the traditional way.
@@ -131,7 +132,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
 	if (error)
 		goto out;
 
-	if (i_size_read(&ip->i_inode)) {
+	if (ip->i_disksize) {
 		/* Get a free block, fill it with the stuffed data,
 		   and write it out to disk */
 
@@ -160,7 +161,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
 	di = (struct gfs2_dinode *)dibh->b_data;
 	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
 
-	if (i_size_read(&ip->i_inode)) {
+	if (ip->i_disksize) {
 		*(__be64 *)(di + 1) = cpu_to_be64(block);
 		gfs2_add_inode_blocks(&ip->i_inode, 1);
 		di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
@@ -883,15 +884,84 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 	return error;
 }
 
+/**
+ * do_grow - Make a file look bigger than it is
+ * @ip: the inode
+ * @size: the size to set the file to
+ *
+ * Called with an exclusive lock on @ip.
+ *
+ * Returns: errno
+ */
+
+static int do_grow(struct gfs2_inode *ip, u64 size)
+{
+	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+	struct gfs2_alloc *al;
+	struct buffer_head *dibh;
+	int error;
+
+	al = gfs2_alloc_get(ip);
+	if (!al)
+		return -ENOMEM;
+
+	error = gfs2_quota_lock_check(ip);
+	if (error)
+		goto out;
+
+	al->al_requested = sdp->sd_max_height + RES_DATA;
+
+	error = gfs2_inplace_reserve(ip);
+	if (error)
+		goto out_gunlock_q;
+
+	error = gfs2_trans_begin(sdp,
+			sdp->sd_max_height + al->al_rgd->rd_length +
+			RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0);
+	if (error)
+		goto out_ipres;
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (error)
+		goto out_end_trans;
+
+	if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
+		if (gfs2_is_stuffed(ip)) {
+			error = gfs2_unstuff_dinode(ip, NULL);
+			if (error)
+				goto out_brelse;
+		}
+	}
+
+	ip->i_disksize = size;
+	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
+	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+	gfs2_dinode_out(ip, dibh->b_data);
+
+out_brelse:
+	brelse(dibh);
+out_end_trans:
+	gfs2_trans_end(sdp);
+out_ipres:
+	gfs2_inplace_release(ip);
+out_gunlock_q:
+	gfs2_quota_unlock(ip);
+out:
+	gfs2_alloc_put(ip);
+	return error;
+}
+
+
 /**
  * gfs2_block_truncate_page - Deal with zeroing out data for truncate
  *
  * This is partly borrowed from ext3.
  */
-static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from)
+static int gfs2_block_truncate_page(struct address_space *mapping)
 {
 	struct inode *inode = mapping->host;
 	struct gfs2_inode *ip = GFS2_I(inode);
+	loff_t from = inode->i_size;
 	unsigned long index = from >> PAGE_CACHE_SHIFT;
 	unsigned offset = from & (PAGE_CACHE_SIZE-1);
 	unsigned blocksize, iblock, length, pos;
@@ -953,11 +1023,9 @@ static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from)
 	return err;
 }
 
-static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize)
+static int trunc_start(struct gfs2_inode *ip, u64 size)
 {
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	struct address_space *mapping = inode->i_mapping;
+	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct buffer_head *dibh;
 	int journaled = gfs2_is_jdata(ip);
 	int error;
@@ -971,26 +1039,31 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize)
 	if (error)
 		goto out;
 
-	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-
 	if (gfs2_is_stuffed(ip)) {
-		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
+		u64 dsize = size + sizeof(struct gfs2_dinode);
+		ip->i_disksize = size;
+		ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
+		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+		gfs2_dinode_out(ip, dibh->b_data);
+		if (dsize > dibh->b_size)
+			dsize = dibh->b_size;
+		gfs2_buffer_clear_tail(dibh, dsize);
+		error = 1;
 	} else {
-		if (newsize & (u64)(sdp->sd_sb.sb_bsize - 1)) {
-			error = gfs2_block_truncate_page(mapping, newsize);
-			if (error)
-				goto out_brelse;
+		if (size & (u64)(sdp->sd_sb.sb_bsize - 1))
+			error = gfs2_block_truncate_page(ip->i_inode.i_mapping);
+
+		if (!error) {
+			ip->i_disksize = size;
+			ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
+			ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
+			gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+			gfs2_dinode_out(ip, dibh->b_data);
 		}
-		ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
 	}
 
-	i_size_write(inode, newsize);
-	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
-	gfs2_dinode_out(ip, dibh->b_data);
-
-	truncate_pagecache(inode, oldsize, newsize);
-out_brelse:
 	brelse(dibh);
+
 out:
 	gfs2_trans_end(sdp);
 	return error;
@@ -1050,7 +1123,7 @@ static int trunc_end(struct gfs2_inode *ip)
 	if (error)
 		goto out;
 
-	if (!i_size_read(&ip->i_inode)) {
+	if (!ip->i_disksize) {
 		ip->i_height = 0;
 		ip->i_goal = ip->i_no_addr;
 		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
@@ -1070,154 +1143,92 @@ static int trunc_end(struct gfs2_inode *ip)
 
 /**
  * do_shrink - make a file smaller
- * @inode: the inode
- * @oldsize: the current inode size
- * @newsize: the size to make the file
+ * @ip: the inode
+ * @size: the size to make the file
+ * @truncator: function to truncate the last partial block
  *
- * Called with an exclusive lock on @inode. The @size must
- * be equal to or smaller than the current inode size.
+ * Called with an exclusive lock on @ip.
  *
  * Returns: errno
  */
 
-static int do_shrink(struct inode *inode, u64 oldsize, u64 newsize)
+static int do_shrink(struct gfs2_inode *ip, u64 size)
 {
-	struct gfs2_inode *ip = GFS2_I(inode);
 	int error;
 
-	error = trunc_start(inode, oldsize, newsize);
+	error = trunc_start(ip, size);
 	if (error < 0)
 		return error;
-	if (gfs2_is_stuffed(ip))
+	if (error > 0)
 		return 0;
 
-	error = trunc_dealloc(ip, newsize);
-	if (error == 0)
+	error = trunc_dealloc(ip, size);
+	if (!error)
 		error = trunc_end(ip);
 
 	return error;
 }
 
-void gfs2_trim_blocks(struct inode *inode)
-{
-	u64 size = inode->i_size;
-	int ret;
-
-	ret = do_shrink(inode, size, size);
-	WARN_ON(ret != 0);
-}
-
-/**
- * do_grow - Touch and update inode size
- * @inode: The inode
- * @size: The new size
- *
- * This function updates the timestamps on the inode and
- * may also increase the size of the inode. This function
- * must not be called with @size any smaller than the current
- * inode size.
- *
- * Although it is not strictly required to unstuff files here,
- * earlier versions of GFS2 have a bug in the stuffed file reading
- * code which will result in a buffer overrun if the size is larger
- * than the max stuffed file size. In order to prevent this from
- * occuring, such files are unstuffed, but in other cases we can
- * just update the inode size directly.
- *
- * Returns: 0 on success, or -ve on error
- */
-
-static int do_grow(struct inode *inode, u64 size)
+static int do_touch(struct gfs2_inode *ip, u64 size)
 {
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct buffer_head *dibh;
-	struct gfs2_alloc *al = NULL;
 	int error;
 
-	if (gfs2_is_stuffed(ip) &&
-	    (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) {
-		al = gfs2_alloc_get(ip);
-		if (al == NULL)
-			return -ENOMEM;
-
-		error = gfs2_quota_lock_check(ip);
-		if (error)
-			goto do_grow_alloc_put;
-
-		al->al_requested = 1;
-		error = gfs2_inplace_reserve(ip);
-		if (error)
-			goto do_grow_qunlock;
-	}
-
-	error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT, 0);
+	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
 	if (error)
-		goto do_grow_release;
+		return error;
 
-	if (al) {
-		error = gfs2_unstuff_dinode(ip, NULL);
-		if (error)
-			goto do_end_trans;
-	}
+	down_write(&ip->i_rw_mutex);
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (error)
-		goto do_end_trans;
+		goto do_touch_out;
 
-	i_size_write(inode, size);
 	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);
 
-do_end_trans:
+do_touch_out:
+	up_write(&ip->i_rw_mutex);
 	gfs2_trans_end(sdp);
-do_grow_release:
-	if (al) {
-		gfs2_inplace_release(ip);
-do_grow_qunlock:
-		gfs2_quota_unlock(ip);
-do_grow_alloc_put:
-		gfs2_alloc_put(ip);
-	}
 	return error;
 }
 
 /**
- * gfs2_setattr_size - make a file a given size
- * @inode: the inode
- * @newsize: the size to make the file
+ * gfs2_truncatei - make a file a given size
+ * @ip: the inode
+ * @size: the size to make the file
+ * @truncator: function to truncate the last partial block
  *
- * The file size can grow, shrink, or stay the same size. This
- * is called holding i_mutex and an exclusive glock on the inode
- * in question.
+ * The file size can grow, shrink, or stay the same size.
  *
  * Returns: errno
  */
 
-int gfs2_setattr_size(struct inode *inode, u64 newsize)
+int gfs2_truncatei(struct gfs2_inode *ip, u64 size)
 {
-	int ret;
-	u64 oldsize;
-
-	BUG_ON(!S_ISREG(inode->i_mode));
+	int error;
 
-	ret = inode_newsize_ok(inode, newsize);
-	if (ret)
-		return ret;
+	if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), S_ISREG(ip->i_inode.i_mode)))
+		return -EINVAL;
 
-	oldsize = inode->i_size;
-	if (newsize >= oldsize)
-		return do_grow(inode, newsize);
+	if (size > ip->i_disksize)
+		error = do_grow(ip, size);
+	else if (size < ip->i_disksize)
+		error = do_shrink(ip, size);
+	else
+		/* update time stamps */
+		error = do_touch(ip, size);
 
-	return do_shrink(inode, oldsize, newsize);
+	return error;
 }
 
 int gfs2_truncatei_resume(struct gfs2_inode *ip)
 {
 	int error;
-	error = trunc_dealloc(ip, i_size_read(&ip->i_inode));
+	error = trunc_dealloc(ip, ip->i_disksize);
 	if (!error)
 		error = trunc_end(ip);
 	return error;
@@ -1258,7 +1269,7 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
 
 	shift = sdp->sd_sb.sb_bsize_shift;
 	BUG_ON(gfs2_is_dir(ip));
-	end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
+	end_of_file = (ip->i_disksize + sdp->sd_sb.sb_bsize - 1) >> shift;
 	lblock = offset >> shift;
 	lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
 	if (lblock_stop > end_of_file)
diff --git a/trunk/fs/gfs2/bmap.h b/trunk/fs/gfs2/bmap.h
index 42fea03e2bd9..a20a5213135a 100644
--- a/trunk/fs/gfs2/bmap.h
+++ b/trunk/fs/gfs2/bmap.h
@@ -44,16 +44,14 @@ static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip,
 	}
 }
 
-extern int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page);
-extern int gfs2_block_map(struct inode *inode, sector_t lblock,
-			  struct buffer_head *bh, int create);
-extern int gfs2_extent_map(struct inode *inode, u64 lblock, int *new,
-			   u64 *dblock, unsigned *extlen);
-extern int gfs2_setattr_size(struct inode *inode, u64 size);
-extern void gfs2_trim_blocks(struct inode *inode);
-extern int gfs2_truncatei_resume(struct gfs2_inode *ip);
-extern int gfs2_file_dealloc(struct gfs2_inode *ip);
-extern int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
-				     unsigned int len);
+int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page);
+int gfs2_block_map(struct inode *inode, sector_t lblock, struct buffer_head *bh, int create);
+int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen);
+
+int gfs2_truncatei(struct gfs2_inode *ip, u64 size);
+int gfs2_truncatei_resume(struct gfs2_inode *ip);
+int gfs2_file_dealloc(struct gfs2_inode *ip);
+int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
+			      unsigned int len);
 
 #endif /* __BMAP_DOT_H__ */
diff --git a/trunk/fs/gfs2/dentry.c b/trunk/fs/gfs2/dentry.c
index 6798755b3858..bb7907bde3d8 100644
--- a/trunk/fs/gfs2/dentry.c
+++ b/trunk/fs/gfs2/dentry.c
@@ -49,7 +49,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
 		ip = GFS2_I(inode);
 	}
 
-	if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
+	if (sdp->sd_args.ar_localcaching)
 		goto valid;
 
 	had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL);
diff --git a/trunk/fs/gfs2/dir.c b/trunk/fs/gfs2/dir.c
index 5c356d09c321..b9dd88a78dd4 100644
--- a/trunk/fs/gfs2/dir.c
+++ b/trunk/fs/gfs2/dir.c
@@ -79,9 +79,6 @@
 #define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1)
 #define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1))
 
-struct qstr gfs2_qdot __read_mostly;
-struct qstr gfs2_qdotdot __read_mostly;
-
 typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len,
 			    u64 leaf_no, void *data);
 typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent,
@@ -130,8 +127,8 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
-	if (ip->i_inode.i_size < offset + size)
-		i_size_write(&ip->i_inode, offset + size);
+	if (ip->i_disksize < offset + size)
+		ip->i_disksize = offset + size;
 	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 	gfs2_dinode_out(ip, dibh->b_data);
 
@@ -228,8 +225,8 @@ static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
 	if (error)
 		return error;
 
-	if (ip->i_inode.i_size < offset + copied)
-		i_size_write(&ip->i_inode, offset + copied);
+	if (ip->i_disksize < offset + copied)
+		ip->i_disksize = offset + copied;
 	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
@@ -278,13 +275,12 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset,
 	unsigned int o;
 	int copied = 0;
 	int error = 0;
-	u64 disksize = i_size_read(&ip->i_inode);
 
-	if (offset >= disksize)
+	if (offset >= ip->i_disksize)
 		return 0;
 
-	if (offset + size > disksize)
-		size = disksize - offset;
+	if (offset + size > ip->i_disksize)
+		size = ip->i_disksize - offset;
 
 	if (!size)
 		return 0;
@@ -731,7 +727,7 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode,
 		unsigned hsize = 1 << ip->i_depth;
 		unsigned index;
 		u64 ln;
-		if (hsize * sizeof(u64) != i_size_read(inode)) {
+		if (hsize * sizeof(u64) != ip->i_disksize) {
 			gfs2_consist_inode(ip);
 			return ERR_PTR(-EIO);
 		}
@@ -883,7 +879,7 @@ static int dir_make_exhash(struct inode *inode)
 	for (x = sdp->sd_hash_ptrs; x--; lp++)
 		*lp = cpu_to_be64(bn);
 
-	i_size_write(inode, sdp->sd_sb.sb_bsize / 2);
+	dip->i_disksize = sdp->sd_sb.sb_bsize / 2;
 	gfs2_add_inode_blocks(&dip->i_inode, 1);
 	dip->i_diskflags |= GFS2_DIF_EXHASH;
 
@@ -1061,12 +1057,11 @@ static int dir_double_exhash(struct gfs2_inode *dip)
 	u64 *buf;
 	u64 *from, *to;
 	u64 block;
-	u64 disksize = i_size_read(&dip->i_inode);
 	int x;
 	int error = 0;
 
 	hsize = 1 << dip->i_depth;
-	if (hsize * sizeof(u64) != disksize) {
+	if (hsize * sizeof(u64) != dip->i_disksize) {
 		gfs2_consist_inode(dip);
 		return -EIO;
 	}
@@ -1077,7 +1072,7 @@ static int dir_double_exhash(struct gfs2_inode *dip)
 	if (!buf)
 		return -ENOMEM;
 
-	for (block = disksize >> sdp->sd_hash_bsize_shift; block--;) {
+	for (block = dip->i_disksize >> sdp->sd_hash_bsize_shift; block--;) {
 		error = gfs2_dir_read_data(dip, (char *)buf,
 					    block * sdp->sd_hash_bsize,
 					    sdp->sd_hash_bsize, 1);
@@ -1375,7 +1370,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
 	unsigned depth = 0;
 
 	hsize = 1 << dip->i_depth;
-	if (hsize * sizeof(u64) != i_size_read(inode)) {
+	if (hsize * sizeof(u64) != dip->i_disksize) {
 		gfs2_consist_inode(dip);
 		return -EIO;
 	}
@@ -1789,7 +1784,7 @@ static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
 	int error = 0;
 
 	hsize = 1 << dip->i_depth;
-	if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) {
+	if (hsize * sizeof(u64) != dip->i_disksize) {
 		gfs2_consist_inode(dip);
 		return -EIO;
 	}
diff --git a/trunk/fs/gfs2/dir.h b/trunk/fs/gfs2/dir.h
index a98f644bd3df..4f919440c3be 100644
--- a/trunk/fs/gfs2/dir.h
+++ b/trunk/fs/gfs2/dir.h
@@ -17,24 +17,23 @@ struct inode;
 struct gfs2_inode;
 struct gfs2_inum;
 
-extern struct inode *gfs2_dir_search(struct inode *dir,
-				     const struct qstr *filename);
-extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename,
-			  const struct gfs2_inode *ip);
-extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
-			const struct gfs2_inode *ip, unsigned int type);
-extern int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
-extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
-			 filldir_t filldir);
-extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
-			  const struct gfs2_inode *nip, unsigned int new_type);
+struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *filename);
+int gfs2_dir_check(struct inode *dir, const struct qstr *filename,
+		   const struct gfs2_inode *ip);
+int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
+		 const struct gfs2_inode *ip, unsigned int type);
+int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
+int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
+		  filldir_t filldir);
+int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
+		   const struct gfs2_inode *nip, unsigned int new_type);
 
-extern int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);
+int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);
 
-extern int gfs2_diradd_alloc_required(struct inode *dir,
-				      const struct qstr *filename);
-extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
-				   struct buffer_head **bhp);
+int gfs2_diradd_alloc_required(struct inode *dir,
+			       const struct qstr *filename);
+int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
+			    struct buffer_head **bhp);
 
 static inline u32 gfs2_disk_hash(const char *data, int len)
 {
@@ -62,7 +61,4 @@ static inline void gfs2_qstr2dirent(const struct qstr *name, u16 reclen, struct
 	memcpy(dent + 1, name->name, name->len);
 }
 
-extern struct qstr gfs2_qdot;
-extern struct qstr gfs2_qdotdot;
-
 #endif /* __DIR_DOT_H__ */
diff --git a/trunk/fs/gfs2/export.c b/trunk/fs/gfs2/export.c
index 06d582732d34..dfe237a3f8ad 100644
--- a/trunk/fs/gfs2/export.c
+++ b/trunk/fs/gfs2/export.c
@@ -126,9 +126,16 @@ static int gfs2_get_name(struct dentry *parent, char *name,
 
 static struct dentry *gfs2_get_parent(struct dentry *child)
 {
+	struct qstr dotdot;
 	struct dentry *dentry;
 
-	dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &gfs2_qdotdot, 1));
+	/*
+	 * XXX(hch): it would be a good idea to keep this around as a
+	 *	     static variable.
+	 */
+	gfs2_str2qstr(&dotdot, "..");
+
+	dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &dotdot, 1));
 	if (!IS_ERR(dentry))
 		dentry->d_op = &gfs2_dops;
 	return dentry;
diff --git a/trunk/fs/gfs2/file.c b/trunk/fs/gfs2/file.c
index 237ee6a940df..4edd662c8232 100644
--- a/trunk/fs/gfs2/file.c
+++ b/trunk/fs/gfs2/file.c
@@ -382,10 +382,8 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	rblocks = RES_DINODE + ind_blocks;
 	if (gfs2_is_jdata(ip))
 		rblocks += data_blocks ? data_blocks : 1;
-	if (ind_blocks || data_blocks) {
+	if (ind_blocks || data_blocks)
 		rblocks += RES_STATFS + RES_QUOTA;
-		rblocks += gfs2_rg_blocks(al);
-	}
 	ret = gfs2_trans_begin(sdp, rblocks, 0);
 	if (ret)
 		goto out_trans_fail;
@@ -493,7 +491,7 @@ static int gfs2_open(struct inode *inode, struct file *file)
 			goto fail;
 
 		if (!(file->f_flags & O_LARGEFILE) &&
-		    i_size_read(inode) > MAX_NON_LFS) {
+		    ip->i_disksize > MAX_NON_LFS) {
 			error = -EOVERFLOW;
 			goto fail_gunlock;
 		}
diff --git a/trunk/fs/gfs2/glock.c b/trunk/fs/gfs2/glock.c
index 87778857f099..9adf8f924e08 100644
--- a/trunk/fs/gfs2/glock.c
+++ b/trunk/fs/gfs2/glock.c
@@ -441,8 +441,6 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state)
 		else
 			gfs2_glock_put_nolock(gl);
 	}
-	if (held1 && held2 && list_empty(&gl->gl_holders))
-		clear_bit(GLF_QUEUED, &gl->gl_flags);
 
 	gl->gl_state = new_state;
 	gl->gl_tchange = jiffies;
@@ -1014,7 +1012,6 @@ __acquires(&gl->gl_spin)
 		if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt))
 			insert_pt = &gh2->gh_list;
 	}
-	set_bit(GLF_QUEUED, &gl->gl_flags);
 	if (likely(insert_pt == NULL)) {
 		list_add_tail(&gh->gh_list, &gl->gl_holders);
 		if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
@@ -1313,12 +1310,10 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
 
 	gfs2_glock_hold(gl);
 	holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
-	if (test_bit(GLF_QUEUED, &gl->gl_flags)) {
-		if (time_before(now, holdtime))
-			delay = holdtime - now;
-		if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
-			delay = gl->gl_ops->go_min_hold_time;
-	}
+	if (time_before(now, holdtime))
+		delay = holdtime - now;
+	if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
+		delay = gl->gl_ops->go_min_hold_time;
 
 	spin_lock(&gl->gl_spin);
 	handle_callback(gl, state, delay);
@@ -1517,7 +1512,7 @@ static void clear_glock(struct gfs2_glock *gl)
 	spin_unlock(&lru_lock);
 
 	spin_lock(&gl->gl_spin);
-	if (gl->gl_state != LM_ST_UNLOCKED)
+	if (find_first_holder(gl) == NULL && gl->gl_state != LM_ST_UNLOCKED)
 		handle_callback(gl, LM_ST_UNLOCKED, 0);
 	spin_unlock(&gl->gl_spin);
 	gfs2_glock_hold(gl);
@@ -1665,8 +1660,6 @@ static const char *gflags2str(char *buf, const unsigned long *gflags)
 		*p++ = 'I';
 	if (test_bit(GLF_FROZEN, gflags))
 		*p++ = 'F';
-	if (test_bit(GLF_QUEUED, gflags))
-		*p++ = 'q';
 	*p = 0;
 	return buf;
 }
@@ -1783,12 +1776,10 @@ int __init gfs2_glock_init(void)
 	}
 #endif
 
-	glock_workqueue = alloc_workqueue("glock_workqueue", WQ_RESCUER |
-					  WQ_HIGHPRI | WQ_FREEZEABLE, 0);
+	glock_workqueue = create_workqueue("glock_workqueue");
 	if (IS_ERR(glock_workqueue))
 		return PTR_ERR(glock_workqueue);
-	gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", WQ_RESCUER |
-						WQ_FREEZEABLE, 0);
+	gfs2_delete_workqueue = create_workqueue("delete_workqueue");
 	if (IS_ERR(gfs2_delete_workqueue)) {
 		destroy_workqueue(glock_workqueue);
 		return PTR_ERR(gfs2_delete_workqueue);
diff --git a/trunk/fs/gfs2/glock.h b/trunk/fs/gfs2/glock.h
index db1c26d6d220..2bda1911b156 100644
--- a/trunk/fs/gfs2/glock.h
+++ b/trunk/fs/gfs2/glock.h
@@ -215,7 +215,7 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
 void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
 
 /**
- * gfs2_glock_nq_init - initialize a holder and enqueue it on a glock
+ * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock
  * @gl: the glock
  * @state: the state we're requesting
  * @flags: the modifier flags
diff --git a/trunk/fs/gfs2/glops.c b/trunk/fs/gfs2/glops.c
index 0d149dcc04e5..49f97d3bb690 100644
--- a/trunk/fs/gfs2/glops.c
+++ b/trunk/fs/gfs2/glops.c
@@ -262,12 +262,13 @@ static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
 	const struct gfs2_inode *ip = gl->gl_object;
 	if (ip == NULL)
 		return 0;
-	gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu\n",
+	gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu/%llu\n",
 		  (unsigned long long)ip->i_no_formal_ino,
 		  (unsigned long long)ip->i_no_addr,
 		  IF2DT(ip->i_inode.i_mode), ip->i_flags,
 		  (unsigned int)ip->i_diskflags,
-		  (unsigned long long)i_size_read(&ip->i_inode));
+		  (unsigned long long)ip->i_inode.i_size,
+		  (unsigned long long)ip->i_disksize);
 	return 0;
 }
 
@@ -452,6 +453,7 @@ const struct gfs2_glock_operations *gfs2_glops_list[] = {
 	[LM_TYPE_META] = &gfs2_meta_glops,
 	[LM_TYPE_INODE] = &gfs2_inode_glops,
 	[LM_TYPE_RGRP] = &gfs2_rgrp_glops,
+	[LM_TYPE_NONDISK] = &gfs2_trans_glops,
 	[LM_TYPE_IOPEN] = &gfs2_iopen_glops,
 	[LM_TYPE_FLOCK] = &gfs2_flock_glops,
 	[LM_TYPE_NONDISK] = &gfs2_nondisk_glops,
diff --git a/trunk/fs/gfs2/incore.h b/trunk/fs/gfs2/incore.h
index 764fbb49efc8..fdbf4b366fa5 100644
--- a/trunk/fs/gfs2/incore.h
+++ b/trunk/fs/gfs2/incore.h
@@ -196,7 +196,6 @@ enum {
 	GLF_REPLY_PENDING		= 9,
 	GLF_INITIAL			= 10,
 	GLF_FROZEN			= 11,
-	GLF_QUEUED			= 12,
 };
 
 struct gfs2_glock {
@@ -268,6 +267,7 @@ struct gfs2_inode {
 	u64 i_no_formal_ino;
 	u64 i_generation;
 	u64 i_eattr;
+	loff_t i_disksize;
 	unsigned long i_flags;		/* GIF_... */
 	struct gfs2_glock *i_gl; /* Move into i_gh? */
 	struct gfs2_holder i_iopen_gh;
@@ -416,8 +416,11 @@ struct gfs2_args {
 	char ar_locktable[GFS2_LOCKNAME_LEN];	/* Name of the Lock Table */
 	char ar_hostdata[GFS2_LOCKNAME_LEN];	/* Host specific data */
 	unsigned int ar_spectator:1;		/* Don't get a journal */
+	unsigned int ar_ignore_local_fs:1;	/* Ignore optimisations */
 	unsigned int ar_localflocks:1;		/* Let the VFS do flock|fcntl */
+	unsigned int ar_localcaching:1;		/* Local caching */
 	unsigned int ar_debug:1;		/* Oops on errors */
+	unsigned int ar_upgrade:1;		/* Upgrade ondisk format */
 	unsigned int ar_posix_acl:1;		/* Enable posix acls */
 	unsigned int ar_quota:2;		/* off/account/on */
 	unsigned int ar_suiddir:1;		/* suiddir support */
@@ -494,7 +497,7 @@ struct gfs2_sb_host {
  */
 
 struct lm_lockstruct {
-	int ls_jid;
+	unsigned int ls_jid;
 	unsigned int ls_first;
 	unsigned int ls_first_done;
 	unsigned int ls_nodir;
@@ -569,7 +572,6 @@ struct gfs2_sbd {
 	struct list_head sd_rindex_mru_list;
 	struct gfs2_rgrpd *sd_rindex_forward;
 	unsigned int sd_rgrps;
-	unsigned int sd_max_rg_data;
 
 	/* Journal index stuff */
 
diff --git a/trunk/fs/gfs2/inode.c b/trunk/fs/gfs2/inode.c
index 06370f8bd8cf..08140f185a37 100644
--- a/trunk/fs/gfs2/inode.c
+++ b/trunk/fs/gfs2/inode.c
@@ -359,7 +359,8 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	 * to do that.
 	 */
 	ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink);
-	i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
+	ip->i_disksize = be64_to_cpu(str->di_size);
+	i_size_write(&ip->i_inode, ip->i_disksize);
 	gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
 	atime.tv_sec = be64_to_cpu(str->di_atime);
 	atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
@@ -1054,7 +1055,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 	str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
 	str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
 	str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
-	str->di_size = cpu_to_be64(i_size_read(&ip->i_inode));
+	str->di_size = cpu_to_be64(ip->i_disksize);
 	str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
 	str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
 	str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
@@ -1084,8 +1085,8 @@ void gfs2_dinode_print(const struct gfs2_inode *ip)
 	       (unsigned long long)ip->i_no_formal_ino);
 	printk(KERN_INFO "  no_addr = %llu\n",
 	       (unsigned long long)ip->i_no_addr);
-	printk(KERN_INFO "  i_size = %llu\n",
-	       (unsigned long long)i_size_read(&ip->i_inode));
+	printk(KERN_INFO "  i_disksize = %llu\n",
+	       (unsigned long long)ip->i_disksize);
 	printk(KERN_INFO "  blocks = %llu\n",
 	       (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode));
 	printk(KERN_INFO "  i_goal = %llu\n",
diff --git a/trunk/fs/gfs2/inode.h b/trunk/fs/gfs2/inode.h
index 6720d7d5fbc6..300ada3f21de 100644
--- a/trunk/fs/gfs2/inode.h
+++ b/trunk/fs/gfs2/inode.h
@@ -19,8 +19,6 @@ extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask);
 extern int gfs2_internal_read(struct gfs2_inode *ip,
 			      struct file_ra_state *ra_state,
 			      char *buf, loff_t *pos, unsigned size);
-extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
-				   unsigned int from, unsigned int to);
 extern void gfs2_set_aops(struct inode *inode);
 
 static inline int gfs2_is_stuffed(const struct gfs2_inode *ip)
@@ -82,19 +80,6 @@ static inline void gfs2_inum_out(const struct gfs2_inode *ip,
 	dent->de_inum.no_addr = cpu_to_be64(ip->i_no_addr);
 }
 
-static inline int gfs2_check_internal_file_size(struct inode *inode,
-						u64 minsize, u64 maxsize)
-{
-	u64 size = i_size_read(inode);
-	if (size < minsize || size > maxsize)
-		goto err;
-	if (size & ((1 << inode->i_blkbits) - 1))
-		goto err;
-	return 0;
-err:
-	gfs2_consist_inode(GFS2_I(inode));
-	return -EIO;
-}
 
 extern void gfs2_set_iop(struct inode *inode);
 extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 
diff --git a/trunk/fs/gfs2/lock_dlm.c b/trunk/fs/gfs2/lock_dlm.c
index 1c09425b45fd..0e0470ed34c2 100644
--- a/trunk/fs/gfs2/lock_dlm.c
+++ b/trunk/fs/gfs2/lock_dlm.c
@@ -42,9 +42,9 @@ static void gdlm_ast(void *arg)
 		ret |= LM_OUT_CANCELED;
 		goto out;
 	case -EAGAIN: /* Try lock fails */
-	case -EDEADLK: /* Deadlock detected */
 		goto out;
-	case -ETIMEDOUT: /* Canceled due to timeout */
+	case -EINVAL: /* Invalid */
+	case -ENOMEM: /* Out of memory */
 		ret |= LM_OUT_ERROR;
 		goto out;
 	case 0: /* Success */
diff --git a/trunk/fs/gfs2/main.c b/trunk/fs/gfs2/main.c
index d7eb1e209aa8..b1e9630eb46a 100644
--- a/trunk/fs/gfs2/main.c
+++ b/trunk/fs/gfs2/main.c
@@ -24,7 +24,6 @@
 #include "glock.h"
 #include "quota.h"
 #include "recovery.h"
-#include "dir.h"
 
 static struct shrinker qd_shrinker = {
 	.shrink = gfs2_shrink_qd_memory,
@@ -79,9 +78,6 @@ static int __init init_gfs2_fs(void)
 {
 	int error;
 
-	gfs2_str2qstr(&gfs2_qdot, ".");
-	gfs2_str2qstr(&gfs2_qdotdot, "..");
-
 	error = gfs2_sys_init();
 	if (error)
 		return error;
@@ -144,7 +140,7 @@ static int __init init_gfs2_fs(void)
 
 	error = -ENOMEM;
 	gfs_recovery_wq = alloc_workqueue("gfs_recovery",
-					  WQ_RESCUER | WQ_FREEZEABLE, 0);
+					  WQ_NON_REENTRANT | WQ_RESCUER, 0);
 	if (!gfs_recovery_wq)
 		goto fail_wq;
 
diff --git a/trunk/fs/gfs2/ops_fstype.c b/trunk/fs/gfs2/ops_fstype.c
index aeafc233dc89..4d4b1e8ac64c 100644
--- a/trunk/fs/gfs2/ops_fstype.c
+++ b/trunk/fs/gfs2/ops_fstype.c
@@ -38,6 +38,14 @@
 #define DO 0
 #define UNDO 1
 
+static const u32 gfs2_old_fs_formats[] = {
+        0
+};
+
+static const u32 gfs2_old_multihost_formats[] = {
+        0
+};
+
 /**
  * gfs2_tune_init - Fill a gfs2_tune structure with default values
  * @gt: tune
@@ -127,6 +135,8 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
 
 static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent)
 {
+	unsigned int x;
+
 	if (sb->sb_magic != GFS2_MAGIC ||
 	    sb->sb_type != GFS2_METATYPE_SB) {
 		if (!silent)
@@ -140,9 +150,55 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int sile
 	    sb->sb_multihost_format == GFS2_FORMAT_MULTI)
 		return 0;
 
-	fs_warn(sdp, "Unknown on-disk format, unable to mount\n");
+	if (sb->sb_fs_format != GFS2_FORMAT_FS) {
+		for (x = 0; gfs2_old_fs_formats[x]; x++)
+			if (gfs2_old_fs_formats[x] == sb->sb_fs_format)
+				break;
 
-	return -EINVAL;
+		if (!gfs2_old_fs_formats[x]) {
+			printk(KERN_WARNING
+			       "GFS2: code version (%u, %u) is incompatible "
+			       "with ondisk format (%u, %u)\n",
+			       GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
+			       sb->sb_fs_format, sb->sb_multihost_format);
+			printk(KERN_WARNING
+			       "GFS2: I don't know how to upgrade this FS\n");
+			return -EINVAL;
+		}
+	}
+
+	if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) {
+		for (x = 0; gfs2_old_multihost_formats[x]; x++)
+			if (gfs2_old_multihost_formats[x] ==
+			    sb->sb_multihost_format)
+				break;
+
+		if (!gfs2_old_multihost_formats[x]) {
+			printk(KERN_WARNING
+			       "GFS2: code version (%u, %u) is incompatible "
+			       "with ondisk format (%u, %u)\n",
+			       GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
+			       sb->sb_fs_format, sb->sb_multihost_format);
+			printk(KERN_WARNING
+			       "GFS2: I don't know how to upgrade this FS\n");
+			return -EINVAL;
+		}
+	}
+
+	if (!sdp->sd_args.ar_upgrade) {
+		printk(KERN_WARNING
+		       "GFS2: code version (%u, %u) is incompatible "
+		       "with ondisk format (%u, %u)\n",
+		       GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
+		       sb->sb_fs_format, sb->sb_multihost_format);
+		printk(KERN_INFO
+		       "GFS2: Use the \"upgrade\" mount option to upgrade "
+		       "the FS\n");
+		printk(KERN_INFO "GFS2: See the manual for more details\n");
+		return -EINVAL;
+	}
+
+	return 0;
 }
 
 static void end_bio_io_page(struct bio *bio, int error)
@@ -530,7 +586,7 @@ static int map_journal_extents(struct gfs2_sbd *sdp)
 
 	prev_db = 0;
 
-	for (lb = 0; lb < i_size_read(jd->jd_inode) >> sdp->sd_sb.sb_bsize_shift; lb++) {
+	for (lb = 0; lb < ip->i_disksize >> sdp->sd_sb.sb_bsize_shift; lb++) {
 		bh.b_state = 0;
 		bh.b_blocknr = 0;
 		bh.b_size = 1 << ip->i_inode.i_blkbits;
@@ -966,6 +1022,7 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
 	if (!strcmp("lock_nolock", proto)) {
 		lm = &nolock_ops;
 		sdp->sd_args.ar_localflocks = 1;
+		sdp->sd_args.ar_localcaching = 1;
 #ifdef CONFIG_GFS2_FS_LOCKING_DLM
 	} else if (!strcmp("lock_dlm", proto)) {
 		lm = &gfs2_dlm_ops;
@@ -1056,6 +1113,8 @@ static int gfs2_journalid_wait(void *word)
 
 static int wait_on_journal(struct gfs2_sbd *sdp)
 {
+	if (sdp->sd_args.ar_spectator)
+		return 0;
 	if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
 		return 0;
 
@@ -1158,20 +1217,6 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
 	if (error)
 		goto fail_sb;
 
-	/*
-	 * If user space has failed to join the cluster or some similar
-	 * failure has occurred, then the journal id will contain a
-	 * negative (error) number. This will then be returned to the
-	 * caller (of the mount syscall). We do this even for spectator
-	 * mounts (which just write a jid of 0 to indicate "ok" even though
-	 * the jid is unused in the spectator case)
-	 */
-	if (sdp->sd_lockstruct.ls_jid < 0) {
-		error = sdp->sd_lockstruct.ls_jid;
-		sdp->sd_lockstruct.ls_jid = 0;
-		goto fail_sb;
-	}
-
 	error = init_inodes(sdp, DO);
 	if (error)
 		goto fail_sb;
diff --git a/trunk/fs/gfs2/ops_inode.c b/trunk/fs/gfs2/ops_inode.c
index 0534510200d5..1009be2c9737 100644
--- a/trunk/fs/gfs2/ops_inode.c
+++ b/trunk/fs/gfs2/ops_inode.c
@@ -18,8 +18,6 @@
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
 #include <linux/fiemap.h>
-#include <linux/swap.h>
-#include <linux/falloc.h>
 #include <asm/uaccess.h>
 
 #include "gfs2.h"
@@ -219,7 +217,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 			goto out_gunlock_q;
 
 		error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-					 gfs2_rg_blocks(al) +
+					 al->al_rgd->rd_length +
 					 2 * RES_DINODE + RES_STATFS +
 					 RES_QUOTA, 0);
 		if (error)
@@ -408,6 +406,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
 
 	ip = ghs[1].gh_gl->gl_object;
 
+	ip->i_disksize = size;
 	i_size_write(inode, size);
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
@@ -462,7 +461,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	ip = ghs[1].gh_gl->gl_object;
 
 	ip->i_inode.i_nlink = 2;
-	i_size_write(inode, sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode));
+	ip->i_disksize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
 	ip->i_diskflags |= GFS2_DIF_JDATA;
 	ip->i_entries = 2;
 
@@ -471,15 +470,18 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	if (!gfs2_assert_withdraw(sdp, !error)) {
 		struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
 		struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1);
+		struct qstr str;
 
+		gfs2_str2qstr(&str, ".");
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent);
+		gfs2_qstr2dirent(&str, GFS2_DIRENT_SIZE(str.len), dent);
 		dent->de_inum = di->di_num; /* already GFS2 endian */
 		dent->de_type = cpu_to_be16(DT_DIR);
 		di->di_entries = cpu_to_be32(1);
 
+		gfs2_str2qstr(&str, "..");
 		dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
-		gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
+		gfs2_qstr2dirent(&str, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
 
 		gfs2_inum_out(dip, dent);
 		dent->de_type = cpu_to_be16(DT_DIR);
@@ -520,6 +522,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
 		       struct gfs2_inode *ip)
 {
+	struct qstr dotname;
 	int error;
 
 	if (ip->i_entries != 2) {
@@ -536,11 +539,13 @@ static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
 	if (error)
 		return error;
 
-	error = gfs2_dir_del(ip, &gfs2_qdot);
+	gfs2_str2qstr(&dotname, ".");
+	error = gfs2_dir_del(ip, &dotname);
 	if (error)
 		return error;
 
-	error = gfs2_dir_del(ip, &gfs2_qdotdot);
+	gfs2_str2qstr(&dotname, "..");
+	error = gfs2_dir_del(ip, &dotname);
 	if (error)
 		return error;
 
@@ -689,8 +694,11 @@ static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
 	struct inode *dir = &to->i_inode;
 	struct super_block *sb = dir->i_sb;
 	struct inode *tmp;
+	struct qstr dotdot;
 	int error = 0;
 
+	gfs2_str2qstr(&dotdot, "..");
+
 	igrab(dir);
 
 	for (;;) {
@@ -703,7 +711,7 @@ static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
 			break;
 		}
 
-		tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1);
+		tmp = gfs2_lookupi(dir, &dotdot, 1);
 		if (IS_ERR(tmp)) {
 			error = PTR_ERR(tmp);
 			break;
@@ -736,7 +744,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 	struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
 	struct gfs2_inode *nip = NULL;
 	struct gfs2_sbd *sdp = GFS2_SB(odir);
-	struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh;
+	struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, };
 	struct gfs2_rgrpd *nrgd;
 	unsigned int num_gh;
 	int dir_rename = 0;
@@ -750,9 +758,6 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 			return 0;
 	}
 
-	error = gfs2_rindex_hold(sdp, &ri_gh);
-	if (error)
-		return error;
 
 	if (odip != ndip) {
 		error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
@@ -882,12 +887,12 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 
 		al->al_requested = sdp->sd_max_dirres;
 
-		error = gfs2_inplace_reserve_ri(ndip);
+		error = gfs2_inplace_reserve(ndip);
 		if (error)
 			goto out_gunlock_q;
 
 		error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-					 gfs2_rg_blocks(al) +
+					 al->al_rgd->rd_length +
 					 4 * RES_DINODE + 4 * RES_LEAF +
 					 RES_STATFS + RES_QUOTA + 4, 0);
 		if (error)
@@ -915,6 +920,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 	}
 
 	if (dir_rename) {
+		struct qstr name;
+		gfs2_str2qstr(&name, "..");
+
 		error = gfs2_change_nlink(ndip, +1);
 		if (error)
 			goto out_end_trans;
@@ -922,7 +930,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 		if (error)
 			goto out_end_trans;
 
-		error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
+		error = gfs2_dir_mvino(ip, &name, ndip, DT_DIR);
 		if (error)
 			goto out_end_trans;
 	} else {
@@ -964,7 +972,6 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 	if (r_gh.gh_gl)
 		gfs2_glock_dq_uninit(&r_gh);
 out:
-	gfs2_glock_dq_uninit(&ri_gh);
 	return error;
 }
 
@@ -983,7 +990,7 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
 	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
 	struct gfs2_holder i_gh;
 	struct buffer_head *dibh;
-	unsigned int x, size;
+	unsigned int x;
 	char *buf;
 	int error;
 
@@ -995,8 +1002,7 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
 		return NULL;
 	}
 
-	size = (unsigned int)i_size_read(&ip->i_inode);
-	if (size == 0) {
+	if (!ip->i_disksize) {
 		gfs2_consist_inode(ip);
 		buf = ERR_PTR(-EIO);
 		goto out;
@@ -1008,7 +1014,7 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
 		goto out;
 	}
 
-	x = size + 1;
+	x = ip->i_disksize + 1;
 	buf = kmalloc(x, GFP_NOFS);
 	if (!buf)
 		buf = ERR_PTR(-ENOMEM);
@@ -1065,6 +1071,30 @@ int gfs2_permission(struct inode *inode, int mask)
 	return error;
 }
 
+/*
+ * XXX(truncate): the truncate_setsize calls should be moved to the end.
+ */
+static int setattr_size(struct inode *inode, struct iattr *attr)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	int error;
+
+	if (attr->ia_size != ip->i_disksize) {
+		error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
+		if (error)
+			return error;
+		truncate_setsize(inode, attr->ia_size);
+		gfs2_trans_end(sdp);
+	}
+
+	error = gfs2_truncatei(ip, attr->ia_size);
+	if (error && (inode->i_size != ip->i_disksize))
+		i_size_write(inode, ip->i_disksize);
+
+	return error;
+}
+
 static int setattr_chown(struct inode *inode, struct iattr *attr)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
@@ -1165,7 +1195,7 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
 		goto out;
 
 	if (attr->ia_valid & ATTR_SIZE)
-		error = gfs2_setattr_size(inode, attr->ia_size);
+		error = setattr_size(inode, attr);
 	else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
 		error = setattr_chown(inode, attr);
 	else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
@@ -1271,257 +1301,6 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
 	return ret;
 }
 
-static void empty_write_end(struct page *page, unsigned from,
-			   unsigned to)
-{
-	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
-
-	page_zero_new_buffers(page, from, to);
-	flush_dcache_page(page);
-	mark_page_accessed(page);
-
-	if (!gfs2_is_writeback(ip))
-		gfs2_page_add_databufs(ip, page, from, to);
-
-	block_commit_write(page, from, to);
-}
-
-
-static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
-{
-	unsigned start, end, next;
-	struct buffer_head *bh, *head;
-	int error;
-
-	if (!page_has_buffers(page)) {
-		error = block_prepare_write(page, from, to, gfs2_block_map);
-		if (unlikely(error))
-			return error;
-
-		empty_write_end(page, from, to);
-		return 0;
-	}
-
-	bh = head = page_buffers(page);
-	next = end = 0;
-	while (next < from) {
-		next += bh->b_size;
-		bh = bh->b_this_page;
-	}
-	start = next;
-	do {
-		next += bh->b_size;
-		if (buffer_mapped(bh)) {
-			if (end) {
-				error = block_prepare_write(page, start, end,
-							    gfs2_block_map);
-				if (unlikely(error))
-					return error;
-				empty_write_end(page, start, end);
-				end = 0;
-			}
-			start = next;
-		}
-		else
-			end = next;
-		bh = bh->b_this_page;
-	} while (next < to);
-
-	if (end) {
-		error = block_prepare_write(page, start, end, gfs2_block_map);
-		if (unlikely(error))
-			return error;
-		empty_write_end(page, start, end);
-	}
-
-	return 0;
-}
-
-static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
-			   int mode)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct buffer_head *dibh;
-	int error;
-	u64 start = offset >> PAGE_CACHE_SHIFT;
-	unsigned int start_offset = offset & ~PAGE_CACHE_MASK;
-	u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT;
-	pgoff_t curr;
-	struct page *page;
-	unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK;
-	unsigned int from, to;
-
-	if (!end_offset)
-		end_offset = PAGE_CACHE_SIZE;
-
-	error = gfs2_meta_inode_buffer(ip, &dibh);
-	if (unlikely(error))
-		goto out;
-
-	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-
-	if (gfs2_is_stuffed(ip)) {
-		error = gfs2_unstuff_dinode(ip, NULL);
-		if (unlikely(error))
-			goto out;
-	}
-
-	curr = start;
-	offset = start << PAGE_CACHE_SHIFT;
-	from = start_offset;
-	to = PAGE_CACHE_SIZE;
-	while (curr <= end) {
-		page = grab_cache_page_write_begin(inode->i_mapping, curr,
-						   AOP_FLAG_NOFS);
-		if (unlikely(!page)) {
-			error = -ENOMEM;
-			goto out;
-		}
-
-		if (curr == end)
-			to = end_offset;
-		error = write_empty_blocks(page, from, to);
-		if (!error && offset + to > inode->i_size &&
-		    !(mode & FALLOC_FL_KEEP_SIZE)) {
-			i_size_write(inode, offset + to);
-		}
-		unlock_page(page);
-		page_cache_release(page);
-		if (error)
-			goto out;
-		curr++;
-		offset += PAGE_CACHE_SIZE;
-		from = 0;
-	}
-
-	gfs2_dinode_out(ip, dibh->b_data);
-	mark_inode_dirty(inode);
-
-	brelse(dibh);
-
-out:
-	return error;
-}
-
-static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
-			    unsigned int *data_blocks, unsigned int *ind_blocks)
-{
-	const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone;
-	unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);
-
-	for (tmp = max_data; tmp > sdp->sd_diptrs;) {
-		tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
-		max_data -= tmp;
-	}
-	/* This calculation isn't the exact reverse of gfs2_write_calc_reserve,
-	   so it might end up with fewer data blocks */
-	if (max_data <= *data_blocks)
-		return;
-	*data_blocks = max_data;
-	*ind_blocks = max_blocks - max_data;
-	*len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
-	if (*len > max) {
-		*len = max;
-		gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks);
-	}
-}
-
-static long gfs2_fallocate(struct inode *inode, int mode, loff_t offset,
-			   loff_t len)
-{
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	struct gfs2_inode *ip = GFS2_I(inode);
-	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
-	loff_t bytes, max_bytes;
-	struct gfs2_alloc *al;
-	int error;
-	loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
-	next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
-
-	offset = (offset >> sdp->sd_sb.sb_bsize_shift) <<
-		 sdp->sd_sb.sb_bsize_shift;
-
-	len = next - offset;
-	bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
-	if (!bytes)
-		bytes = UINT_MAX;
-
-	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
-	error = gfs2_glock_nq(&ip->i_gh);
-	if (unlikely(error))
-		goto out_uninit;
-
-	if (!gfs2_write_alloc_required(ip, offset, len))
-		goto out_unlock;
-
-	while (len > 0) {
-		if (len < bytes)
-			bytes = len;
-		al = gfs2_alloc_get(ip);
-		if (!al) {
-			error = -ENOMEM;
-			goto out_unlock;
-		}
-
-		error = gfs2_quota_lock_check(ip);
-		if (error)
-			goto out_alloc_put;
-
-retry:
-		gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
-
-		al->al_requested = data_blocks + ind_blocks;
-		error = gfs2_inplace_reserve(ip);
-		if (error) {
-			if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
-				bytes >>= 1;
-				goto retry;
-			}
-			goto out_qunlock;
-		}
-		max_bytes = bytes;
-		calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks);
-		al->al_requested = data_blocks + ind_blocks;
-
-		rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
-			  RES_RG_HDR + gfs2_rg_blocks(al);
-		if (gfs2_is_jdata(ip))
-			rblocks += data_blocks ? data_blocks : 1;
-
-		error = gfs2_trans_begin(sdp, rblocks,
-					 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
-		if (error)
-			goto out_trans_fail;
-
-		error = fallocate_chunk(inode, offset, max_bytes, mode);
-		gfs2_trans_end(sdp);
-
-		if (error)
-			goto out_trans_fail;
-
-		len -= max_bytes;
-		offset += max_bytes;
-		gfs2_inplace_release(ip);
-		gfs2_quota_unlock(ip);
-		gfs2_alloc_put(ip);
-	}
-	goto out_unlock;
-
-out_trans_fail:
-	gfs2_inplace_release(ip);
-out_qunlock:
-	gfs2_quota_unlock(ip);
-out_alloc_put:
-	gfs2_alloc_put(ip);
-out_unlock:
-	gfs2_glock_dq(&ip->i_gh);
-out_uninit:
-	gfs2_holder_uninit(&ip->i_gh);
-	return error;
-}
-
-
 static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		       u64 start, u64 len)
 {
@@ -1572,7 +1351,6 @@ const struct inode_operations gfs2_file_iops = {
 	.getxattr = gfs2_getxattr,
 	.listxattr = gfs2_listxattr,
 	.removexattr = gfs2_removexattr,
-	.fallocate = gfs2_fallocate,
 	.fiemap = gfs2_fiemap,
 };
 
diff --git a/trunk/fs/gfs2/quota.c b/trunk/fs/gfs2/quota.c
index 58a9b9998b42..1bc6b5695e6d 100644
--- a/trunk/fs/gfs2/quota.c
+++ b/trunk/fs/gfs2/quota.c
@@ -735,8 +735,10 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
 		goto out;
 
 	size = loc + sizeof(struct gfs2_quota);
-	if (size > inode->i_size)
+	if (size > inode->i_size) {
+		ip->i_disksize = size;
 		i_size_write(inode, size);
+	}
 	inode->i_mtime = inode->i_atime = CURRENT_TIME;
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
@@ -815,7 +817,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
 		goto out_alloc;
 
 	if (nalloc)
-		blocks += gfs2_rg_blocks(al) + nalloc * ind_blocks + RES_STATFS;
+		blocks += al->al_rgd->rd_length + nalloc * ind_blocks + RES_STATFS;
 
 	error = gfs2_trans_begin(sdp, blocks, 0);
 	if (error)
@@ -1188,17 +1190,18 @@ static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *
 int gfs2_quota_init(struct gfs2_sbd *sdp)
 {
 	struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
-	u64 size = i_size_read(sdp->sd_qc_inode);
-	unsigned int blocks = size >> sdp->sd_sb.sb_bsize_shift;
+	unsigned int blocks = ip->i_disksize >> sdp->sd_sb.sb_bsize_shift;
 	unsigned int x, slot = 0;
 	unsigned int found = 0;
 	u64 dblock;
 	u32 extlen = 0;
 	int error;
 
-	if (gfs2_check_internal_file_size(sdp->sd_qc_inode, 1, 64 << 20))
+	if (!ip->i_disksize || ip->i_disksize > (64 << 20) ||
+	    ip->i_disksize & (sdp->sd_sb.sb_bsize - 1)) {
+		gfs2_consist_inode(ip);
 		return -EIO;
-
+	}
 	sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block;
 	sdp->sd_quota_chunks = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * PAGE_SIZE);
 
@@ -1586,7 +1589,6 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
 		error = gfs2_inplace_reserve(ip);
 		if (error)
 			goto out_alloc;
-		blocks += gfs2_rg_blocks(al);
 	}
 
 	error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 1, 0);
diff --git a/trunk/fs/gfs2/recovery.c b/trunk/fs/gfs2/recovery.c
index f2a02edcac8f..f7f89a94a5a4 100644
--- a/trunk/fs/gfs2/recovery.c
+++ b/trunk/fs/gfs2/recovery.c
@@ -455,13 +455,11 @@ void gfs2_recover_func(struct work_struct *work)
 	int ro = 0;
 	unsigned int pass;
 	int error;
-	int jlocked = 0;
 
-	if (sdp->sd_args.ar_spectator ||
-	    (jd->jd_jid != sdp->sd_lockstruct.ls_jid)) {
+	if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
 		fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n",
 			jd->jd_jid);
-		jlocked = 1;
+
 		/* Acquire the journal lock so we can do recovery */
 
 		error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops,
@@ -556,12 +554,13 @@ void gfs2_recover_func(struct work_struct *work)
 			jd->jd_jid, t);
 	}
 
+	if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
+		gfs2_glock_dq_uninit(&ji_gh);
+
 	gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
 
-	if (jlocked) {
-		gfs2_glock_dq_uninit(&ji_gh);
+	if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
 		gfs2_glock_dq_uninit(&j_gh);
-	}
 
 	fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
 	goto done;
@@ -569,7 +568,7 @@ void gfs2_recover_func(struct work_struct *work)
 fail_gunlock_tr:
 	gfs2_glock_dq_uninit(&t_gh);
 fail_gunlock_ji:
-	if (jlocked) {
+	if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
 		gfs2_glock_dq_uninit(&ji_gh);
 fail_gunlock_j:
 		gfs2_glock_dq_uninit(&j_gh);
diff --git a/trunk/fs/gfs2/rgrp.c b/trunk/fs/gfs2/rgrp.c
index fb67f593f408..171a744f8e45 100644
--- a/trunk/fs/gfs2/rgrp.c
+++ b/trunk/fs/gfs2/rgrp.c
@@ -500,7 +500,7 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
 	for (rgrps = 0;; rgrps++) {
 		loff_t pos = rgrps * sizeof(struct gfs2_rindex);
 
-		if (pos + sizeof(struct gfs2_rindex) >= i_size_read(inode))
+		if (pos + sizeof(struct gfs2_rindex) >= ip->i_disksize)
 			break;
 		error = gfs2_internal_read(ip, &ra_state, buf, &pos,
 					   sizeof(struct gfs2_rindex));
@@ -588,9 +588,7 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct inode *inode = &ip->i_inode;
 	struct file_ra_state ra_state;
-	u64 rgrp_count = i_size_read(inode);
-	struct gfs2_rgrpd *rgd;
-	unsigned int max_data = 0;
+	u64 rgrp_count = ip->i_disksize;
 	int error;
 
 	do_div(rgrp_count, sizeof(struct gfs2_rindex));
@@ -605,10 +603,6 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
 		}
 	}
 
-	list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list)
-		if (rgd->rd_data > max_data)
-			max_data = rgd->rd_data;
-	sdp->sd_max_rg_data = max_data;
 	sdp->sd_rindex_uptodate = 1;
 	return 0;
 }
@@ -628,15 +622,13 @@ static int gfs2_ri_update_special(struct gfs2_inode *ip)
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct inode *inode = &ip->i_inode;
 	struct file_ra_state ra_state;
-	struct gfs2_rgrpd *rgd;
-	unsigned int max_data = 0;
 	int error;
 
 	file_ra_state_init(&ra_state, inode->i_mapping);
 	for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
 		/* Ignore partials */
 		if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) >
-		    i_size_read(inode))
+		    ip->i_disksize)
 			break;
 		error = read_rindex_entry(ip, &ra_state);
 		if (error) {
@@ -644,10 +636,6 @@ static int gfs2_ri_update_special(struct gfs2_inode *ip)
 			return error;
 		}
 	}
-	list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list)
-		if (rgd->rd_data > max_data)
-			max_data = rgd->rd_data;
-	sdp->sd_max_rg_data = max_data;
 
 	sdp->sd_rindex_uptodate = 1;
 	return 0;
@@ -1200,8 +1188,7 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
  * Returns: errno
  */
 
-int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
-			   char *file, unsigned int line)
+int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_alloc *al = ip->i_alloc;
@@ -1212,15 +1199,12 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
 		return -EINVAL;
 
 try_again:
-	if (hold_rindex) {
-		/* We need to hold the rindex unless the inode we're using is
-		   the rindex itself, in which case it's already held. */
-		if (ip != GFS2_I(sdp->sd_rindex))
-			error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
-		else if (!sdp->sd_rgrps) /* We may not have the rindex read
-					    in, so: */
-			error = gfs2_ri_update_special(ip);
-	}
+	/* We need to hold the rindex unless the inode we're using is
+	   the rindex itself, in which case it's already held. */
+	if (ip != GFS2_I(sdp->sd_rindex))
+		error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
+	else if (!sdp->sd_rgrps) /* We may not have the rindex read in, so: */
+		error = gfs2_ri_update_special(ip);
 
 	if (error)
 		return error;
@@ -1231,7 +1215,7 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
 	   try to free it, and try the allocation again. */
 	error = get_local_rgrp(ip, &unlinked, &last_unlinked);
 	if (error) {
-		if (hold_rindex && ip != GFS2_I(sdp->sd_rindex))
+		if (ip != GFS2_I(sdp->sd_rindex))
 			gfs2_glock_dq_uninit(&al->al_ri_gh);
 		if (error != -EAGAIN)
 			return error;
@@ -1273,7 +1257,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
 	al->al_rgd = NULL;
 	if (al->al_rgd_gh.gh_gl)
 		gfs2_glock_dq_uninit(&al->al_rgd_gh);
-	if (ip != GFS2_I(sdp->sd_rindex) && al->al_ri_gh.gh_gl)
+	if (ip != GFS2_I(sdp->sd_rindex))
 		gfs2_glock_dq_uninit(&al->al_ri_gh);
 }
 
@@ -1512,19 +1496,11 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n)
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct buffer_head *dibh;
 	struct gfs2_alloc *al = ip->i_alloc;
-	struct gfs2_rgrpd *rgd;
+	struct gfs2_rgrpd *rgd = al->al_rgd;
 	u32 goal, blk;
 	u64 block;
 	int error;
 
-	/* Only happens if there is a bug in gfs2, return something distinctive
-	 * to ensure that it is noticed.
-	 */
-	if (al == NULL)
-		return -ECANCELED;
-
-	rgd = al->al_rgd;
-
 	if (rgrp_contains_block(rgd, ip->i_goal))
 		goal = ip->i_goal - rgd->rd_data0;
 	else
diff --git a/trunk/fs/gfs2/rgrp.h b/trunk/fs/gfs2/rgrp.h
index 0e35c0466f9a..f07119d89557 100644
--- a/trunk/fs/gfs2/rgrp.h
+++ b/trunk/fs/gfs2/rgrp.h
@@ -39,12 +39,10 @@ static inline void gfs2_alloc_put(struct gfs2_inode *ip)
 	ip->i_alloc = NULL;
 }
 
-extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
-				  char *file, unsigned int line);
+extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file,
+				  unsigned int line);
 #define gfs2_inplace_reserve(ip) \
-	gfs2_inplace_reserve_i((ip), 1, __FILE__, __LINE__)
-#define gfs2_inplace_reserve_ri(ip) \
-	gfs2_inplace_reserve_i((ip), 0, __FILE__, __LINE__)
+gfs2_inplace_reserve_i((ip), __FILE__, __LINE__)
 
 extern void gfs2_inplace_release(struct gfs2_inode *ip);
 
diff --git a/trunk/fs/gfs2/super.c b/trunk/fs/gfs2/super.c
index 047d1176096c..77cb9f830ee4 100644
--- a/trunk/fs/gfs2/super.c
+++ b/trunk/fs/gfs2/super.c
@@ -85,7 +85,6 @@ static const match_table_t tokens = {
 	{Opt_locktable, "locktable=%s"},
 	{Opt_hostdata, "hostdata=%s"},
 	{Opt_spectator, "spectator"},
-	{Opt_spectator, "norecovery"},
 	{Opt_ignore_local_fs, "ignore_local_fs"},
 	{Opt_localflocks, "localflocks"},
 	{Opt_localcaching, "localcaching"},
@@ -160,13 +159,13 @@ int gfs2_mount_args(struct gfs2_args *args, char *options)
 			args->ar_spectator = 1;
 			break;
 		case Opt_ignore_local_fs:
-			/* Retained for backwards compat only */
+			args->ar_ignore_local_fs = 1;
 			break;
 		case Opt_localflocks:
 			args->ar_localflocks = 1;
 			break;
 		case Opt_localcaching:
-			/* Retained for backwards compat only */
+			args->ar_localcaching = 1;
 			break;
 		case Opt_debug:
 			if (args->ar_errors == GFS2_ERRORS_PANIC) {
@@ -180,7 +179,7 @@ int gfs2_mount_args(struct gfs2_args *args, char *options)
 			args->ar_debug = 0;
 			break;
 		case Opt_upgrade:
-			/* Retained for backwards compat only */
+			args->ar_upgrade = 1;
 			break;
 		case Opt_acl:
 			args->ar_posix_acl = 1;
@@ -343,14 +342,15 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd)
 {
 	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
 	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
-	u64 size = i_size_read(jd->jd_inode);
 
-	if (gfs2_check_internal_file_size(jd->jd_inode, 8 << 20, 1 << 30))
+	if (ip->i_disksize < (8 << 20) || ip->i_disksize > (1 << 30) ||
+	    (ip->i_disksize & (sdp->sd_sb.sb_bsize - 1))) {
+		gfs2_consist_inode(ip);
 		return -EIO;
+	}
+	jd->jd_blocks = ip->i_disksize >> sdp->sd_sb.sb_bsize_shift;
 
-	jd->jd_blocks = size >> sdp->sd_sb.sb_bsize_shift;
-
-	if (gfs2_write_alloc_required(ip, 0, size)) {
+	if (gfs2_write_alloc_required(ip, 0, ip->i_disksize)) {
 		gfs2_consist_inode(ip);
 		return -EIO;
 	}
@@ -1129,7 +1129,9 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
 
 	/* Some flags must not be changed */
 	if (args_neq(&args, &sdp->sd_args, spectator) ||
+	    args_neq(&args, &sdp->sd_args, ignore_local_fs) ||
 	    args_neq(&args, &sdp->sd_args, localflocks) ||
+	    args_neq(&args, &sdp->sd_args, localcaching) ||
 	    args_neq(&args, &sdp->sd_args, meta))
 		return -EINVAL;
 
@@ -1232,10 +1234,16 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
 		seq_printf(s, ",hostdata=%s", args->ar_hostdata);
 	if (args->ar_spectator)
 		seq_printf(s, ",spectator");
+	if (args->ar_ignore_local_fs)
+		seq_printf(s, ",ignore_local_fs");
 	if (args->ar_localflocks)
 		seq_printf(s, ",localflocks");
+	if (args->ar_localcaching)
+		seq_printf(s, ",localcaching");
 	if (args->ar_debug)
 		seq_printf(s, ",debug");
+	if (args->ar_upgrade)
+		seq_printf(s, ",upgrade");
 	if (args->ar_posix_acl)
 		seq_printf(s, ",acl");
 	if (args->ar_quota != GFS2_QUOTA_DEFAULT) {
diff --git a/trunk/fs/gfs2/sys.c b/trunk/fs/gfs2/sys.c
index 748ccb557c18..ccacffd2faaa 100644
--- a/trunk/fs/gfs2/sys.c
+++ b/trunk/fs/gfs2/sys.c
@@ -230,10 +230,7 @@ static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len
 
 	if (gltype > LM_TYPE_JOURNAL)
 		return -EINVAL;
-	if (gltype == LM_TYPE_NONDISK && glnum == GFS2_TRANS_LOCK)
-		glops = &gfs2_trans_glops;
-	else
-		glops = gfs2_glops_list[gltype];
+	glops = gfs2_glops_list[gltype];
 	if (glops == NULL)
 		return -EINVAL;
 	if (!test_and_set_bit(SDF_DEMOTE, &sdp->sd_flags))
@@ -402,32 +399,31 @@ static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf)
 
 static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf)
 {
-	return sprintf(buf, "%d\n", sdp->sd_lockstruct.ls_jid);
+	return sprintf(buf, "%u\n", sdp->sd_lockstruct.ls_jid);
 }
 
 static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
 {
-        int jid;
+        unsigned jid;
 	int rv;
 
-	rv = sscanf(buf, "%d", &jid);
+	rv = sscanf(buf, "%u", &jid);
 	if (rv != 1)
 		return -EINVAL;
 
 	spin_lock(&sdp->sd_jindex_spin);
 	rv = -EINVAL;
+	if (sdp->sd_args.ar_spectator)
+		goto out;
 	if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
 		goto out;
 	rv = -EBUSY;
-	if (test_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0)
+	if (test_and_clear_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0)
 		goto out;
-	rv = 0;
-	if (sdp->sd_args.ar_spectator && jid > 0)
-		rv = jid = -EINVAL;
 	sdp->sd_lockstruct.ls_jid = jid;
-	clear_bit(SDF_NOJOURNALID, &sdp->sd_flags);
 	smp_mb__after_clear_bit();
 	wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID);
+	rv = 0;
 out:
 	spin_unlock(&sdp->sd_jindex_spin);
 	return rv ? rv : len;
@@ -621,7 +617,7 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
 	add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
 	add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
 	if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags))
-		add_uevent_var(env, "JOURNALID=%d", sdp->sd_lockstruct.ls_jid);
+		add_uevent_var(env, "JOURNALID=%u", sdp->sd_lockstruct.ls_jid);
 	if (gfs2_uuid_valid(uuid))
 		add_uevent_var(env, "UUID=%pUB", uuid);
 	return 0;
diff --git a/trunk/fs/gfs2/trace_gfs2.h b/trunk/fs/gfs2/trace_gfs2.h
index cedb0bb96d96..148d55c14171 100644
--- a/trunk/fs/gfs2/trace_gfs2.h
+++ b/trunk/fs/gfs2/trace_gfs2.h
@@ -39,8 +39,7 @@
 	{(1UL << GLF_INVALIDATE_IN_PROGRESS),	"i" },		\
 	{(1UL << GLF_REPLY_PENDING),		"r" },		\
 	{(1UL << GLF_INITIAL),			"I" },		\
-	{(1UL << GLF_FROZEN),			"F" },		\
-	{(1UL << GLF_QUEUED),			"q" })
+	{(1UL << GLF_FROZEN),			"F" })
 
 #ifndef NUMPTY
 #define NUMPTY
diff --git a/trunk/fs/gfs2/trans.h b/trunk/fs/gfs2/trans.h
index fb56b783e028..edf9d4bd908e 100644
--- a/trunk/fs/gfs2/trans.h
+++ b/trunk/fs/gfs2/trans.h
@@ -20,20 +20,11 @@ struct gfs2_glock;
 #define RES_JDATA	1
 #define RES_DATA	1
 #define RES_LEAF	1
-#define RES_RG_HDR	1
 #define RES_RG_BIT	2
 #define RES_EATTR	1
 #define RES_STATFS	1
 #define RES_QUOTA	2
 
-/* reserve either the number of blocks to be allocated plus the rg header
- * block, or all of the blocks in the rg, whichever is smaller */
-static inline unsigned int gfs2_rg_blocks(const struct gfs2_alloc *al)
-{
-	return (al->al_requested < al->al_rgd->rd_length)?
-	       al->al_requested + 1 : al->al_rgd->rd_length;
-}
-
 int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
 		     unsigned int revokes);
 
diff --git a/trunk/fs/gfs2/xattr.c b/trunk/fs/gfs2/xattr.c
index 30b58f07c8a6..776af6eb4bcb 100644
--- a/trunk/fs/gfs2/xattr.c
+++ b/trunk/fs/gfs2/xattr.c
@@ -734,7 +734,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 		goto out_gunlock_q;
 
 	error = gfs2_trans_begin(GFS2_SB(&ip->i_inode),
-				 blks + gfs2_rg_blocks(al) +
+				 blks + al->al_rgd->rd_length +
 				 RES_DINODE + RES_STATFS + RES_QUOTA, 0);
 	if (error)
 		goto out_ipres;
diff --git a/trunk/fs/hfsplus/bfind.c b/trunk/fs/hfsplus/bfind.c
index d182438c7ae4..5007a41f1be9 100644
--- a/trunk/fs/hfsplus/bfind.c
+++ b/trunk/fs/hfsplus/bfind.c
@@ -23,7 +23,7 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd)
 	fd->search_key = ptr;
 	fd->key = ptr + tree->max_key_len + 2;
 	dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0));
-	mutex_lock(&tree->tree_lock);
+	down(&tree->tree_lock);
 	return 0;
 }
 
@@ -32,7 +32,7 @@ void hfs_find_exit(struct hfs_find_data *fd)
 	hfs_bnode_put(fd->bnode);
 	kfree(fd->search_key);
 	dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0));
-	mutex_unlock(&fd->tree->tree_lock);
+	up(&fd->tree->tree_lock);
 	fd->tree = NULL;
 }
 
@@ -52,10 +52,6 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd)
 		rec = (e + b) / 2;
 		len = hfs_brec_lenoff(bnode, rec, &off);
 		keylen = hfs_brec_keylen(bnode, rec);
-		if (keylen == 0) {
-			res = -EINVAL;
-			goto fail;
-		}
 		hfs_bnode_read(bnode, fd->key, off, keylen);
 		cmpval = bnode->tree->keycmp(fd->key, fd->search_key);
 		if (!cmpval) {
@@ -71,10 +67,6 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd)
 	if (rec != e && e >= 0) {
 		len = hfs_brec_lenoff(bnode, e, &off);
 		keylen = hfs_brec_keylen(bnode, e);
-		if (keylen == 0) {
-			res = -EINVAL;
-			goto fail;
-		}
 		hfs_bnode_read(bnode, fd->key, off, keylen);
 	}
 done:
@@ -83,7 +75,6 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd)
 	fd->keylength = keylen;
 	fd->entryoffset = off + keylen;
 	fd->entrylength = len - keylen;
-fail:
 	return res;
 }
 
@@ -207,10 +198,6 @@ int hfs_brec_goto(struct hfs_find_data *fd, int cnt)
 
 	len = hfs_brec_lenoff(bnode, fd->record, &off);
 	keylen = hfs_brec_keylen(bnode, fd->record);
-	if (keylen == 0) {
-		res = -EINVAL;
-		goto out;
-	}
 	fd->keyoffset = off;
 	fd->keylength = keylen;
 	fd->entryoffset = off + keylen;
diff --git a/trunk/fs/hfsplus/bitmap.c b/trunk/fs/hfsplus/bitmap.c
index ad57f5991eb1..ea30afc2a03c 100644
--- a/trunk/fs/hfsplus/bitmap.c
+++ b/trunk/fs/hfsplus/bitmap.c
@@ -17,7 +17,6 @@
 
 int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *max)
 {
-	struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
 	struct page *page;
 	struct address_space *mapping;
 	__be32 *pptr, *curr, *end;
@@ -30,8 +29,8 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma
 		return size;
 
 	dprint(DBG_BITMAP, "block_allocate: %u,%u,%u\n", size, offset, len);
-	mutex_lock(&sbi->alloc_mutex);
-	mapping = sbi->alloc_file->i_mapping;
+	mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex);
+	mapping = HFSPLUS_SB(sb).alloc_file->i_mapping;
 	page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, NULL);
 	if (IS_ERR(page)) {
 		start = size;
@@ -151,17 +150,16 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma
 	set_page_dirty(page);
 	kunmap(page);
 	*max = offset + (curr - pptr) * 32 + i - start;
-	sbi->free_blocks -= *max;
+	HFSPLUS_SB(sb).free_blocks -= *max;
 	sb->s_dirt = 1;
 	dprint(DBG_BITMAP, "-> %u,%u\n", start, *max);
 out:
-	mutex_unlock(&sbi->alloc_mutex);
+	mutex_unlock(&HFSPLUS_SB(sb).alloc_file->i_mutex);
 	return start;
 }
 
 int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count)
 {
-	struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
 	struct page *page;
 	struct address_space *mapping;
 	__be32 *pptr, *curr, *end;
@@ -174,11 +172,11 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count)
 
 	dprint(DBG_BITMAP, "block_free: %u,%u\n", offset, count);
 	/* are all of the bits in range? */
-	if ((offset + count) > sbi->total_blocks)
+	if ((offset + count) > HFSPLUS_SB(sb).total_blocks)
 		return -2;
 
-	mutex_lock(&sbi->alloc_mutex);
-	mapping = sbi->alloc_file->i_mapping;
+	mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex);
+	mapping = HFSPLUS_SB(sb).alloc_file->i_mapping;
 	pnr = offset / PAGE_CACHE_BITS;
 	page = read_mapping_page(mapping, pnr, NULL);
 	pptr = kmap(page);
@@ -226,9 +224,9 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count)
 out:
 	set_page_dirty(page);
 	kunmap(page);
-	sbi->free_blocks += len;
+	HFSPLUS_SB(sb).free_blocks += len;
 	sb->s_dirt = 1;
-	mutex_unlock(&sbi->alloc_mutex);
+	mutex_unlock(&HFSPLUS_SB(sb).alloc_file->i_mutex);
 
 	return 0;
 }
diff --git a/trunk/fs/hfsplus/brec.c b/trunk/fs/hfsplus/brec.c
index 2f39d05443e1..c88e5d72a402 100644
--- a/trunk/fs/hfsplus/brec.c
+++ b/trunk/fs/hfsplus/brec.c
@@ -42,13 +42,10 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec)
 		recoff = hfs_bnode_read_u16(node, node->tree->node_size - (rec + 1) * 2);
 		if (!recoff)
 			return 0;
-
-		retval = hfs_bnode_read_u16(node, recoff) + 2;
-		if (retval > node->tree->max_key_len + 2) {
-			printk(KERN_ERR "hfs: keylen %d too large\n",
-				retval);
-			retval = 0;
-		}
+		if (node->tree->attributes & HFS_TREE_BIGKEYS)
+			retval = hfs_bnode_read_u16(node, recoff) + 2;
+		else
+			retval = (hfs_bnode_read_u8(node, recoff) | 1) + 1;
 	}
 	return retval;
 }
@@ -219,7 +216,7 @@ int hfs_brec_remove(struct hfs_find_data *fd)
 static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd)
 {
 	struct hfs_btree *tree;
-	struct hfs_bnode *node, *new_node, *next_node;
+	struct hfs_bnode *node, *new_node;
 	struct hfs_bnode_desc node_desc;
 	int num_recs, new_rec_off, new_off, old_rec_off;
 	int data_start, data_end, size;
@@ -238,17 +235,6 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd)
 	new_node->type = node->type;
 	new_node->height = node->height;
 
-	if (node->next)
-		next_node = hfs_bnode_find(tree, node->next);
-	else
-		next_node = NULL;
-
-	if (IS_ERR(next_node)) {
-		hfs_bnode_put(node);
-		hfs_bnode_put(new_node);
-		return next_node;
-	}
-
 	size = tree->node_size / 2 - node->num_recs * 2 - 14;
 	old_rec_off = tree->node_size - 4;
 	num_recs = 1;
@@ -262,8 +248,6 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd)
 		/* panic? */
 		hfs_bnode_put(node);
 		hfs_bnode_put(new_node);
-		if (next_node)
-			hfs_bnode_put(next_node);
 		return ERR_PTR(-ENOSPC);
 	}
 
@@ -318,7 +302,8 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd)
 	hfs_bnode_write(node, &node_desc, 0, sizeof(node_desc));
 
 	/* update next bnode header */
-	if (next_node) {
+	if (new_node->next) {
+		struct hfs_bnode *next_node = hfs_bnode_find(tree, new_node->next);
 		next_node->prev = new_node->this;
 		hfs_bnode_read(next_node, &node_desc, 0, sizeof(node_desc));
 		node_desc.prev = cpu_to_be32(next_node->prev);
diff --git a/trunk/fs/hfsplus/btree.c b/trunk/fs/hfsplus/btree.c
index 22e4d4e32999..e49fcee1e293 100644
--- a/trunk/fs/hfsplus/btree.c
+++ b/trunk/fs/hfsplus/btree.c
@@ -30,7 +30,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
 	if (!tree)
 		return NULL;
 
-	mutex_init(&tree->tree_lock);
+	init_MUTEX(&tree->tree_lock);
 	spin_lock_init(&tree->hash_lock);
 	tree->sb = sb;
 	tree->cnid = id;
@@ -39,16 +39,10 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
 		goto free_tree;
 	tree->inode = inode;
 
-	if (!HFSPLUS_I(tree->inode)->first_blocks) {
-		printk(KERN_ERR
-		       "hfs: invalid btree extent records (0 size).\n");
-		goto free_inode;
-	}
-
 	mapping = tree->inode->i_mapping;
 	page = read_mapping_page(mapping, 0, NULL);
 	if (IS_ERR(page))
-		goto free_inode;
+		goto free_tree;
 
 	/* Load the header */
 	head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc));
@@ -63,56 +57,27 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
 	tree->max_key_len = be16_to_cpu(head->max_key_len);
 	tree->depth = be16_to_cpu(head->depth);
 
-	/* Verify the tree and set the correct compare function */
-	switch (id) {
-	case HFSPLUS_EXT_CNID:
-		if (tree->max_key_len != HFSPLUS_EXT_KEYLEN - sizeof(u16)) {
-			printk(KERN_ERR "hfs: invalid extent max_key_len %d\n",
-				tree->max_key_len);
-			goto fail_page;
-		}
-		if (tree->attributes & HFS_TREE_VARIDXKEYS) {
-			printk(KERN_ERR "hfs: invalid extent btree flag\n");
-			goto fail_page;
-		}
-
+	/* Set the correct compare function */
+	if (id == HFSPLUS_EXT_CNID) {
 		tree->keycmp = hfsplus_ext_cmp_key;
-		break;
-	case HFSPLUS_CAT_CNID:
-		if (tree->max_key_len != HFSPLUS_CAT_KEYLEN - sizeof(u16)) {
-			printk(KERN_ERR "hfs: invalid catalog max_key_len %d\n",
-				tree->max_key_len);
-			goto fail_page;
-		}
-		if (!(tree->attributes & HFS_TREE_VARIDXKEYS)) {
-			printk(KERN_ERR "hfs: invalid catalog btree flag\n");
-			goto fail_page;
-		}
-
-		if (test_bit(HFSPLUS_SB_HFSX, &HFSPLUS_SB(sb)->flags) &&
+	} else if (id == HFSPLUS_CAT_CNID) {
+		if ((HFSPLUS_SB(sb).flags & HFSPLUS_SB_HFSX) &&
 		    (head->key_type == HFSPLUS_KEY_BINARY))
 			tree->keycmp = hfsplus_cat_bin_cmp_key;
 		else {
 			tree->keycmp = hfsplus_cat_case_cmp_key;
-			set_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
+			HFSPLUS_SB(sb).flags |= HFSPLUS_SB_CASEFOLD;
 		}
-		break;
-	default:
+	} else {
 		printk(KERN_ERR "hfs: unknown B*Tree requested\n");
 		goto fail_page;
 	}
 
-	if (!(tree->attributes & HFS_TREE_BIGKEYS)) {
-		printk(KERN_ERR "hfs: invalid btree flag\n");
-		goto fail_page;
-	}
-
 	size = tree->node_size;
 	if (!is_power_of_2(size))
 		goto fail_page;
 	if (!tree->node_count)
 		goto fail_page;
-
 	tree->node_size_shift = ffs(size) - 1;
 
 	tree->pages_per_bnode = (tree->node_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
@@ -122,11 +87,10 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
 	return tree;
 
  fail_page:
-	page_cache_release(page);
- free_inode:
 	tree->inode->i_mapping->a_ops = &hfsplus_aops;
-	iput(tree->inode);
+	page_cache_release(page);
  free_tree:
+	iput(tree->inode);
 	kfree(tree);
 	return NULL;
 }
@@ -228,18 +192,17 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
 
 	while (!tree->free_nodes) {
 		struct inode *inode = tree->inode;
-		struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
 		u32 count;
 		int res;
 
 		res = hfsplus_file_extend(inode);
 		if (res)
 			return ERR_PTR(res);
-		hip->phys_size = inode->i_size =
-			(loff_t)hip->alloc_blocks <<
-				HFSPLUS_SB(tree->sb)->alloc_blksz_shift;
-		hip->fs_blocks =
-			hip->alloc_blocks << HFSPLUS_SB(tree->sb)->fs_shift;
+		HFSPLUS_I(inode).phys_size = inode->i_size =
+				(loff_t)HFSPLUS_I(inode).alloc_blocks <<
+				HFSPLUS_SB(tree->sb).alloc_blksz_shift;
+		HFSPLUS_I(inode).fs_blocks = HFSPLUS_I(inode).alloc_blocks <<
+					     HFSPLUS_SB(tree->sb).fs_shift;
 		inode_set_bytes(inode, inode->i_size);
 		count = inode->i_size >> tree->node_size_shift;
 		tree->free_nodes = count - tree->node_count;
diff --git a/trunk/fs/hfsplus/catalog.c b/trunk/fs/hfsplus/catalog.c
index 8af45fc5b051..f6874acb2cf2 100644
--- a/trunk/fs/hfsplus/catalog.c
+++ b/trunk/fs/hfsplus/catalog.c
@@ -67,7 +67,7 @@ static void hfsplus_cat_build_key_uni(hfsplus_btree_key *key, u32 parent,
 	key->key_len = cpu_to_be16(6 + ustrlen);
 }
 
-void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms)
+static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms)
 {
 	if (inode->i_flags & S_IMMUTABLE)
 		perms->rootflags |= HFSPLUS_FLG_IMMUTABLE;
@@ -77,24 +77,15 @@ void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms)
 		perms->rootflags |= HFSPLUS_FLG_APPEND;
 	else
 		perms->rootflags &= ~HFSPLUS_FLG_APPEND;
-
-	perms->userflags = HFSPLUS_I(inode)->userflags;
+	HFSPLUS_I(inode).rootflags = perms->rootflags;
+	HFSPLUS_I(inode).userflags = perms->userflags;
 	perms->mode = cpu_to_be16(inode->i_mode);
 	perms->owner = cpu_to_be32(inode->i_uid);
 	perms->group = cpu_to_be32(inode->i_gid);
-
-	if (S_ISREG(inode->i_mode))
-		perms->dev = cpu_to_be32(inode->i_nlink);
-	else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode))
-		perms->dev = cpu_to_be32(inode->i_rdev);
-	else
-		perms->dev = 0;
 }
 
 static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct inode *inode)
 {
-	struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
-
 	if (S_ISDIR(inode->i_mode)) {
 		struct hfsplus_cat_folder *folder;
 
@@ -102,13 +93,13 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct i
 		memset(folder, 0, sizeof(*folder));
 		folder->type = cpu_to_be16(HFSPLUS_FOLDER);
 		folder->id = cpu_to_be32(inode->i_ino);
-		HFSPLUS_I(inode)->create_date =
+		HFSPLUS_I(inode).create_date =
 			folder->create_date =
 			folder->content_mod_date =
 			folder->attribute_mod_date =
 			folder->access_date = hfsp_now2mt();
-		hfsplus_cat_set_perms(inode, &folder->permissions);
-		if (inode == sbi->hidden_dir)
+		hfsplus_set_perms(inode, &folder->permissions);
+		if (inode == HFSPLUS_SB(inode->i_sb).hidden_dir)
 			/* invisible and namelocked */
 			folder->user_info.frFlags = cpu_to_be16(0x5000);
 		return sizeof(*folder);
@@ -120,19 +111,19 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct i
 		file->type = cpu_to_be16(HFSPLUS_FILE);
 		file->flags = cpu_to_be16(HFSPLUS_FILE_THREAD_EXISTS);
 		file->id = cpu_to_be32(cnid);
-		HFSPLUS_I(inode)->create_date =
+		HFSPLUS_I(inode).create_date =
 			file->create_date =
 			file->content_mod_date =
 			file->attribute_mod_date =
 			file->access_date = hfsp_now2mt();
 		if (cnid == inode->i_ino) {
-			hfsplus_cat_set_perms(inode, &file->permissions);
+			hfsplus_set_perms(inode, &file->permissions);
 			if (S_ISLNK(inode->i_mode)) {
 				file->user_info.fdType = cpu_to_be32(HFSP_SYMLINK_TYPE);
 				file->user_info.fdCreator = cpu_to_be32(HFSP_SYMLINK_CREATOR);
 			} else {
-				file->user_info.fdType = cpu_to_be32(sbi->type);
-				file->user_info.fdCreator = cpu_to_be32(sbi->creator);
+				file->user_info.fdType = cpu_to_be32(HFSPLUS_SB(inode->i_sb).type);
+				file->user_info.fdCreator = cpu_to_be32(HFSPLUS_SB(inode->i_sb).creator);
 			}
 			if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE)
 				file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED);
@@ -140,8 +131,8 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct i
 			file->user_info.fdType = cpu_to_be32(HFSP_HARDLINK_TYPE);
 			file->user_info.fdCreator = cpu_to_be32(HFSP_HFSPLUS_CREATOR);
 			file->user_info.fdFlags = cpu_to_be16(0x100);
-			file->create_date = HFSPLUS_I(sbi->hidden_dir)->create_date;
-			file->permissions.dev = cpu_to_be32(HFSPLUS_I(inode)->linkid);
+			file->create_date = HFSPLUS_I(HFSPLUS_SB(inode->i_sb).hidden_dir).create_date;
+			file->permissions.dev = cpu_to_be32(HFSPLUS_I(inode).dev);
 		}
 		return sizeof(*file);
 	}
@@ -189,14 +180,15 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid,
 
 int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, struct inode *inode)
 {
-	struct super_block *sb = dir->i_sb;
 	struct hfs_find_data fd;
+	struct super_block *sb;
 	hfsplus_cat_entry entry;
 	int entry_size;
 	int err;
 
 	dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n", str->name, cnid, inode->i_nlink);
-	hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
+	sb = dir->i_sb;
+	hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd);
 
 	hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
 	entry_size = hfsplus_fill_cat_thread(sb, &entry, S_ISDIR(inode->i_mode) ?
@@ -242,7 +234,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, struct ino
 
 int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
 {
-	struct super_block *sb = dir->i_sb;
+	struct super_block *sb;
 	struct hfs_find_data fd;
 	struct hfsplus_fork_raw fork;
 	struct list_head *pos;
@@ -250,7 +242,8 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
 	u16 type;
 
 	dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n", str ? str->name : NULL, cnid);
-	hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
+	sb = dir->i_sb;
+	hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd);
 
 	if (!str) {
 		int len;
@@ -286,7 +279,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
 		hfsplus_free_fork(sb, cnid, &fork, HFSPLUS_TYPE_RSRC);
 	}
 
-	list_for_each(pos, &HFSPLUS_I(dir)->open_dir_list) {
+	list_for_each(pos, &HFSPLUS_I(dir).open_dir_list) {
 		struct hfsplus_readdir_data *rd =
 			list_entry(pos, struct hfsplus_readdir_data, list);
 		if (fd.tree->keycmp(fd.search_key, (void *)&rd->key) < 0)
@@ -319,7 +312,7 @@ int hfsplus_rename_cat(u32 cnid,
 		       struct inode *src_dir, struct qstr *src_name,
 		       struct inode *dst_dir, struct qstr *dst_name)
 {
-	struct super_block *sb = src_dir->i_sb;
+	struct super_block *sb;
 	struct hfs_find_data src_fd, dst_fd;
 	hfsplus_cat_entry entry;
 	int entry_size, type;
@@ -327,7 +320,8 @@ int hfsplus_rename_cat(u32 cnid,
 
 	dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", cnid, src_dir->i_ino, src_name->name,
 		dst_dir->i_ino, dst_name->name);
-	hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd);
+	sb = src_dir->i_sb;
+	hfs_find_init(HFSPLUS_SB(sb).cat_tree, &src_fd);
 	dst_fd = src_fd;
 
 	/* find the old dir entry and read the data */
diff --git a/trunk/fs/hfsplus/dir.c b/trunk/fs/hfsplus/dir.c
index d236d85ec9d7..764fd1bdca88 100644
--- a/trunk/fs/hfsplus/dir.c
+++ b/trunk/fs/hfsplus/dir.c
@@ -39,7 +39,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
 
 	dentry->d_op = &hfsplus_dentry_operations;
 	dentry->d_fsdata = NULL;
-	hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
+	hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd);
 	hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name);
 again:
 	err = hfs_brec_read(&fd, &entry, sizeof(entry));
@@ -68,9 +68,9 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
 		cnid = be32_to_cpu(entry.file.id);
 		if (entry.file.user_info.fdType == cpu_to_be32(HFSP_HARDLINK_TYPE) &&
 		    entry.file.user_info.fdCreator == cpu_to_be32(HFSP_HFSPLUS_CREATOR) &&
-		    (entry.file.create_date == HFSPLUS_I(HFSPLUS_SB(sb)->hidden_dir)->create_date ||
-		     entry.file.create_date == HFSPLUS_I(sb->s_root->d_inode)->create_date) &&
-		    HFSPLUS_SB(sb)->hidden_dir) {
+		    (entry.file.create_date == HFSPLUS_I(HFSPLUS_SB(sb).hidden_dir).create_date ||
+		     entry.file.create_date == HFSPLUS_I(sb->s_root->d_inode).create_date) &&
+		    HFSPLUS_SB(sb).hidden_dir) {
 			struct qstr str;
 			char name[32];
 
@@ -86,8 +86,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
 				linkid = be32_to_cpu(entry.file.permissions.dev);
 				str.len = sprintf(name, "iNode%d", linkid);
 				str.name = name;
-				hfsplus_cat_build_key(sb, fd.search_key,
-					HFSPLUS_SB(sb)->hidden_dir->i_ino, &str);
+				hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_SB(sb).hidden_dir->i_ino, &str);
 				goto again;
 			}
 		} else if (!dentry->d_fsdata)
@@ -102,7 +101,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
 	if (IS_ERR(inode))
 		return ERR_CAST(inode);
 	if (S_ISREG(inode->i_mode))
-		HFSPLUS_I(inode)->linkid = linkid;
+		HFSPLUS_I(inode).dev = linkid;
 out:
 	d_add(dentry, inode);
 	return NULL;
@@ -125,7 +124,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	if (filp->f_pos >= inode->i_size)
 		return 0;
 
-	hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
+	hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd);
 	hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL);
 	err = hfs_brec_find(&fd);
 	if (err)
@@ -181,9 +180,8 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
 				err = -EIO;
 				goto out;
 			}
-			if (HFSPLUS_SB(sb)->hidden_dir &&
-			    HFSPLUS_SB(sb)->hidden_dir->i_ino ==
-					be32_to_cpu(entry.folder.id))
+			if (HFSPLUS_SB(sb).hidden_dir &&
+			    HFSPLUS_SB(sb).hidden_dir->i_ino == be32_to_cpu(entry.folder.id))
 				goto next;
 			if (filldir(dirent, strbuf, len, filp->f_pos,
 				    be32_to_cpu(entry.folder.id), DT_DIR))
@@ -219,7 +217,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		}
 		filp->private_data = rd;
 		rd->file = filp;
-		list_add(&rd->list, &HFSPLUS_I(inode)->open_dir_list);
+		list_add(&rd->list, &HFSPLUS_I(inode).open_dir_list);
 	}
 	memcpy(&rd->key, fd.key, sizeof(struct hfsplus_cat_key));
 out:
@@ -231,18 +229,38 @@ static int hfsplus_dir_release(struct inode *inode, struct file *file)
 {
 	struct hfsplus_readdir_data *rd = file->private_data;
 	if (rd) {
-		mutex_lock(&inode->i_mutex);
 		list_del(&rd->list);
-		mutex_unlock(&inode->i_mutex);
 		kfree(rd);
 	}
 	return 0;
 }
 
+static int hfsplus_create(struct inode *dir, struct dentry *dentry, int mode,
+			  struct nameidata *nd)
+{
+	struct inode *inode;
+	int res;
+
+	inode = hfsplus_new_inode(dir->i_sb, mode);
+	if (!inode)
+		return -ENOSPC;
+
+	res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode);
+	if (res) {
+		inode->i_nlink = 0;
+		hfsplus_delete_inode(inode);
+		iput(inode);
+		return res;
+	}
+	hfsplus_instantiate(dentry, inode, inode->i_ino);
+	mark_inode_dirty(inode);
+	return 0;
+}
+
 static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir,
 			struct dentry *dst_dentry)
 {
-	struct hfsplus_sb_info *sbi = HFSPLUS_SB(dst_dir->i_sb);
+	struct super_block *sb = dst_dir->i_sb;
 	struct inode *inode = src_dentry->d_inode;
 	struct inode *src_dir = src_dentry->d_parent->d_inode;
 	struct qstr str;
@@ -252,10 +270,7 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir,
 
 	if (HFSPLUS_IS_RSRC(inode))
 		return -EPERM;
-	if (!S_ISREG(inode->i_mode))
-		return -EPERM;
 
-	mutex_lock(&sbi->vh_mutex);
 	if (inode->i_ino == (u32)(unsigned long)src_dentry->d_fsdata) {
 		for (;;) {
 			get_random_bytes(&id, sizeof(cnid));
@@ -264,41 +279,40 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir,
 			str.len = sprintf(name, "iNode%d", id);
 			res = hfsplus_rename_cat(inode->i_ino,
 						 src_dir, &src_dentry->d_name,
-						 sbi->hidden_dir, &str);
+						 HFSPLUS_SB(sb).hidden_dir, &str);
 			if (!res)
 				break;
 			if (res != -EEXIST)
-				goto out;
+				return res;
 		}
-		HFSPLUS_I(inode)->linkid = id;
-		cnid = sbi->next_cnid++;
+		HFSPLUS_I(inode).dev = id;
+		cnid = HFSPLUS_SB(sb).next_cnid++;
 		src_dentry->d_fsdata = (void *)(unsigned long)cnid;
 		res = hfsplus_create_cat(cnid, src_dir, &src_dentry->d_name, inode);
 		if (res)
 			/* panic? */
-			goto out;
-		sbi->file_count++;
+			return res;
+		HFSPLUS_SB(sb).file_count++;
 	}
-	cnid = sbi->next_cnid++;
+	cnid = HFSPLUS_SB(sb).next_cnid++;
 	res = hfsplus_create_cat(cnid, dst_dir, &dst_dentry->d_name, inode);
 	if (res)
-		goto out;
+		return res;
 
 	inc_nlink(inode);
 	hfsplus_instantiate(dst_dentry, inode, cnid);
 	atomic_inc(&inode->i_count);
 	inode->i_ctime = CURRENT_TIME_SEC;
 	mark_inode_dirty(inode);
-	sbi->file_count++;
-	dst_dir->i_sb->s_dirt = 1;
-out:
-	mutex_unlock(&sbi->vh_mutex);
-	return res;
+	HFSPLUS_SB(sb).file_count++;
+	sb->s_dirt = 1;
+
+	return 0;
 }
 
 static int hfsplus_unlink(struct inode *dir, struct dentry *dentry)
 {
-	struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
+	struct super_block *sb = dir->i_sb;
 	struct inode *inode = dentry->d_inode;
 	struct qstr str;
 	char name[32];
@@ -308,22 +322,21 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry)
 	if (HFSPLUS_IS_RSRC(inode))
 		return -EPERM;
 
-	mutex_lock(&sbi->vh_mutex);
 	cnid = (u32)(unsigned long)dentry->d_fsdata;
 	if (inode->i_ino == cnid &&
-	    atomic_read(&HFSPLUS_I(inode)->opencnt)) {
+	    atomic_read(&HFSPLUS_I(inode).opencnt)) {
 		str.name = name;
 		str.len = sprintf(name, "temp%lu", inode->i_ino);
 		res = hfsplus_rename_cat(inode->i_ino,
 					 dir, &dentry->d_name,
-					 sbi->hidden_dir, &str);
+					 HFSPLUS_SB(sb).hidden_dir, &str);
 		if (!res)
 			inode->i_flags |= S_DEAD;
-		goto out;
+		return res;
 	}
 	res = hfsplus_delete_cat(cnid, dir, &dentry->d_name);
 	if (res)
-		goto out;
+		return res;
 
 	if (inode->i_nlink > 0)
 		drop_nlink(inode);
@@ -331,10 +344,10 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry)
 		clear_nlink(inode);
 	if (!inode->i_nlink) {
 		if (inode->i_ino != cnid) {
-			sbi->file_count--;
-			if (!atomic_read(&HFSPLUS_I(inode)->opencnt)) {
+			HFSPLUS_SB(sb).file_count--;
+			if (!atomic_read(&HFSPLUS_I(inode).opencnt)) {
 				res = hfsplus_delete_cat(inode->i_ino,
-							 sbi->hidden_dir,
+							 HFSPLUS_SB(sb).hidden_dir,
 							 NULL);
 				if (!res)
 					hfsplus_delete_inode(inode);
@@ -343,108 +356,107 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry)
 		} else
 			hfsplus_delete_inode(inode);
 	} else
-		sbi->file_count--;
+		HFSPLUS_SB(sb).file_count--;
 	inode->i_ctime = CURRENT_TIME_SEC;
 	mark_inode_dirty(inode);
-out:
-	mutex_unlock(&sbi->vh_mutex);
+
 	return res;
 }
 
+static int hfsplus_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+	struct inode *inode;
+	int res;
+
+	inode = hfsplus_new_inode(dir->i_sb, S_IFDIR | mode);
+	if (!inode)
+		return -ENOSPC;
+
+	res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode);
+	if (res) {
+		inode->i_nlink = 0;
+		hfsplus_delete_inode(inode);
+		iput(inode);
+		return res;
+	}
+	hfsplus_instantiate(dentry, inode, inode->i_ino);
+	mark_inode_dirty(inode);
+	return 0;
+}
+
 static int hfsplus_rmdir(struct inode *dir, struct dentry *dentry)
 {
-	struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
-	struct inode *inode = dentry->d_inode;
+	struct inode *inode;
 	int res;
 
+	inode = dentry->d_inode;
 	if (inode->i_size != 2)
 		return -ENOTEMPTY;
-
-	mutex_lock(&sbi->vh_mutex);
 	res = hfsplus_delete_cat(inode->i_ino, dir, &dentry->d_name);
 	if (res)
-		goto out;
+		return res;
 	clear_nlink(inode);
 	inode->i_ctime = CURRENT_TIME_SEC;
 	hfsplus_delete_inode(inode);
 	mark_inode_dirty(inode);
-out:
-	mutex_unlock(&sbi->vh_mutex);
-	return res;
+	return 0;
 }
 
 static int hfsplus_symlink(struct inode *dir, struct dentry *dentry,
 			   const char *symname)
 {
-	struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
+	struct super_block *sb;
 	struct inode *inode;
-	int res = -ENOSPC;
+	int res;
 
-	mutex_lock(&sbi->vh_mutex);
-	inode = hfsplus_new_inode(dir->i_sb, S_IFLNK | S_IRWXUGO);
+	sb = dir->i_sb;
+	inode = hfsplus_new_inode(sb, S_IFLNK | S_IRWXUGO);
 	if (!inode)
-		goto out;
+		return -ENOSPC;
 
 	res = page_symlink(inode, symname, strlen(symname) + 1);
-	if (res)
-		goto out_err;
+	if (res) {
+		inode->i_nlink = 0;
+		hfsplus_delete_inode(inode);
+		iput(inode);
+		return res;
+	}
 
+	mark_inode_dirty(inode);
 	res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode);
-	if (res)
-		goto out_err;
 
-	hfsplus_instantiate(dentry, inode, inode->i_ino);
-	mark_inode_dirty(inode);
-	goto out;
+	if (!res) {
+		hfsplus_instantiate(dentry, inode, inode->i_ino);
+		mark_inode_dirty(inode);
+	}
 
-out_err:
-	inode->i_nlink = 0;
-	hfsplus_delete_inode(inode);
-	iput(inode);
-out:
-	mutex_unlock(&sbi->vh_mutex);
 	return res;
 }
 
 static int hfsplus_mknod(struct inode *dir, struct dentry *dentry,
 			 int mode, dev_t rdev)
 {
-	struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
+	struct super_block *sb;
 	struct inode *inode;
-	int res = -ENOSPC;
+	int res;
 
-	mutex_lock(&sbi->vh_mutex);
-	inode = hfsplus_new_inode(dir->i_sb, mode);
+	sb = dir->i_sb;
+	inode = hfsplus_new_inode(sb, mode);
 	if (!inode)
-		goto out;
-
-	if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode))
-		init_special_inode(inode, mode, rdev);
+		return -ENOSPC;
 
 	res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode);
 	if (res) {
 		inode->i_nlink = 0;
 		hfsplus_delete_inode(inode);
 		iput(inode);
-		goto out;
+		return res;
 	}
-
+	init_special_inode(inode, mode, rdev);
 	hfsplus_instantiate(dentry, inode, inode->i_ino);
 	mark_inode_dirty(inode);
-out:
-	mutex_unlock(&sbi->vh_mutex);
-	return res;
-}
 
-static int hfsplus_create(struct inode *dir, struct dentry *dentry, int mode,
-			  struct nameidata *nd)
-{
-	return hfsplus_mknod(dir, dentry, mode, 0);
-}
-
-static int hfsplus_mkdir(struct inode *dir, struct dentry *dentry, int mode)
-{
-	return hfsplus_mknod(dir, dentry, mode | S_IFDIR, 0);
+	return 0;
 }
 
 static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry,
@@ -454,10 +466,7 @@ static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 	/* Unlink destination if it already exists */
 	if (new_dentry->d_inode) {
-		if (S_ISDIR(new_dentry->d_inode->i_mode))
-			res = hfsplus_rmdir(new_dir, new_dentry);
-		else
-			res = hfsplus_unlink(new_dir, new_dentry);
+		res = hfsplus_unlink(new_dir, new_dentry);
 		if (res)
 			return res;
 	}
diff --git a/trunk/fs/hfsplus/extents.c b/trunk/fs/hfsplus/extents.c
index 0c9cb1820a52..0022eec63cda 100644
--- a/trunk/fs/hfsplus/extents.c
+++ b/trunk/fs/hfsplus/extents.c
@@ -85,49 +85,35 @@ static u32 hfsplus_ext_lastblock(struct hfsplus_extent *ext)
 
 static void __hfsplus_ext_write_extent(struct inode *inode, struct hfs_find_data *fd)
 {
-	struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
 	int res;
 
-	WARN_ON(!mutex_is_locked(&hip->extents_lock));
-
-	hfsplus_ext_build_key(fd->search_key, inode->i_ino, hip->cached_start,
-			      HFSPLUS_IS_RSRC(inode) ?
-				HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA);
-
+	hfsplus_ext_build_key(fd->search_key, inode->i_ino, HFSPLUS_I(inode).cached_start,
+			      HFSPLUS_IS_RSRC(inode) ?  HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA);
 	res = hfs_brec_find(fd);
-	if (hip->flags & HFSPLUS_FLG_EXT_NEW) {
+	if (HFSPLUS_I(inode).flags & HFSPLUS_FLG_EXT_NEW) {
 		if (res != -ENOENT)
 			return;
-		hfs_brec_insert(fd, hip->cached_extents,
-				sizeof(hfsplus_extent_rec));
-		hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW);
+		hfs_brec_insert(fd, HFSPLUS_I(inode).cached_extents, sizeof(hfsplus_extent_rec));
+		HFSPLUS_I(inode).flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW);
 	} else {
 		if (res)
 			return;
-		hfs_bnode_write(fd->bnode, hip->cached_extents,
-				fd->entryoffset, fd->entrylength);
-		hip->flags &= ~HFSPLUS_FLG_EXT_DIRTY;
+		hfs_bnode_write(fd->bnode, HFSPLUS_I(inode).cached_extents, fd->entryoffset, fd->entrylength);
+		HFSPLUS_I(inode).flags &= ~HFSPLUS_FLG_EXT_DIRTY;
 	}
 }
 
-static void hfsplus_ext_write_extent_locked(struct inode *inode)
+void hfsplus_ext_write_extent(struct inode *inode)
 {
-	if (HFSPLUS_I(inode)->flags & HFSPLUS_FLG_EXT_DIRTY) {
+	if (HFSPLUS_I(inode).flags & HFSPLUS_FLG_EXT_DIRTY) {
 		struct hfs_find_data fd;
 
-		hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd);
+		hfs_find_init(HFSPLUS_SB(inode->i_sb).ext_tree, &fd);
 		__hfsplus_ext_write_extent(inode, &fd);
 		hfs_find_exit(&fd);
 	}
 }
 
-void hfsplus_ext_write_extent(struct inode *inode)
-{
-	mutex_lock(&HFSPLUS_I(inode)->extents_lock);
-	hfsplus_ext_write_extent_locked(inode);
-	mutex_unlock(&HFSPLUS_I(inode)->extents_lock);
-}
-
 static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd,
 					    struct hfsplus_extent *extent,
 					    u32 cnid, u32 block, u8 type)
@@ -150,39 +136,33 @@ static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd,
 
 static inline int __hfsplus_ext_cache_extent(struct hfs_find_data *fd, struct inode *inode, u32 block)
 {
-	struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
 	int res;
 
-	WARN_ON(!mutex_is_locked(&hip->extents_lock));
-
-	if (hip->flags & HFSPLUS_FLG_EXT_DIRTY)
+	if (HFSPLUS_I(inode).flags & HFSPLUS_FLG_EXT_DIRTY)
 		__hfsplus_ext_write_extent(inode, fd);
 
-	res = __hfsplus_ext_read_extent(fd, hip->cached_extents, inode->i_ino,
-					block, HFSPLUS_IS_RSRC(inode) ?
-						HFSPLUS_TYPE_RSRC :
-						HFSPLUS_TYPE_DATA);
+	res = __hfsplus_ext_read_extent(fd, HFSPLUS_I(inode).cached_extents, inode->i_ino,
+					block, HFSPLUS_IS_RSRC(inode) ? HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA);
 	if (!res) {
-		hip->cached_start = be32_to_cpu(fd->key->ext.start_block);
-		hip->cached_blocks = hfsplus_ext_block_count(hip->cached_extents);
+		HFSPLUS_I(inode).cached_start = be32_to_cpu(fd->key->ext.start_block);
+		HFSPLUS_I(inode).cached_blocks = hfsplus_ext_block_count(HFSPLUS_I(inode).cached_extents);
 	} else {
-		hip->cached_start = hip->cached_blocks = 0;
-		hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW);
+		HFSPLUS_I(inode).cached_start = HFSPLUS_I(inode).cached_blocks = 0;
+		HFSPLUS_I(inode).flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW);
 	}
 	return res;
 }
 
 static int hfsplus_ext_read_extent(struct inode *inode, u32 block)
 {
-	struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
 	struct hfs_find_data fd;
 	int res;
 
-	if (block >= hip->cached_start &&
-	    block < hip->cached_start + hip->cached_blocks)
+	if (block >= HFSPLUS_I(inode).cached_start &&
+	    block < HFSPLUS_I(inode).cached_start + HFSPLUS_I(inode).cached_blocks)
 		return 0;
 
-	hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd);
+	hfs_find_init(HFSPLUS_SB(inode->i_sb).ext_tree, &fd);
 	res = __hfsplus_ext_cache_extent(&fd, inode, block);
 	hfs_find_exit(&fd);
 	return res;
@@ -192,21 +172,21 @@ static int hfsplus_ext_read_extent(struct inode *inode, u32 block)
 int hfsplus_get_block(struct inode *inode, sector_t iblock,
 		      struct buffer_head *bh_result, int create)
 {
-	struct super_block *sb = inode->i_sb;
-	struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
-	struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
+	struct super_block *sb;
 	int res = -EIO;
 	u32 ablock, dblock, mask;
 	int shift;
 
+	sb = inode->i_sb;
+
 	/* Convert inode block to disk allocation block */
-	shift = sbi->alloc_blksz_shift - sb->s_blocksize_bits;
-	ablock = iblock >> sbi->fs_shift;
+	shift = HFSPLUS_SB(sb).alloc_blksz_shift - sb->s_blocksize_bits;
+	ablock = iblock >> HFSPLUS_SB(sb).fs_shift;
 
-	if (iblock >= hip->fs_blocks) {
-		if (iblock > hip->fs_blocks || !create)
+	if (iblock >= HFSPLUS_I(inode).fs_blocks) {
+		if (iblock > HFSPLUS_I(inode).fs_blocks || !create)
 			return -EIO;
-		if (ablock >= hip->alloc_blocks) {
+		if (ablock >= HFSPLUS_I(inode).alloc_blocks) {
 			res = hfsplus_file_extend(inode);
 			if (res)
 				return res;
@@ -214,33 +194,33 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
 	} else
 		create = 0;
 
-	if (ablock < hip->first_blocks) {
-		dblock = hfsplus_ext_find_block(hip->first_extents, ablock);
+	if (ablock < HFSPLUS_I(inode).first_blocks) {
+		dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).first_extents, ablock);
 		goto done;
 	}
 
 	if (inode->i_ino == HFSPLUS_EXT_CNID)
 		return -EIO;
 
-	mutex_lock(&hip->extents_lock);
+	mutex_lock(&HFSPLUS_I(inode).extents_lock);
 	res = hfsplus_ext_read_extent(inode, ablock);
 	if (!res) {
-		dblock = hfsplus_ext_find_block(hip->cached_extents,
-						ablock - hip->cached_start);
+		dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).cached_extents, ablock -
+					     HFSPLUS_I(inode).cached_start);
 	} else {
-		mutex_unlock(&hip->extents_lock);
+		mutex_unlock(&HFSPLUS_I(inode).extents_lock);
 		return -EIO;
 	}
-	mutex_unlock(&hip->extents_lock);
+	mutex_unlock(&HFSPLUS_I(inode).extents_lock);
 
 done:
 	dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock);
-	mask = (1 << sbi->fs_shift) - 1;
-	map_bh(bh_result, sb, (dblock << sbi->fs_shift) + sbi->blockoffset + (iblock & mask));
+	mask = (1 << HFSPLUS_SB(sb).fs_shift) - 1;
+	map_bh(bh_result, sb, (dblock << HFSPLUS_SB(sb).fs_shift) + HFSPLUS_SB(sb).blockoffset + (iblock & mask));
 	if (create) {
 		set_buffer_new(bh_result);
-		hip->phys_size += sb->s_blocksize;
-		hip->fs_blocks++;
+		HFSPLUS_I(inode).phys_size += sb->s_blocksize;
+		HFSPLUS_I(inode).fs_blocks++;
 		inode_add_bytes(inode, sb->s_blocksize);
 		mark_inode_dirty(inode);
 	}
@@ -347,7 +327,7 @@ int hfsplus_free_fork(struct super_block *sb, u32 cnid, struct hfsplus_fork_raw
 	if (total_blocks == blocks)
 		return 0;
 
-	hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd);
+	hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd);
 	do {
 		res = __hfsplus_ext_read_extent(&fd, ext_entry, cnid,
 						total_blocks, type);
@@ -368,33 +348,29 @@ int hfsplus_free_fork(struct super_block *sb, u32 cnid, struct hfsplus_fork_raw
 int hfsplus_file_extend(struct inode *inode)
 {
 	struct super_block *sb = inode->i_sb;
-	struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
-	struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
 	u32 start, len, goal;
 	int res;
 
-	if (sbi->alloc_file->i_size * 8 <
-	    sbi->total_blocks - sbi->free_blocks + 8) {
+	if (HFSPLUS_SB(sb).alloc_file->i_size * 8 < HFSPLUS_SB(sb).total_blocks - HFSPLUS_SB(sb).free_blocks + 8) {
 		// extend alloc file
-		printk(KERN_ERR "hfs: extend alloc file! (%Lu,%u,%u)\n",
-				sbi->alloc_file->i_size * 8,
-				sbi->total_blocks, sbi->free_blocks);
+		printk(KERN_ERR "hfs: extend alloc file! (%Lu,%u,%u)\n", HFSPLUS_SB(sb).alloc_file->i_size * 8,
+			HFSPLUS_SB(sb).total_blocks, HFSPLUS_SB(sb).free_blocks);
 		return -ENOSPC;
 	}
 
-	mutex_lock(&hip->extents_lock);
-	if (hip->alloc_blocks == hip->first_blocks)
-		goal = hfsplus_ext_lastblock(hip->first_extents);
+	mutex_lock(&HFSPLUS_I(inode).extents_lock);
+	if (HFSPLUS_I(inode).alloc_blocks == HFSPLUS_I(inode).first_blocks)
+		goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).first_extents);
 	else {
-		res = hfsplus_ext_read_extent(inode, hip->alloc_blocks);
+		res = hfsplus_ext_read_extent(inode, HFSPLUS_I(inode).alloc_blocks);
 		if (res)
 			goto out;
-		goal = hfsplus_ext_lastblock(hip->cached_extents);
+		goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).cached_extents);
 	}
 
-	len = hip->clump_blocks;
-	start = hfsplus_block_allocate(sb, sbi->total_blocks, goal, &len);
-	if (start >= sbi->total_blocks) {
+	len = HFSPLUS_I(inode).clump_blocks;
+	start = hfsplus_block_allocate(sb, HFSPLUS_SB(sb).total_blocks, goal, &len);
+	if (start >= HFSPLUS_SB(sb).total_blocks) {
 		start = hfsplus_block_allocate(sb, goal, 0, &len);
 		if (start >= goal) {
 			res = -ENOSPC;
@@ -403,56 +379,56 @@ int hfsplus_file_extend(struct inode *inode)
 	}
 
 	dprint(DBG_EXTENT, "extend %lu: %u,%u\n", inode->i_ino, start, len);
-
-	if (hip->alloc_blocks <= hip->first_blocks) {
-		if (!hip->first_blocks) {
+	if (HFSPLUS_I(inode).alloc_blocks <= HFSPLUS_I(inode).first_blocks) {
+		if (!HFSPLUS_I(inode).first_blocks) {
 			dprint(DBG_EXTENT, "first extents\n");
 			/* no extents yet */
-			hip->first_extents[0].start_block = cpu_to_be32(start);
-			hip->first_extents[0].block_count = cpu_to_be32(len);
+			HFSPLUS_I(inode).first_extents[0].start_block = cpu_to_be32(start);
+			HFSPLUS_I(inode).first_extents[0].block_count = cpu_to_be32(len);
 			res = 0;
 		} else {
 			/* try to append to extents in inode */
-			res = hfsplus_add_extent(hip->first_extents,
-						 hip->alloc_blocks,
+			res = hfsplus_add_extent(HFSPLUS_I(inode).first_extents,
+						 HFSPLUS_I(inode).alloc_blocks,
 						 start, len);
 			if (res == -ENOSPC)
 				goto insert_extent;
 		}
 		if (!res) {
-			hfsplus_dump_extent(hip->first_extents);
-			hip->first_blocks += len;
+			hfsplus_dump_extent(HFSPLUS_I(inode).first_extents);
+			HFSPLUS_I(inode).first_blocks += len;
 		}
 	} else {
-		res = hfsplus_add_extent(hip->cached_extents,
-					 hip->alloc_blocks - hip->cached_start,
+		res = hfsplus_add_extent(HFSPLUS_I(inode).cached_extents,
+					 HFSPLUS_I(inode).alloc_blocks -
+					 HFSPLUS_I(inode).cached_start,
 					 start, len);
 		if (!res) {
-			hfsplus_dump_extent(hip->cached_extents);
-			hip->flags |= HFSPLUS_FLG_EXT_DIRTY;
-			hip->cached_blocks += len;
+			hfsplus_dump_extent(HFSPLUS_I(inode).cached_extents);
+			HFSPLUS_I(inode).flags |= HFSPLUS_FLG_EXT_DIRTY;
+			HFSPLUS_I(inode).cached_blocks += len;
 		} else if (res == -ENOSPC)
 			goto insert_extent;
 	}
 out:
-	mutex_unlock(&hip->extents_lock);
+	mutex_unlock(&HFSPLUS_I(inode).extents_lock);
 	if (!res) {
-		hip->alloc_blocks += len;
+		HFSPLUS_I(inode).alloc_blocks += len;
 		mark_inode_dirty(inode);
 	}
 	return res;
 
 insert_extent:
 	dprint(DBG_EXTENT, "insert new extent\n");
-	hfsplus_ext_write_extent_locked(inode);
+	hfsplus_ext_write_extent(inode);
 
-	memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec));
-	hip->cached_extents[0].start_block = cpu_to_be32(start);
-	hip->cached_extents[0].block_count = cpu_to_be32(len);
-	hfsplus_dump_extent(hip->cached_extents);
-	hip->flags |= HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW;
-	hip->cached_start = hip->alloc_blocks;
-	hip->cached_blocks = len;
+	memset(HFSPLUS_I(inode).cached_extents, 0, sizeof(hfsplus_extent_rec));
+	HFSPLUS_I(inode).cached_extents[0].start_block = cpu_to_be32(start);
+	HFSPLUS_I(inode).cached_extents[0].block_count = cpu_to_be32(len);
+	hfsplus_dump_extent(HFSPLUS_I(inode).cached_extents);
+	HFSPLUS_I(inode).flags |= HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW;
+	HFSPLUS_I(inode).cached_start = HFSPLUS_I(inode).alloc_blocks;
+	HFSPLUS_I(inode).cached_blocks = len;
 
 	res = 0;
 	goto out;
@@ -461,15 +437,13 @@ int hfsplus_file_extend(struct inode *inode)
 void hfsplus_file_truncate(struct inode *inode)
 {
 	struct super_block *sb = inode->i_sb;
-	struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
 	struct hfs_find_data fd;
 	u32 alloc_cnt, blk_cnt, start;
 	int res;
 
-	dprint(DBG_INODE, "truncate: %lu, %Lu -> %Lu\n",
-		inode->i_ino, (long long)hip->phys_size, inode->i_size);
-
-	if (inode->i_size > hip->phys_size) {
+	dprint(DBG_INODE, "truncate: %lu, %Lu -> %Lu\n", inode->i_ino,
+	       (long long)HFSPLUS_I(inode).phys_size, inode->i_size);
+	if (inode->i_size > HFSPLUS_I(inode).phys_size) {
 		struct address_space *mapping = inode->i_mapping;
 		struct page *page;
 		void *fsdata;
@@ -486,48 +460,47 @@ void hfsplus_file_truncate(struct inode *inode)
 			return;
 		mark_inode_dirty(inode);
 		return;
-	} else if (inode->i_size == hip->phys_size)
+	} else if (inode->i_size == HFSPLUS_I(inode).phys_size)
 		return;
 
-	blk_cnt = (inode->i_size + HFSPLUS_SB(sb)->alloc_blksz - 1) >>
-			HFSPLUS_SB(sb)->alloc_blksz_shift;
-	alloc_cnt = hip->alloc_blocks;
+	blk_cnt = (inode->i_size + HFSPLUS_SB(sb).alloc_blksz - 1) >> HFSPLUS_SB(sb).alloc_blksz_shift;
+	alloc_cnt = HFSPLUS_I(inode).alloc_blocks;
 	if (blk_cnt == alloc_cnt)
 		goto out;
 
-	mutex_lock(&hip->extents_lock);
-	hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd);
+	mutex_lock(&HFSPLUS_I(inode).extents_lock);
+	hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd);
 	while (1) {
-		if (alloc_cnt == hip->first_blocks) {
-			hfsplus_free_extents(sb, hip->first_extents,
+		if (alloc_cnt == HFSPLUS_I(inode).first_blocks) {
+			hfsplus_free_extents(sb, HFSPLUS_I(inode).first_extents,
 					     alloc_cnt, alloc_cnt - blk_cnt);
-			hfsplus_dump_extent(hip->first_extents);
-			hip->first_blocks = blk_cnt;
+			hfsplus_dump_extent(HFSPLUS_I(inode).first_extents);
+			HFSPLUS_I(inode).first_blocks = blk_cnt;
 			break;
 		}
 		res = __hfsplus_ext_cache_extent(&fd, inode, alloc_cnt);
 		if (res)
 			break;
-		start = hip->cached_start;
-		hfsplus_free_extents(sb, hip->cached_extents,
+		start = HFSPLUS_I(inode).cached_start;
+		hfsplus_free_extents(sb, HFSPLUS_I(inode).cached_extents,
 				     alloc_cnt - start, alloc_cnt - blk_cnt);
-		hfsplus_dump_extent(hip->cached_extents);
+		hfsplus_dump_extent(HFSPLUS_I(inode).cached_extents);
 		if (blk_cnt > start) {
-			hip->flags |= HFSPLUS_FLG_EXT_DIRTY;
+			HFSPLUS_I(inode).flags |= HFSPLUS_FLG_EXT_DIRTY;
 			break;
 		}
 		alloc_cnt = start;
-		hip->cached_start = hip->cached_blocks = 0;
-		hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW);
+		HFSPLUS_I(inode).cached_start = HFSPLUS_I(inode).cached_blocks = 0;
+		HFSPLUS_I(inode).flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW);
 		hfs_brec_remove(&fd);
 	}
 	hfs_find_exit(&fd);
-	mutex_unlock(&hip->extents_lock);
+	mutex_unlock(&HFSPLUS_I(inode).extents_lock);
 
-	hip->alloc_blocks = blk_cnt;
+	HFSPLUS_I(inode).alloc_blocks = blk_cnt;
 out:
-	hip->phys_size = inode->i_size;
-	hip->fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
-	inode_set_bytes(inode, hip->fs_blocks << sb->s_blocksize_bits);
+	HFSPLUS_I(inode).phys_size = inode->i_size;
+	HFSPLUS_I(inode).fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
+	inode_set_bytes(inode, HFSPLUS_I(inode).fs_blocks << sb->s_blocksize_bits);
 	mark_inode_dirty(inode);
 }
diff --git a/trunk/fs/hfsplus/hfsplus_fs.h b/trunk/fs/hfsplus/hfsplus_fs.h
index cb3653efb57a..dc856be3c2b0 100644
--- a/trunk/fs/hfsplus/hfsplus_fs.h
+++ b/trunk/fs/hfsplus/hfsplus_fs.h
@@ -62,7 +62,7 @@ struct hfs_btree {
 	unsigned int depth;
 
 	//unsigned int map1_size, map_size;
-	struct mutex tree_lock;
+	struct semaphore tree_lock;
 
 	unsigned int pages_per_bnode;
 	spinlock_t hash_lock;
@@ -121,21 +121,16 @@ struct hfsplus_sb_info {
 	u32 sect_count;
 	int fs_shift;
 
-	/* immutable data from the volume header */
+	/* Stuff in host order from Vol Header */
 	u32 alloc_blksz;
 	int alloc_blksz_shift;
 	u32 total_blocks;
-	u32 data_clump_blocks, rsrc_clump_blocks;
-
-	/* mutable data from the volume header, protected by alloc_mutex */
 	u32 free_blocks;
-	struct mutex alloc_mutex;
-
-	/* mutable data from the volume header, protected by vh_mutex */
+	u32 next_alloc;
 	u32 next_cnid;
 	u32 file_count;
 	u32 folder_count;
-	struct mutex vh_mutex;
+	u32 data_clump_blocks, rsrc_clump_blocks;
 
 	/* Config options */
 	u32 creator;
@@ -148,50 +143,40 @@ struct hfsplus_sb_info {
 	int part, session;
 
 	unsigned long flags;
+
+	struct hlist_head rsrc_inodes;
 };
 
-#define HFSPLUS_SB_WRITEBACKUP	0
-#define HFSPLUS_SB_NODECOMPOSE	1
-#define HFSPLUS_SB_FORCE	2
-#define HFSPLUS_SB_HFSX		3
-#define HFSPLUS_SB_CASEFOLD	4
+#define HFSPLUS_SB_WRITEBACKUP	0x0001
+#define HFSPLUS_SB_NODECOMPOSE	0x0002
+#define HFSPLUS_SB_FORCE	0x0004
+#define HFSPLUS_SB_HFSX		0x0008
+#define HFSPLUS_SB_CASEFOLD	0x0010
 
 
 struct hfsplus_inode_info {
-	atomic_t opencnt;
-
-	/*
-	 * Extent allocation information, protected by extents_lock.
-	 */
-	u32 first_blocks;
-	u32 clump_blocks;
-	u32 alloc_blocks;
-	u32 cached_start;
-	u32 cached_blocks;
+	struct mutex extents_lock;
+	u32 clump_blocks, alloc_blocks;
+	sector_t fs_blocks;
+	/* Allocation extents from catalog record or volume header */
 	hfsplus_extent_rec first_extents;
+	u32 first_blocks;
 	hfsplus_extent_rec cached_extents;
-	unsigned long flags;
-	struct mutex extents_lock;
+	u32 cached_start, cached_blocks;
+	atomic_t opencnt;
 
-	/*
-	 * Immutable data.
-	 */
 	struct inode *rsrc_inode;
-	__be32 create_date;
+	unsigned long flags;
 
-	/*
-	 * Protected by sbi->vh_mutex.
-	 */
-	u32 linkid;
+	__be32 create_date;
+	/* Device number in hfsplus_permissions in catalog */
+	u32 dev;
+	/* BSD system and user file flags */
+	u8 rootflags;
+	u8 userflags;
 
-	/*
-	 * Protected by i_mutex.
-	 */
-	sector_t fs_blocks;
-	u8 userflags;		/* BSD user file flags */
 	struct list_head open_dir_list;
 	loff_t phys_size;
-
 	struct inode vfs_inode;
 };
 
@@ -199,8 +184,8 @@ struct hfsplus_inode_info {
 #define HFSPLUS_FLG_EXT_DIRTY	0x0002
 #define HFSPLUS_FLG_EXT_NEW	0x0004
 
-#define HFSPLUS_IS_DATA(inode)   (!(HFSPLUS_I(inode)->flags & HFSPLUS_FLG_RSRC))
-#define HFSPLUS_IS_RSRC(inode)   (HFSPLUS_I(inode)->flags & HFSPLUS_FLG_RSRC)
+#define HFSPLUS_IS_DATA(inode)   (!(HFSPLUS_I(inode).flags & HFSPLUS_FLG_RSRC))
+#define HFSPLUS_IS_RSRC(inode)   (HFSPLUS_I(inode).flags & HFSPLUS_FLG_RSRC)
 
 struct hfs_find_data {
 	/* filled by caller */
@@ -326,7 +311,6 @@ int hfsplus_create_cat(u32, struct inode *, struct qstr *, struct inode *);
 int hfsplus_delete_cat(u32, struct inode *, struct qstr *);
 int hfsplus_rename_cat(u32, struct inode *, struct qstr *,
 		       struct inode *, struct qstr *);
-void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms);
 
 /* dir.c */
 extern const struct inode_operations hfsplus_dir_inode_operations;
@@ -388,15 +372,26 @@ int hfsplus_read_wrapper(struct super_block *);
 int hfs_part_find(struct super_block *, sector_t *, sector_t *);
 
 /* access macros */
+/*
 static inline struct hfsplus_sb_info *HFSPLUS_SB(struct super_block *sb)
 {
 	return sb->s_fs_info;
 }
-
 static inline struct hfsplus_inode_info *HFSPLUS_I(struct inode *inode)
 {
 	return list_entry(inode, struct hfsplus_inode_info, vfs_inode);
 }
+*/
+#define HFSPLUS_SB(super)	(*(struct hfsplus_sb_info *)(super)->s_fs_info)
+#define HFSPLUS_I(inode)	(*list_entry(inode, struct hfsplus_inode_info, vfs_inode))
+
+#if 1
+#define hfsplus_kmap(p)		({ struct page *__p = (p); kmap(__p); })
+#define hfsplus_kunmap(p)	({ struct page *__p = (p); kunmap(__p); __p; })
+#else
+#define hfsplus_kmap(p)		kmap(p)
+#define hfsplus_kunmap(p)	kunmap(p)
+#endif
 
 #define sb_bread512(sb, sec, data) ({			\
 	struct buffer_head *__bh;			\
@@ -424,4 +419,6 @@ static inline struct hfsplus_inode_info *HFSPLUS_I(struct inode *inode)
 #define hfsp_ut2mt(t)		__hfsp_ut2mt((t).tv_sec)
 #define hfsp_now2mt()		__hfsp_ut2mt(get_seconds())
 
+#define kdev_t_to_nr(x)		(x)
+
 #endif
diff --git a/trunk/fs/hfsplus/hfsplus_raw.h b/trunk/fs/hfsplus/hfsplus_raw.h
index 6892899fd6fb..fe99fe8db61a 100644
--- a/trunk/fs/hfsplus/hfsplus_raw.h
+++ b/trunk/fs/hfsplus/hfsplus_raw.h
@@ -200,7 +200,6 @@ struct hfsplus_cat_key {
 	struct hfsplus_unistr name;
 } __packed;
 
-#define HFSPLUS_CAT_KEYLEN	(sizeof(struct hfsplus_cat_key))
 
 /* Structs from hfs.h */
 struct hfsp_point {
@@ -324,7 +323,7 @@ struct hfsplus_ext_key {
 	__be32 start_block;
 } __packed;
 
-#define HFSPLUS_EXT_KEYLEN	sizeof(struct hfsplus_ext_key)
+#define HFSPLUS_EXT_KEYLEN 12
 
 /* HFS+ generic BTree key */
 typedef union {
diff --git a/trunk/fs/hfsplus/inode.c b/trunk/fs/hfsplus/inode.c
index 78449280dae0..c5a979d62c65 100644
--- a/trunk/fs/hfsplus/inode.c
+++ b/trunk/fs/hfsplus/inode.c
@@ -36,7 +36,7 @@ static int hfsplus_write_begin(struct file *file, struct address_space *mapping,
 	*pagep = NULL;
 	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
 				hfsplus_get_block,
-				&HFSPLUS_I(mapping->host)->phys_size);
+				&HFSPLUS_I(mapping->host).phys_size);
 	if (unlikely(ret)) {
 		loff_t isize = mapping->host->i_size;
 		if (pos + len > isize)
@@ -62,13 +62,13 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask)
 
 	switch (inode->i_ino) {
 	case HFSPLUS_EXT_CNID:
-		tree = HFSPLUS_SB(sb)->ext_tree;
+		tree = HFSPLUS_SB(sb).ext_tree;
 		break;
 	case HFSPLUS_CAT_CNID:
-		tree = HFSPLUS_SB(sb)->cat_tree;
+		tree = HFSPLUS_SB(sb).cat_tree;
 		break;
 	case HFSPLUS_ATTR_CNID:
-		tree = HFSPLUS_SB(sb)->attr_tree;
+		tree = HFSPLUS_SB(sb).attr_tree;
 		break;
 	default:
 		BUG();
@@ -172,13 +172,12 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent
 	struct hfs_find_data fd;
 	struct super_block *sb = dir->i_sb;
 	struct inode *inode = NULL;
-	struct hfsplus_inode_info *hip;
 	int err;
 
 	if (HFSPLUS_IS_RSRC(dir) || strcmp(dentry->d_name.name, "rsrc"))
 		goto out;
 
-	inode = HFSPLUS_I(dir)->rsrc_inode;
+	inode = HFSPLUS_I(dir).rsrc_inode;
 	if (inode)
 		goto out;
 
@@ -186,13 +185,12 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 
-	hip = HFSPLUS_I(inode);
 	inode->i_ino = dir->i_ino;
-	INIT_LIST_HEAD(&hip->open_dir_list);
-	mutex_init(&hip->extents_lock);
-	hip->flags = HFSPLUS_FLG_RSRC;
+	INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
+	mutex_init(&HFSPLUS_I(inode).extents_lock);
+	HFSPLUS_I(inode).flags = HFSPLUS_FLG_RSRC;
 
-	hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
+	hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd);
 	err = hfsplus_find_cat(sb, dir->i_ino, &fd);
 	if (!err)
 		err = hfsplus_cat_read_inode(inode, &fd);
@@ -201,18 +199,10 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent
 		iput(inode);
 		return ERR_PTR(err);
 	}
-	hip->rsrc_inode = dir;
-	HFSPLUS_I(dir)->rsrc_inode = inode;
+	HFSPLUS_I(inode).rsrc_inode = dir;
+	HFSPLUS_I(dir).rsrc_inode = inode;
 	igrab(dir);
-
-	/*
-	 * __mark_inode_dirty expects inodes to be hashed.  Since we don't
-	 * want resource fork inodes in the regular inode space, we make them
-	 * appear hashed, but do not put on any lists.  hlist_del()
-	 * will work fine and require no locking.
-	 */
-	inode->i_hash.pprev = &inode->i_hash.next;
-
+	hlist_add_head(&inode->i_hash, &HFSPLUS_SB(sb).rsrc_inodes);
 	mark_inode_dirty(inode);
 out:
 	d_add(dentry, inode);
@@ -221,27 +211,30 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent
 
 static void hfsplus_get_perms(struct inode *inode, struct hfsplus_perm *perms, int dir)
 {
-	struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
+	struct super_block *sb = inode->i_sb;
 	u16 mode;
 
 	mode = be16_to_cpu(perms->mode);
 
 	inode->i_uid = be32_to_cpu(perms->owner);
 	if (!inode->i_uid && !mode)
-		inode->i_uid = sbi->uid;
+		inode->i_uid = HFSPLUS_SB(sb).uid;
 
 	inode->i_gid = be32_to_cpu(perms->group);
 	if (!inode->i_gid && !mode)
-		inode->i_gid = sbi->gid;
+		inode->i_gid = HFSPLUS_SB(sb).gid;
 
 	if (dir) {
-		mode = mode ? (mode & S_IALLUGO) : (S_IRWXUGO & ~(sbi->umask));
+		mode = mode ? (mode & S_IALLUGO) :
+			(S_IRWXUGO & ~(HFSPLUS_SB(sb).umask));
 		mode |= S_IFDIR;
 	} else if (!mode)
-		mode = S_IFREG | ((S_IRUGO|S_IWUGO) & ~(sbi->umask));
+		mode = S_IFREG | ((S_IRUGO|S_IWUGO) &
+			~(HFSPLUS_SB(sb).umask));
 	inode->i_mode = mode;
 
-	HFSPLUS_I(inode)->userflags = perms->userflags;
+	HFSPLUS_I(inode).rootflags = perms->rootflags;
+	HFSPLUS_I(inode).userflags = perms->userflags;
 	if (perms->rootflags & HFSPLUS_FLG_IMMUTABLE)
 		inode->i_flags |= S_IMMUTABLE;
 	else
@@ -252,13 +245,30 @@ static void hfsplus_get_perms(struct inode *inode, struct hfsplus_perm *perms, i
 		inode->i_flags &= ~S_APPEND;
 }
 
+static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms)
+{
+	if (inode->i_flags & S_IMMUTABLE)
+		perms->rootflags |= HFSPLUS_FLG_IMMUTABLE;
+	else
+		perms->rootflags &= ~HFSPLUS_FLG_IMMUTABLE;
+	if (inode->i_flags & S_APPEND)
+		perms->rootflags |= HFSPLUS_FLG_APPEND;
+	else
+		perms->rootflags &= ~HFSPLUS_FLG_APPEND;
+	perms->userflags = HFSPLUS_I(inode).userflags;
+	perms->mode = cpu_to_be16(inode->i_mode);
+	perms->owner = cpu_to_be32(inode->i_uid);
+	perms->group = cpu_to_be32(inode->i_gid);
+	perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev);
+}
+
 static int hfsplus_file_open(struct inode *inode, struct file *file)
 {
 	if (HFSPLUS_IS_RSRC(inode))
-		inode = HFSPLUS_I(inode)->rsrc_inode;
+		inode = HFSPLUS_I(inode).rsrc_inode;
 	if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
 		return -EOVERFLOW;
-	atomic_inc(&HFSPLUS_I(inode)->opencnt);
+	atomic_inc(&HFSPLUS_I(inode).opencnt);
 	return 0;
 }
 
@@ -267,13 +277,12 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
 	struct super_block *sb = inode->i_sb;
 
 	if (HFSPLUS_IS_RSRC(inode))
-		inode = HFSPLUS_I(inode)->rsrc_inode;
-	if (atomic_dec_and_test(&HFSPLUS_I(inode)->opencnt)) {
+		inode = HFSPLUS_I(inode).rsrc_inode;
+	if (atomic_dec_and_test(&HFSPLUS_I(inode).opencnt)) {
 		mutex_lock(&inode->i_mutex);
 		hfsplus_file_truncate(inode);
 		if (inode->i_flags & S_DEAD) {
-			hfsplus_delete_cat(inode->i_ino,
-					   HFSPLUS_SB(sb)->hidden_dir, NULL);
+			hfsplus_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL);
 			hfsplus_delete_inode(inode);
 		}
 		mutex_unlock(&inode->i_mutex);
@@ -352,52 +361,47 @@ static const struct file_operations hfsplus_file_operations = {
 
 struct inode *hfsplus_new_inode(struct super_block *sb, int mode)
 {
-	struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
 	struct inode *inode = new_inode(sb);
-	struct hfsplus_inode_info *hip;
-
 	if (!inode)
 		return NULL;
 
-	inode->i_ino = sbi->next_cnid++;
+	inode->i_ino = HFSPLUS_SB(sb).next_cnid++;
 	inode->i_mode = mode;
 	inode->i_uid = current_fsuid();
 	inode->i_gid = current_fsgid();
 	inode->i_nlink = 1;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
-
-	hip = HFSPLUS_I(inode);
-	INIT_LIST_HEAD(&hip->open_dir_list);
-	mutex_init(&hip->extents_lock);
-	atomic_set(&hip->opencnt, 0);
-	hip->flags = 0;
-	memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec));
-	memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec));
-	hip->alloc_blocks = 0;
-	hip->first_blocks = 0;
-	hip->cached_start = 0;
-	hip->cached_blocks = 0;
-	hip->phys_size = 0;
-	hip->fs_blocks = 0;
-	hip->rsrc_inode = NULL;
+	INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
+	mutex_init(&HFSPLUS_I(inode).extents_lock);
+	atomic_set(&HFSPLUS_I(inode).opencnt, 0);
+	HFSPLUS_I(inode).flags = 0;
+	memset(HFSPLUS_I(inode).first_extents, 0, sizeof(hfsplus_extent_rec));
+	memset(HFSPLUS_I(inode).cached_extents, 0, sizeof(hfsplus_extent_rec));
+	HFSPLUS_I(inode).alloc_blocks = 0;
+	HFSPLUS_I(inode).first_blocks = 0;
+	HFSPLUS_I(inode).cached_start = 0;
+	HFSPLUS_I(inode).cached_blocks = 0;
+	HFSPLUS_I(inode).phys_size = 0;
+	HFSPLUS_I(inode).fs_blocks = 0;
+	HFSPLUS_I(inode).rsrc_inode = NULL;
 	if (S_ISDIR(inode->i_mode)) {
 		inode->i_size = 2;
-		sbi->folder_count++;
+		HFSPLUS_SB(sb).folder_count++;
 		inode->i_op = &hfsplus_dir_inode_operations;
 		inode->i_fop = &hfsplus_dir_operations;
 	} else if (S_ISREG(inode->i_mode)) {
-		sbi->file_count++;
+		HFSPLUS_SB(sb).file_count++;
 		inode->i_op = &hfsplus_file_inode_operations;
 		inode->i_fop = &hfsplus_file_operations;
 		inode->i_mapping->a_ops = &hfsplus_aops;
-		hip->clump_blocks = sbi->data_clump_blocks;
+		HFSPLUS_I(inode).clump_blocks = HFSPLUS_SB(sb).data_clump_blocks;
 	} else if (S_ISLNK(inode->i_mode)) {
-		sbi->file_count++;
+		HFSPLUS_SB(sb).file_count++;
 		inode->i_op = &page_symlink_inode_operations;
 		inode->i_mapping->a_ops = &hfsplus_aops;
-		hip->clump_blocks = 1;
+		HFSPLUS_I(inode).clump_blocks = 1;
 	} else
-		sbi->file_count++;
+		HFSPLUS_SB(sb).file_count++;
 	insert_inode_hash(inode);
 	mark_inode_dirty(inode);
 	sb->s_dirt = 1;
@@ -410,11 +414,11 @@ void hfsplus_delete_inode(struct inode *inode)
 	struct super_block *sb = inode->i_sb;
 
 	if (S_ISDIR(inode->i_mode)) {
-		HFSPLUS_SB(sb)->folder_count--;
+		HFSPLUS_SB(sb).folder_count--;
 		sb->s_dirt = 1;
 		return;
 	}
-	HFSPLUS_SB(sb)->file_count--;
+	HFSPLUS_SB(sb).file_count--;
 	if (S_ISREG(inode->i_mode)) {
 		if (!inode->i_nlink) {
 			inode->i_size = 0;
@@ -430,39 +434,34 @@ void hfsplus_delete_inode(struct inode *inode)
 void hfsplus_inode_read_fork(struct inode *inode, struct hfsplus_fork_raw *fork)
 {
 	struct super_block *sb = inode->i_sb;
-	struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
-	struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
 	u32 count;
 	int i;
 
-	memcpy(&hip->first_extents, &fork->extents, sizeof(hfsplus_extent_rec));
+	memcpy(&HFSPLUS_I(inode).first_extents, &fork->extents,
+	       sizeof(hfsplus_extent_rec));
 	for (count = 0, i = 0; i < 8; i++)
 		count += be32_to_cpu(fork->extents[i].block_count);
-	hip->first_blocks = count;
-	memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec));
-	hip->cached_start = 0;
-	hip->cached_blocks = 0;
-
-	hip->alloc_blocks = be32_to_cpu(fork->total_blocks);
-	hip->phys_size = inode->i_size = be64_to_cpu(fork->total_size);
-	hip->fs_blocks =
-		(inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
-	inode_set_bytes(inode, hip->fs_blocks << sb->s_blocksize_bits);
-	hip->clump_blocks =
-		be32_to_cpu(fork->clump_size) >> sbi->alloc_blksz_shift;
-	if (!hip->clump_blocks) {
-		hip->clump_blocks = HFSPLUS_IS_RSRC(inode) ?
-			sbi->rsrc_clump_blocks :
-			sbi->data_clump_blocks;
-	}
+	HFSPLUS_I(inode).first_blocks = count;
+	memset(HFSPLUS_I(inode).cached_extents, 0, sizeof(hfsplus_extent_rec));
+	HFSPLUS_I(inode).cached_start = 0;
+	HFSPLUS_I(inode).cached_blocks = 0;
+
+	HFSPLUS_I(inode).alloc_blocks = be32_to_cpu(fork->total_blocks);
+	inode->i_size = HFSPLUS_I(inode).phys_size = be64_to_cpu(fork->total_size);
+	HFSPLUS_I(inode).fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
+	inode_set_bytes(inode, HFSPLUS_I(inode).fs_blocks << sb->s_blocksize_bits);
+	HFSPLUS_I(inode).clump_blocks = be32_to_cpu(fork->clump_size) >> HFSPLUS_SB(sb).alloc_blksz_shift;
+	if (!HFSPLUS_I(inode).clump_blocks)
+		HFSPLUS_I(inode).clump_blocks = HFSPLUS_IS_RSRC(inode) ? HFSPLUS_SB(sb).rsrc_clump_blocks :
+				HFSPLUS_SB(sb).data_clump_blocks;
 }
 
 void hfsplus_inode_write_fork(struct inode *inode, struct hfsplus_fork_raw *fork)
 {
-	memcpy(&fork->extents, &HFSPLUS_I(inode)->first_extents,
+	memcpy(&fork->extents, &HFSPLUS_I(inode).first_extents,
 	       sizeof(hfsplus_extent_rec));
 	fork->total_size = cpu_to_be64(inode->i_size);
-	fork->total_blocks = cpu_to_be32(HFSPLUS_I(inode)->alloc_blocks);
+	fork->total_blocks = cpu_to_be32(HFSPLUS_I(inode).alloc_blocks);
 }
 
 int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
@@ -473,7 +472,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
 
 	type = hfs_bnode_read_u16(fd->bnode, fd->entryoffset);
 
-	HFSPLUS_I(inode)->linkid = 0;
+	HFSPLUS_I(inode).dev = 0;
 	if (type == HFSPLUS_FOLDER) {
 		struct hfsplus_cat_folder *folder = &entry.folder;
 
@@ -487,8 +486,8 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
 		inode->i_atime = hfsp_mt2ut(folder->access_date);
 		inode->i_mtime = hfsp_mt2ut(folder->content_mod_date);
 		inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date);
-		HFSPLUS_I(inode)->create_date = folder->create_date;
-		HFSPLUS_I(inode)->fs_blocks = 0;
+		HFSPLUS_I(inode).create_date = folder->create_date;
+		HFSPLUS_I(inode).fs_blocks = 0;
 		inode->i_op = &hfsplus_dir_inode_operations;
 		inode->i_fop = &hfsplus_dir_operations;
 	} else if (type == HFSPLUS_FILE) {
@@ -519,7 +518,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
 		inode->i_atime = hfsp_mt2ut(file->access_date);
 		inode->i_mtime = hfsp_mt2ut(file->content_mod_date);
 		inode->i_ctime = hfsp_mt2ut(file->attribute_mod_date);
-		HFSPLUS_I(inode)->create_date = file->create_date;
+		HFSPLUS_I(inode).create_date = file->create_date;
 	} else {
 		printk(KERN_ERR "hfs: bad catalog entry used to create inode\n");
 		res = -EIO;
@@ -534,12 +533,12 @@ int hfsplus_cat_write_inode(struct inode *inode)
 	hfsplus_cat_entry entry;
 
 	if (HFSPLUS_IS_RSRC(inode))
-		main_inode = HFSPLUS_I(inode)->rsrc_inode;
+		main_inode = HFSPLUS_I(inode).rsrc_inode;
 
 	if (!main_inode->i_nlink)
 		return 0;
 
-	if (hfs_find_init(HFSPLUS_SB(main_inode->i_sb)->cat_tree, &fd))
+	if (hfs_find_init(HFSPLUS_SB(main_inode->i_sb).cat_tree, &fd))
 		/* panic? */
 		return -EIO;
 
@@ -555,7 +554,7 @@ int hfsplus_cat_write_inode(struct inode *inode)
 		hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
 					sizeof(struct hfsplus_cat_folder));
 		/* simple node checks? */
-		hfsplus_cat_set_perms(inode, &folder->permissions);
+		hfsplus_set_perms(inode, &folder->permissions);
 		folder->access_date = hfsp_ut2mt(inode->i_atime);
 		folder->content_mod_date = hfsp_ut2mt(inode->i_mtime);
 		folder->attribute_mod_date = hfsp_ut2mt(inode->i_ctime);
@@ -577,7 +576,11 @@ int hfsplus_cat_write_inode(struct inode *inode)
 		hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
 					sizeof(struct hfsplus_cat_file));
 		hfsplus_inode_write_fork(inode, &file->data_fork);
-		hfsplus_cat_set_perms(inode, &file->permissions);
+		if (S_ISREG(inode->i_mode))
+			HFSPLUS_I(inode).dev = inode->i_nlink;
+		if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
+			HFSPLUS_I(inode).dev = kdev_t_to_nr(inode->i_rdev);
+		hfsplus_set_perms(inode, &file->permissions);
 		if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE)
 			file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED);
 		else
diff --git a/trunk/fs/hfsplus/ioctl.c b/trunk/fs/hfsplus/ioctl.c
index 5b4667e08ef7..ac405f099026 100644
--- a/trunk/fs/hfsplus/ioctl.c
+++ b/trunk/fs/hfsplus/ioctl.c
@@ -17,98 +17,83 @@
 #include <linux/mount.h>
 #include <linux/sched.h>
 #include <linux/xattr.h>
+#include <linux/smp_lock.h>
 #include <asm/uaccess.h>
 #include "hfsplus_fs.h"
 
-static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags)
+long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
-	struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
-	unsigned int flags = 0;
-
-	if (inode->i_flags & S_IMMUTABLE)
-		flags |= FS_IMMUTABLE_FL;
-	if (inode->i_flags |= S_APPEND)
-		flags |= FS_APPEND_FL;
-	if (hip->userflags & HFSPLUS_FLG_NODUMP)
-		flags |= FS_NODUMP_FL;
-
-	return put_user(flags, user_flags);
-}
-
-static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
-{
-	struct inode *inode = file->f_path.dentry->d_inode;
-	struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	unsigned int flags;
-	int err = 0;
-
-	err = mnt_want_write(file->f_path.mnt);
-	if (err)
-		goto out;
-
-	if (!is_owner_or_cap(inode)) {
-		err = -EACCES;
-		goto out_drop_write;
-	}
-
-	if (get_user(flags, user_flags)) {
-		err = -EFAULT;
-		goto out_drop_write;
-	}
 
-	mutex_lock(&inode->i_mutex);
+	lock_kernel();
+	switch (cmd) {
+	case HFSPLUS_IOC_EXT2_GETFLAGS:
+		flags = 0;
+		if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_IMMUTABLE)
+			flags |= FS_IMMUTABLE_FL; /* EXT2_IMMUTABLE_FL */
+		if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_APPEND)
+			flags |= FS_APPEND_FL; /* EXT2_APPEND_FL */
+		if (HFSPLUS_I(inode).userflags & HFSPLUS_FLG_NODUMP)
+			flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */
+		return put_user(flags, (int __user *)arg);
+	case HFSPLUS_IOC_EXT2_SETFLAGS: {
+		int err = 0;
+		err = mnt_want_write(filp->f_path.mnt);
+		if (err) {
+			unlock_kernel();
+			return err;
+		}
 
-	if ((flags & (FS_IMMUTABLE_FL|FS_APPEND_FL)) ||
-	    inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
-		if (!capable(CAP_LINUX_IMMUTABLE)) {
-			err = -EPERM;
-			goto out_unlock_inode;
+		if (!is_owner_or_cap(inode)) {
+			err = -EACCES;
+			goto setflags_out;
+		}
+		if (get_user(flags, (int __user *)arg)) {
+			err = -EFAULT;
+			goto setflags_out;
+		}
+		if (flags & (FS_IMMUTABLE_FL|FS_APPEND_FL) ||
+		    HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) {
+			if (!capable(CAP_LINUX_IMMUTABLE)) {
+				err = -EPERM;
+				goto setflags_out;
+			}
 		}
-	}
 
-	/* don't silently ignore unsupported ext2 flags */
-	if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) {
-		err = -EOPNOTSUPP;
-		goto out_unlock_inode;
+		/* don't silently ignore unsupported ext2 flags */
+		if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) {
+			err = -EOPNOTSUPP;
+			goto setflags_out;
+		}
+		if (flags & FS_IMMUTABLE_FL) { /* EXT2_IMMUTABLE_FL */
+			inode->i_flags |= S_IMMUTABLE;
+			HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE;
+		} else {
+			inode->i_flags &= ~S_IMMUTABLE;
+			HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_IMMUTABLE;
+		}
+		if (flags & FS_APPEND_FL) { /* EXT2_APPEND_FL */
+			inode->i_flags |= S_APPEND;
+			HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_APPEND;
+		} else {
+			inode->i_flags &= ~S_APPEND;
+			HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_APPEND;
+		}
+		if (flags & FS_NODUMP_FL) /* EXT2_NODUMP_FL */
+			HFSPLUS_I(inode).userflags |= HFSPLUS_FLG_NODUMP;
+		else
+			HFSPLUS_I(inode).userflags &= ~HFSPLUS_FLG_NODUMP;
+
+		inode->i_ctime = CURRENT_TIME_SEC;
+		mark_inode_dirty(inode);
+setflags_out:
+		mnt_drop_write(filp->f_path.mnt);
+		unlock_kernel();
+		return err;
 	}
-
-	if (flags & FS_IMMUTABLE_FL)
-		inode->i_flags |= S_IMMUTABLE;
-	else
-		inode->i_flags &= ~S_IMMUTABLE;
-
-	if (flags & FS_APPEND_FL)
-		inode->i_flags |= S_APPEND;
-	else
-		inode->i_flags &= ~S_APPEND;
-
-	if (flags & FS_NODUMP_FL)
-		hip->userflags |= HFSPLUS_FLG_NODUMP;
-	else
-		hip->userflags &= ~HFSPLUS_FLG_NODUMP;
-
-	inode->i_ctime = CURRENT_TIME_SEC;
-	mark_inode_dirty(inode);
-
-out_unlock_inode:
-	mutex_lock(&inode->i_mutex);
-out_drop_write:
-	mnt_drop_write(file->f_path.mnt);
-out:
-	return err;
-}
-
-long hfsplus_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	void __user *argp = (void __user *)arg;
-
-	switch (cmd) {
-	case HFSPLUS_IOC_EXT2_GETFLAGS:
-		return hfsplus_ioctl_getflags(file, argp);
-	case HFSPLUS_IOC_EXT2_SETFLAGS:
-		return hfsplus_ioctl_setflags(file, argp);
 	default:
+		unlock_kernel();
 		return -ENOTTY;
 	}
 }
@@ -125,7 +110,7 @@ int hfsplus_setxattr(struct dentry *dentry, const char *name,
 	if (!S_ISREG(inode->i_mode) || HFSPLUS_IS_RSRC(inode))
 		return -EOPNOTSUPP;
 
-	res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd);
+	res = hfs_find_init(HFSPLUS_SB(inode->i_sb).cat_tree, &fd);
 	if (res)
 		return res;
 	res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd);
@@ -168,7 +153,7 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
 		return -EOPNOTSUPP;
 
 	if (size) {
-		res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd);
+		res = hfs_find_init(HFSPLUS_SB(inode->i_sb).cat_tree, &fd);
 		if (res)
 			return res;
 		res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd);
@@ -192,7 +177,7 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
 		} else
 			res = size ? -ERANGE : 4;
 	} else
-		res = -EOPNOTSUPP;
+		res = -ENODATA;
 out:
 	if (size)
 		hfs_find_exit(&fd);
diff --git a/trunk/fs/hfsplus/options.c b/trunk/fs/hfsplus/options.c
index f9ab276a4d8d..572628b4b07d 100644
--- a/trunk/fs/hfsplus/options.c
+++ b/trunk/fs/hfsplus/options.c
@@ -143,13 +143,13 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
 			kfree(p);
 			break;
 		case opt_decompose:
-			clear_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags);
+			sbi->flags &= ~HFSPLUS_SB_NODECOMPOSE;
 			break;
 		case opt_nodecompose:
-			set_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags);
+			sbi->flags |= HFSPLUS_SB_NODECOMPOSE;
 			break;
 		case opt_force:
-			set_bit(HFSPLUS_SB_FORCE, &sbi->flags);
+			sbi->flags |= HFSPLUS_SB_FORCE;
 			break;
 		default:
 			return 0;
@@ -171,7 +171,7 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
 
 int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt)
 {
-	struct hfsplus_sb_info *sbi = HFSPLUS_SB(mnt->mnt_sb);
+	struct hfsplus_sb_info *sbi = &HFSPLUS_SB(mnt->mnt_sb);
 
 	if (sbi->creator != HFSPLUS_DEF_CR_TYPE)
 		seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator);
@@ -184,7 +184,7 @@ int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt)
 		seq_printf(seq, ",session=%u", sbi->session);
 	if (sbi->nls)
 		seq_printf(seq, ",nls=%s", sbi->nls->charset);
-	if (test_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags))
+	if (sbi->flags & HFSPLUS_SB_NODECOMPOSE)
 		seq_printf(seq, ",nodecompose");
 	return 0;
 }
diff --git a/trunk/fs/hfsplus/part_tbl.c b/trunk/fs/hfsplus/part_tbl.c
index 208b16c645cc..1528a6fd0299 100644
--- a/trunk/fs/hfsplus/part_tbl.c
+++ b/trunk/fs/hfsplus/part_tbl.c
@@ -74,7 +74,6 @@ struct old_pmap {
 int hfs_part_find(struct super_block *sb,
 		  sector_t *part_start, sector_t *part_size)
 {
-	struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
 	struct buffer_head *bh;
 	__be16 *data;
 	int i, size, res;
@@ -96,7 +95,7 @@ int hfs_part_find(struct super_block *sb,
 		for (i = 0; i < size; p++, i++) {
 			if (p->pdStart && p->pdSize &&
 			    p->pdFSID == cpu_to_be32(0x54465331)/*"TFS1"*/ &&
-			    (sbi->part < 0 || sbi->part == i)) {
+			    (HFSPLUS_SB(sb).part < 0 || HFSPLUS_SB(sb).part == i)) {
 				*part_start += be32_to_cpu(p->pdStart);
 				*part_size = be32_to_cpu(p->pdSize);
 				res = 0;
@@ -112,7 +111,7 @@ int hfs_part_find(struct super_block *sb,
 		size = be32_to_cpu(pm->pmMapBlkCnt);
 		for (i = 0; i < size;) {
 			if (!memcmp(pm->pmPartType,"Apple_HFS", 9) &&
-			    (sbi->part < 0 || sbi->part == i)) {
+			    (HFSPLUS_SB(sb).part < 0 || HFSPLUS_SB(sb).part == i)) {
 				*part_start += be32_to_cpu(pm->pmPyPartStart);
 				*part_size = be32_to_cpu(pm->pmPartBlkCnt);
 				res = 0;
diff --git a/trunk/fs/hfsplus/super.c b/trunk/fs/hfsplus/super.c
index 9a88d7536103..3b55c050c742 100644
--- a/trunk/fs/hfsplus/super.c
+++ b/trunk/fs/hfsplus/super.c
@@ -12,6 +12,7 @@
 #include <linux/pagemap.h>
 #include <linux/fs.h>
 #include <linux/slab.h>
+#include <linux/smp_lock.h>
 #include <linux/vfs.h>
 #include <linux/nls.h>
 
@@ -20,11 +21,40 @@ static void hfsplus_destroy_inode(struct inode *inode);
 
 #include "hfsplus_fs.h"
 
-static int hfsplus_system_read_inode(struct inode *inode)
+struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino)
 {
-	struct hfsplus_vh *vhdr = HFSPLUS_SB(inode->i_sb)->s_vhdr;
+	struct hfs_find_data fd;
+	struct hfsplus_vh *vhdr;
+	struct inode *inode;
+	long err = -EIO;
 
-	switch (inode->i_ino) {
+	inode = iget_locked(sb, ino);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+	if (!(inode->i_state & I_NEW))
+		return inode;
+
+	INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
+	mutex_init(&HFSPLUS_I(inode).extents_lock);
+	HFSPLUS_I(inode).flags = 0;
+	HFSPLUS_I(inode).rsrc_inode = NULL;
+	atomic_set(&HFSPLUS_I(inode).opencnt, 0);
+
+	if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID) {
+	read_inode:
+		hfs_find_init(HFSPLUS_SB(inode->i_sb).cat_tree, &fd);
+		err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd);
+		if (!err)
+			err = hfsplus_cat_read_inode(inode, &fd);
+		hfs_find_exit(&fd);
+		if (err)
+			goto bad_inode;
+		goto done;
+	}
+	vhdr = HFSPLUS_SB(inode->i_sb).s_vhdr;
+	switch(inode->i_ino) {
+	case HFSPLUS_ROOT_CNID:
+		goto read_inode;
 	case HFSPLUS_EXT_CNID:
 		hfsplus_inode_read_fork(inode, &vhdr->ext_file);
 		inode->i_mapping->a_ops = &hfsplus_btree_aops;
@@ -45,101 +75,74 @@ static int hfsplus_system_read_inode(struct inode *inode)
 		inode->i_mapping->a_ops = &hfsplus_btree_aops;
 		break;
 	default:
-		return -EIO;
-	}
-
-	return 0;
-}
-
-struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino)
-{
-	struct hfs_find_data fd;
-	struct inode *inode;
-	int err;
-
-	inode = iget_locked(sb, ino);
-	if (!inode)
-		return ERR_PTR(-ENOMEM);
-	if (!(inode->i_state & I_NEW))
-		return inode;
-
-	INIT_LIST_HEAD(&HFSPLUS_I(inode)->open_dir_list);
-	mutex_init(&HFSPLUS_I(inode)->extents_lock);
-	HFSPLUS_I(inode)->flags = 0;
-	HFSPLUS_I(inode)->rsrc_inode = NULL;
-	atomic_set(&HFSPLUS_I(inode)->opencnt, 0);
-
-	if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID ||
-	    inode->i_ino == HFSPLUS_ROOT_CNID) {
-		hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd);
-		err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd);
-		if (!err)
-			err = hfsplus_cat_read_inode(inode, &fd);
-		hfs_find_exit(&fd);
-	} else {
-		err = hfsplus_system_read_inode(inode);
-	}
-
-	if (err) {
-		iget_failed(inode);
-		return ERR_PTR(err);
+		goto bad_inode;
 	}
 
+done:
 	unlock_new_inode(inode);
 	return inode;
+
+bad_inode:
+	iget_failed(inode);
+	return ERR_PTR(err);
 }
 
-static int hfsplus_system_write_inode(struct inode *inode)
+static int hfsplus_write_inode(struct inode *inode,
+		struct writeback_control *wbc)
 {
-	struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
-	struct hfsplus_vh *vhdr = sbi->s_vhdr;
-	struct hfsplus_fork_raw *fork;
-	struct hfs_btree *tree = NULL;
+	struct hfsplus_vh *vhdr;
+	int ret = 0;
 
+	dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino);
+	hfsplus_ext_write_extent(inode);
+	if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID) {
+		return hfsplus_cat_write_inode(inode);
+	}
+	vhdr = HFSPLUS_SB(inode->i_sb).s_vhdr;
 	switch (inode->i_ino) {
+	case HFSPLUS_ROOT_CNID:
+		ret = hfsplus_cat_write_inode(inode);
+		break;
 	case HFSPLUS_EXT_CNID:
-		fork = &vhdr->ext_file;
-		tree = sbi->ext_tree;
+		if (vhdr->ext_file.total_size != cpu_to_be64(inode->i_size)) {
+			HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP;
+			inode->i_sb->s_dirt = 1;
+		}
+		hfsplus_inode_write_fork(inode, &vhdr->ext_file);
+		hfs_btree_write(HFSPLUS_SB(inode->i_sb).ext_tree);
 		break;
 	case HFSPLUS_CAT_CNID:
-		fork = &vhdr->cat_file;
-		tree = sbi->cat_tree;
+		if (vhdr->cat_file.total_size != cpu_to_be64(inode->i_size)) {
+			HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP;
+			inode->i_sb->s_dirt = 1;
+		}
+		hfsplus_inode_write_fork(inode, &vhdr->cat_file);
+		hfs_btree_write(HFSPLUS_SB(inode->i_sb).cat_tree);
 		break;
 	case HFSPLUS_ALLOC_CNID:
-		fork = &vhdr->alloc_file;
+		if (vhdr->alloc_file.total_size != cpu_to_be64(inode->i_size)) {
+			HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP;
+			inode->i_sb->s_dirt = 1;
+		}
+		hfsplus_inode_write_fork(inode, &vhdr->alloc_file);
 		break;
 	case HFSPLUS_START_CNID:
-		fork = &vhdr->start_file;
+		if (vhdr->start_file.total_size != cpu_to_be64(inode->i_size)) {
+			HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP;
+			inode->i_sb->s_dirt = 1;
+		}
+		hfsplus_inode_write_fork(inode, &vhdr->start_file);
 		break;
 	case HFSPLUS_ATTR_CNID:
-		fork = &vhdr->attr_file;
-		tree = sbi->attr_tree;
-	default:
-		return -EIO;
-	}
-
-	if (fork->total_size != cpu_to_be64(inode->i_size)) {
-		set_bit(HFSPLUS_SB_WRITEBACKUP, &sbi->flags);
-		inode->i_sb->s_dirt = 1;
+		if (vhdr->attr_file.total_size != cpu_to_be64(inode->i_size)) {
+			HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP;
+			inode->i_sb->s_dirt = 1;
+		}
+		hfsplus_inode_write_fork(inode, &vhdr->attr_file);
+		hfs_btree_write(HFSPLUS_SB(inode->i_sb).attr_tree);
+		break;
 	}
-	hfsplus_inode_write_fork(inode, fork);
-	if (tree)
-		hfs_btree_write(tree);
-	return 0;
-}
-
-static int hfsplus_write_inode(struct inode *inode,
-		struct writeback_control *wbc)
-{
-	dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino);
-
-	hfsplus_ext_write_extent(inode);
-
-	if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID ||
-	    inode->i_ino == HFSPLUS_ROOT_CNID)
-		return hfsplus_cat_write_inode(inode);
-	else
-		return hfsplus_system_write_inode(inode);
+	return ret;
 }
 
 static void hfsplus_evict_inode(struct inode *inode)
@@ -148,53 +151,51 @@ static void hfsplus_evict_inode(struct inode *inode)
 	truncate_inode_pages(&inode->i_data, 0);
 	end_writeback(inode);
 	if (HFSPLUS_IS_RSRC(inode)) {
-		HFSPLUS_I(HFSPLUS_I(inode)->rsrc_inode)->rsrc_inode = NULL;
-		iput(HFSPLUS_I(inode)->rsrc_inode);
+		HFSPLUS_I(HFSPLUS_I(inode).rsrc_inode).rsrc_inode = NULL;
+		iput(HFSPLUS_I(inode).rsrc_inode);
 	}
 }
 
 int hfsplus_sync_fs(struct super_block *sb, int wait)
 {
-	struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
-	struct hfsplus_vh *vhdr = sbi->s_vhdr;
+	struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr;
 
 	dprint(DBG_SUPER, "hfsplus_write_super\n");
 
-	mutex_lock(&sbi->vh_mutex);
-	mutex_lock(&sbi->alloc_mutex);
+	lock_super(sb);
 	sb->s_dirt = 0;
 
-	vhdr->free_blocks = cpu_to_be32(sbi->free_blocks);
-	vhdr->next_cnid = cpu_to_be32(sbi->next_cnid);
-	vhdr->folder_count = cpu_to_be32(sbi->folder_count);
-	vhdr->file_count = cpu_to_be32(sbi->file_count);
+	vhdr->free_blocks = cpu_to_be32(HFSPLUS_SB(sb).free_blocks);
+	vhdr->next_alloc = cpu_to_be32(HFSPLUS_SB(sb).next_alloc);
+	vhdr->next_cnid = cpu_to_be32(HFSPLUS_SB(sb).next_cnid);
+	vhdr->folder_count = cpu_to_be32(HFSPLUS_SB(sb).folder_count);
+	vhdr->file_count = cpu_to_be32(HFSPLUS_SB(sb).file_count);
 
-	mark_buffer_dirty(sbi->s_vhbh);
-	if (test_and_clear_bit(HFSPLUS_SB_WRITEBACKUP, &sbi->flags)) {
-		if (sbi->sect_count) {
+	mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh);
+	if (HFSPLUS_SB(sb).flags & HFSPLUS_SB_WRITEBACKUP) {
+		if (HFSPLUS_SB(sb).sect_count) {
 			struct buffer_head *bh;
 			u32 block, offset;
 
-			block = sbi->blockoffset;
-			block += (sbi->sect_count - 2) >> (sb->s_blocksize_bits - 9);
-			offset = ((sbi->sect_count - 2) << 9) & (sb->s_blocksize - 1);
-			printk(KERN_DEBUG "hfs: backup: %u,%u,%u,%u\n",
-					  sbi->blockoffset, sbi->sect_count,
-					  block, offset);
+			block = HFSPLUS_SB(sb).blockoffset;
+			block += (HFSPLUS_SB(sb).sect_count - 2) >> (sb->s_blocksize_bits - 9);
+			offset = ((HFSPLUS_SB(sb).sect_count - 2) << 9) & (sb->s_blocksize - 1);
+			printk(KERN_DEBUG "hfs: backup: %u,%u,%u,%u\n", HFSPLUS_SB(sb).blockoffset,
+				HFSPLUS_SB(sb).sect_count, block, offset);
 			bh = sb_bread(sb, block);
 			if (bh) {
 				vhdr = (struct hfsplus_vh *)(bh->b_data + offset);
 				if (be16_to_cpu(vhdr->signature) == HFSPLUS_VOLHEAD_SIG) {
-					memcpy(vhdr, sbi->s_vhdr, sizeof(*vhdr));
+					memcpy(vhdr, HFSPLUS_SB(sb).s_vhdr, sizeof(*vhdr));
 					mark_buffer_dirty(bh);
 					brelse(bh);
 				} else
 					printk(KERN_WARNING "hfs: backup not found!\n");
 			}
 		}
+		HFSPLUS_SB(sb).flags &= ~HFSPLUS_SB_WRITEBACKUP;
 	}
-	mutex_unlock(&sbi->alloc_mutex);
-	mutex_unlock(&sbi->vh_mutex);
+	unlock_super(sb);
 	return 0;
 }
 
@@ -208,48 +209,48 @@ static void hfsplus_write_super(struct super_block *sb)
 
 static void hfsplus_put_super(struct super_block *sb)
 {
-	struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
-
 	dprint(DBG_SUPER, "hfsplus_put_super\n");
-
 	if (!sb->s_fs_info)
 		return;
 
+	lock_kernel();
+
 	if (sb->s_dirt)
 		hfsplus_write_super(sb);
-	if (!(sb->s_flags & MS_RDONLY) && sbi->s_vhdr) {
-		struct hfsplus_vh *vhdr = sbi->s_vhdr;
+	if (!(sb->s_flags & MS_RDONLY) && HFSPLUS_SB(sb).s_vhdr) {
+		struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr;
 
 		vhdr->modify_date = hfsp_now2mt();
 		vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_UNMNT);
 		vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_INCNSTNT);
-		mark_buffer_dirty(sbi->s_vhbh);
-		sync_dirty_buffer(sbi->s_vhbh);
+		mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh);
+		sync_dirty_buffer(HFSPLUS_SB(sb).s_vhbh);
 	}
 
-	hfs_btree_close(sbi->cat_tree);
-	hfs_btree_close(sbi->ext_tree);
-	iput(sbi->alloc_file);
-	iput(sbi->hidden_dir);
-	brelse(sbi->s_vhbh);
-	unload_nls(sbi->nls);
+	hfs_btree_close(HFSPLUS_SB(sb).cat_tree);
+	hfs_btree_close(HFSPLUS_SB(sb).ext_tree);
+	iput(HFSPLUS_SB(sb).alloc_file);
+	iput(HFSPLUS_SB(sb).hidden_dir);
+	brelse(HFSPLUS_SB(sb).s_vhbh);
+	unload_nls(HFSPLUS_SB(sb).nls);
 	kfree(sb->s_fs_info);
 	sb->s_fs_info = NULL;
+
+	unlock_kernel();
 }
 
 static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct super_block *sb = dentry->d_sb;
-	struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
 	u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
 
 	buf->f_type = HFSPLUS_SUPER_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
-	buf->f_blocks = sbi->total_blocks << sbi->fs_shift;
-	buf->f_bfree = sbi->free_blocks << sbi->fs_shift;
+	buf->f_blocks = HFSPLUS_SB(sb).total_blocks << HFSPLUS_SB(sb).fs_shift;
+	buf->f_bfree = HFSPLUS_SB(sb).free_blocks << HFSPLUS_SB(sb).fs_shift;
 	buf->f_bavail = buf->f_bfree;
 	buf->f_files = 0xFFFFFFFF;
-	buf->f_ffree = 0xFFFFFFFF - sbi->next_cnid;
+	buf->f_ffree = 0xFFFFFFFF - HFSPLUS_SB(sb).next_cnid;
 	buf->f_fsid.val[0] = (u32)id;
 	buf->f_fsid.val[1] = (u32)(id >> 32);
 	buf->f_namelen = HFSPLUS_MAX_STRLEN;
@@ -262,11 +263,11 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data)
 	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
 		return 0;
 	if (!(*flags & MS_RDONLY)) {
-		struct hfsplus_vh *vhdr = HFSPLUS_SB(sb)->s_vhdr;
+		struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr;
 		struct hfsplus_sb_info sbi;
 
 		memset(&sbi, 0, sizeof(struct hfsplus_sb_info));
-		sbi.nls = HFSPLUS_SB(sb)->nls;
+		sbi.nls = HFSPLUS_SB(sb).nls;
 		if (!hfsplus_parse_options(data, &sbi))
 			return -EINVAL;
 
@@ -275,7 +276,7 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data)
 			       "running fsck.hfsplus is recommended.  leaving read-only.\n");
 			sb->s_flags |= MS_RDONLY;
 			*flags |= MS_RDONLY;
-		} else if (test_bit(HFSPLUS_SB_FORCE, &sbi.flags)) {
+		} else if (sbi.flags & HFSPLUS_SB_FORCE) {
 			/* nothing */
 		} else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) {
 			printk(KERN_WARNING "hfs: filesystem is marked locked, leaving read-only.\n");
@@ -319,8 +320,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 		return -ENOMEM;
 
 	sb->s_fs_info = sbi;
-	mutex_init(&sbi->alloc_mutex);
-	mutex_init(&sbi->vh_mutex);
+	INIT_HLIST_HEAD(&sbi->rsrc_inodes);
 	hfsplus_fill_defaults(sbi);
 	if (!hfsplus_parse_options(data, sbi)) {
 		printk(KERN_ERR "hfs: unable to parse mount options\n");
@@ -344,7 +344,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 		err = -EINVAL;
 		goto cleanup;
 	}
-	vhdr = sbi->s_vhdr;
+	vhdr = HFSPLUS_SB(sb).s_vhdr;
 
 	/* Copy parts of the volume header into the superblock */
 	sb->s_magic = HFSPLUS_VOLHEAD_SIG;
@@ -353,19 +353,18 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 		printk(KERN_ERR "hfs: wrong filesystem version\n");
 		goto cleanup;
 	}
-	sbi->total_blocks = be32_to_cpu(vhdr->total_blocks);
-	sbi->free_blocks = be32_to_cpu(vhdr->free_blocks);
-	sbi->next_cnid = be32_to_cpu(vhdr->next_cnid);
-	sbi->file_count = be32_to_cpu(vhdr->file_count);
-	sbi->folder_count = be32_to_cpu(vhdr->folder_count);
-	sbi->data_clump_blocks =
-		be32_to_cpu(vhdr->data_clump_sz) >> sbi->alloc_blksz_shift;
-	if (!sbi->data_clump_blocks)
-		sbi->data_clump_blocks = 1;
-	sbi->rsrc_clump_blocks =
-		be32_to_cpu(vhdr->rsrc_clump_sz) >> sbi->alloc_blksz_shift;
-	if (!sbi->rsrc_clump_blocks)
-		sbi->rsrc_clump_blocks = 1;
+	HFSPLUS_SB(sb).total_blocks = be32_to_cpu(vhdr->total_blocks);
+	HFSPLUS_SB(sb).free_blocks = be32_to_cpu(vhdr->free_blocks);
+	HFSPLUS_SB(sb).next_alloc = be32_to_cpu(vhdr->next_alloc);
+	HFSPLUS_SB(sb).next_cnid = be32_to_cpu(vhdr->next_cnid);
+	HFSPLUS_SB(sb).file_count = be32_to_cpu(vhdr->file_count);
+	HFSPLUS_SB(sb).folder_count = be32_to_cpu(vhdr->folder_count);
+	HFSPLUS_SB(sb).data_clump_blocks = be32_to_cpu(vhdr->data_clump_sz) >> HFSPLUS_SB(sb).alloc_blksz_shift;
+	if (!HFSPLUS_SB(sb).data_clump_blocks)
+		HFSPLUS_SB(sb).data_clump_blocks = 1;
+	HFSPLUS_SB(sb).rsrc_clump_blocks = be32_to_cpu(vhdr->rsrc_clump_sz) >> HFSPLUS_SB(sb).alloc_blksz_shift;
+	if (!HFSPLUS_SB(sb).rsrc_clump_blocks)
+		HFSPLUS_SB(sb).rsrc_clump_blocks = 1;
 
 	/* Set up operations so we can load metadata */
 	sb->s_op = &hfsplus_sops;
@@ -375,7 +374,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 		printk(KERN_WARNING "hfs: Filesystem was not cleanly unmounted, "
 		       "running fsck.hfsplus is recommended.  mounting read-only.\n");
 		sb->s_flags |= MS_RDONLY;
-	} else if (test_and_clear_bit(HFSPLUS_SB_FORCE, &sbi->flags)) {
+	} else if (sbi->flags & HFSPLUS_SB_FORCE) {
 		/* nothing */
 	} else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) {
 		printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n");
@@ -385,15 +384,16 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 		       "use the force option at your own risk, mounting read-only.\n");
 		sb->s_flags |= MS_RDONLY;
 	}
+	sbi->flags &= ~HFSPLUS_SB_FORCE;
 
 	/* Load metadata objects (B*Trees) */
-	sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID);
-	if (!sbi->ext_tree) {
+	HFSPLUS_SB(sb).ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID);
+	if (!HFSPLUS_SB(sb).ext_tree) {
 		printk(KERN_ERR "hfs: failed to load extents file\n");
 		goto cleanup;
 	}
-	sbi->cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID);
-	if (!sbi->cat_tree) {
+	HFSPLUS_SB(sb).cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID);
+	if (!HFSPLUS_SB(sb).cat_tree) {
 		printk(KERN_ERR "hfs: failed to load catalog file\n");
 		goto cleanup;
 	}
@@ -404,7 +404,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 		err = PTR_ERR(inode);
 		goto cleanup;
 	}
-	sbi->alloc_file = inode;
+	HFSPLUS_SB(sb).alloc_file = inode;
 
 	/* Load the root directory */
 	root = hfsplus_iget(sb, HFSPLUS_ROOT_CNID);
@@ -423,7 +423,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 
 	str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1;
 	str.name = HFSP_HIDDENDIR_NAME;
-	hfs_find_init(sbi->cat_tree, &fd);
+	hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd);
 	hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str);
 	if (!hfs_brec_read(&fd, &entry, sizeof(entry))) {
 		hfs_find_exit(&fd);
@@ -434,7 +434,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 			err = PTR_ERR(inode);
 			goto cleanup;
 		}
-		sbi->hidden_dir = inode;
+		HFSPLUS_SB(sb).hidden_dir = inode;
 	} else
 		hfs_find_exit(&fd);
 
@@ -449,19 +449,15 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 	be32_add_cpu(&vhdr->write_count, 1);
 	vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT);
 	vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT);
-	mark_buffer_dirty(sbi->s_vhbh);
-	sync_dirty_buffer(sbi->s_vhbh);
+	mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh);
+	sync_dirty_buffer(HFSPLUS_SB(sb).s_vhbh);
 
-	if (!sbi->hidden_dir) {
+	if (!HFSPLUS_SB(sb).hidden_dir) {
 		printk(KERN_DEBUG "hfs: create hidden dir...\n");
-
-		mutex_lock(&sbi->vh_mutex);
-		sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR);
-		hfsplus_create_cat(sbi->hidden_dir->i_ino, sb->s_root->d_inode,
-				   &str, sbi->hidden_dir);
-		mutex_unlock(&sbi->vh_mutex);
-
-		mark_inode_dirty(sbi->hidden_dir);
+		HFSPLUS_SB(sb).hidden_dir = hfsplus_new_inode(sb, S_IFDIR);
+		hfsplus_create_cat(HFSPLUS_SB(sb).hidden_dir->i_ino, sb->s_root->d_inode,
+				   &str, HFSPLUS_SB(sb).hidden_dir);
+		mark_inode_dirty(HFSPLUS_SB(sb).hidden_dir);
 	}
 out:
 	unload_nls(sbi->nls);
@@ -490,7 +486,7 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb)
 
 static void hfsplus_destroy_inode(struct inode *inode)
 {
-	kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode));
+	kmem_cache_free(hfsplus_inode_cachep, &HFSPLUS_I(inode));
 }
 
 #define HFSPLUS_INODE_SIZE	sizeof(struct hfsplus_inode_info)
diff --git a/trunk/fs/hfsplus/unicode.c b/trunk/fs/hfsplus/unicode.c
index b66d67de882c..628ccf6fa402 100644
--- a/trunk/fs/hfsplus/unicode.c
+++ b/trunk/fs/hfsplus/unicode.c
@@ -121,7 +121,7 @@ static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
 int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, char *astr, int *len_p)
 {
 	const hfsplus_unichr *ip;
-	struct nls_table *nls = HFSPLUS_SB(sb)->nls;
+	struct nls_table *nls = HFSPLUS_SB(sb).nls;
 	u8 *op;
 	u16 cc, c0, c1;
 	u16 *ce1, *ce2;
@@ -132,7 +132,7 @@ int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, c
 	ustrlen = be16_to_cpu(ustr->length);
 	len = *len_p;
 	ce1 = NULL;
-	compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
+	compose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
 
 	while (ustrlen > 0) {
 		c0 = be16_to_cpu(*ip++);
@@ -246,7 +246,7 @@ int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, c
 static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
 			      wchar_t *uc)
 {
-	int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
+	int size = HFSPLUS_SB(sb).nls->char2uni(astr, len, uc);
 	if (size <= 0) {
 		*uc = '?';
 		size = 1;
@@ -293,7 +293,7 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
 	u16 *dstr, outlen = 0;
 	wchar_t c;
 
-	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
+	decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
 	while (outlen < HFSPLUS_MAX_STRLEN && len > 0) {
 		size = asc2unichar(sb, astr, len, &c);
 
@@ -330,8 +330,8 @@ int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str)
 	wchar_t c;
 	u16 c2;
 
-	casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
-	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
+	casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD);
+	decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
 	hash = init_name_hash();
 	astr = str->name;
 	len = str->len;
@@ -373,8 +373,8 @@ int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *
 	u16 c1, c2;
 	wchar_t c;
 
-	casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
-	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
+	casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD);
+	decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
 	astr1 = s1->name;
 	len1 = s1->len;
 	astr2 = s2->name;
diff --git a/trunk/fs/hfsplus/wrapper.c b/trunk/fs/hfsplus/wrapper.c
index 8972c20b3216..bed78ac8f6d1 100644
--- a/trunk/fs/hfsplus/wrapper.c
+++ b/trunk/fs/hfsplus/wrapper.c
@@ -65,8 +65,8 @@ static int hfsplus_get_last_session(struct super_block *sb,
 	*start = 0;
 	*size = sb->s_bdev->bd_inode->i_size >> 9;
 
-	if (HFSPLUS_SB(sb)->session >= 0) {
-		te.cdte_track = HFSPLUS_SB(sb)->session;
+	if (HFSPLUS_SB(sb).session >= 0) {
+		te.cdte_track = HFSPLUS_SB(sb).session;
 		te.cdte_format = CDROM_LBA;
 		res = ioctl_by_bdev(sb->s_bdev, CDROMREADTOCENTRY, (unsigned long)&te);
 		if (!res && (te.cdte_ctrl & CDROM_DATA_TRACK) == 4) {
@@ -87,7 +87,6 @@ static int hfsplus_get_last_session(struct super_block *sb,
 /* Takes in super block, returns true if good data read */
 int hfsplus_read_wrapper(struct super_block *sb)
 {
-	struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
 	struct buffer_head *bh;
 	struct hfsplus_vh *vhdr;
 	struct hfsplus_wd wd;
@@ -123,7 +122,7 @@ int hfsplus_read_wrapper(struct super_block *sb)
 		if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIG))
 			break;
 		if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIGX)) {
-			set_bit(HFSPLUS_SB_HFSX, &sbi->flags);
+			HFSPLUS_SB(sb).flags |= HFSPLUS_SB_HFSX;
 			break;
 		}
 		brelse(bh);
@@ -144,11 +143,11 @@ int hfsplus_read_wrapper(struct super_block *sb)
 	if (blocksize < HFSPLUS_SECTOR_SIZE ||
 	    ((blocksize - 1) & blocksize))
 		return -EINVAL;
-	sbi->alloc_blksz = blocksize;
-	sbi->alloc_blksz_shift = 0;
+	HFSPLUS_SB(sb).alloc_blksz = blocksize;
+	HFSPLUS_SB(sb).alloc_blksz_shift = 0;
 	while ((blocksize >>= 1) != 0)
-		sbi->alloc_blksz_shift++;
-	blocksize = min(sbi->alloc_blksz, (u32)PAGE_SIZE);
+		HFSPLUS_SB(sb).alloc_blksz_shift++;
+	blocksize = min(HFSPLUS_SB(sb).alloc_blksz, (u32)PAGE_SIZE);
 
 	/* align block size to block offset */
 	while (part_start & ((blocksize >> HFSPLUS_SECTOR_SHIFT) - 1))
@@ -159,26 +158,23 @@ int hfsplus_read_wrapper(struct super_block *sb)
 		return -EINVAL;
 	}
 
-	sbi->blockoffset =
-		part_start >> (sb->s_blocksize_bits - HFSPLUS_SECTOR_SHIFT);
-	sbi->sect_count = part_size;
-	sbi->fs_shift = sbi->alloc_blksz_shift - sb->s_blocksize_bits;
+	HFSPLUS_SB(sb).blockoffset = part_start >>
+			(sb->s_blocksize_bits - HFSPLUS_SECTOR_SHIFT);
+	HFSPLUS_SB(sb).sect_count = part_size;
+	HFSPLUS_SB(sb).fs_shift = HFSPLUS_SB(sb).alloc_blksz_shift -
+			sb->s_blocksize_bits;
 
 	bh = sb_bread512(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, vhdr);
 	if (!bh)
 		return -EIO;
 
 	/* should still be the same... */
-	if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) {
-		if (vhdr->signature != cpu_to_be16(HFSPLUS_VOLHEAD_SIGX))
-			goto error;
-	} else {
-		if (vhdr->signature != cpu_to_be16(HFSPLUS_VOLHEAD_SIG))
-			goto error;
-	}
-
-	sbi->s_vhbh = bh;
-	sbi->s_vhdr = vhdr;
+	if (vhdr->signature != (HFSPLUS_SB(sb).flags & HFSPLUS_SB_HFSX ?
+				cpu_to_be16(HFSPLUS_VOLHEAD_SIGX) :
+				cpu_to_be16(HFSPLUS_VOLHEAD_SIG)))
+		goto error;
+	HFSPLUS_SB(sb).s_vhbh = bh;
+	HFSPLUS_SB(sb).s_vhdr = vhdr;
 
 	return 0;
  error:
diff --git a/trunk/include/linux/ceph/debugfs.h b/trunk/include/linux/ceph/debugfs.h
deleted file mode 100644
index 2a79702e092b..000000000000
--- a/trunk/include/linux/ceph/debugfs.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef _FS_CEPH_DEBUGFS_H
-#define _FS_CEPH_DEBUGFS_H
-
-#include "ceph_debug.h"
-#include "types.h"
-
-#define CEPH_DEFINE_SHOW_FUNC(name)					\
-static int name##_open(struct inode *inode, struct file *file)		\
-{									\
-	struct seq_file *sf;						\
-	int ret;							\
-									\
-	ret = single_open(file, name, NULL);				\
-	sf = file->private_data;					\
-	sf->private = inode->i_private;					\
-	return ret;							\
-}									\
-									\
-static const struct file_operations name##_fops = {			\
-	.open		= name##_open,					\
-	.read		= seq_read,					\
-	.llseek		= seq_lseek,					\
-	.release	= single_release,				\
-};
-
-/* debugfs.c */
-extern int ceph_debugfs_init(void);
-extern void ceph_debugfs_cleanup(void);
-extern int ceph_debugfs_client_init(struct ceph_client *client);
-extern void ceph_debugfs_client_cleanup(struct ceph_client *client);
-
-#endif
-
diff --git a/trunk/include/linux/ceph/libceph.h b/trunk/include/linux/ceph/libceph.h
deleted file mode 100644
index f22b2e941686..000000000000
--- a/trunk/include/linux/ceph/libceph.h
+++ /dev/null
@@ -1,249 +0,0 @@
-#ifndef _FS_CEPH_LIBCEPH_H
-#define _FS_CEPH_LIBCEPH_H
-
-#include "ceph_debug.h"
-
-#include <asm/unaligned.h>
-#include <linux/backing-dev.h>
-#include <linux/completion.h>
-#include <linux/exportfs.h>
-#include <linux/fs.h>
-#include <linux/mempool.h>
-#include <linux/pagemap.h>
-#include <linux/wait.h>
-#include <linux/writeback.h>
-#include <linux/slab.h>
-
-#include "types.h"
-#include "messenger.h"
-#include "msgpool.h"
-#include "mon_client.h"
-#include "osd_client.h"
-#include "ceph_fs.h"
-
-/*
- * Supported features
- */
-#define CEPH_FEATURE_SUPPORTED_DEFAULT CEPH_FEATURE_NOSRCADDR
-#define CEPH_FEATURE_REQUIRED_DEFAULT  CEPH_FEATURE_NOSRCADDR
-
-/*
- * mount options
- */
-#define CEPH_OPT_FSID             (1<<0)
-#define CEPH_OPT_NOSHARE          (1<<1) /* don't share client with other sbs */
-#define CEPH_OPT_MYIP             (1<<2) /* specified my ip */
-#define CEPH_OPT_NOCRC            (1<<3) /* no data crc on writes */
-
-#define CEPH_OPT_DEFAULT   (0);
-
-#define ceph_set_opt(client, opt) \
-	(client)->options->flags |= CEPH_OPT_##opt;
-#define ceph_test_opt(client, opt) \
-	(!!((client)->options->flags & CEPH_OPT_##opt))
-
-struct ceph_options {
-	int flags;
-	struct ceph_fsid fsid;
-	struct ceph_entity_addr my_addr;
-	int mount_timeout;
-	int osd_idle_ttl;
-	int osd_timeout;
-	int osd_keepalive_timeout;
-
-	/*
-	 * any type that can't be simply compared or doesn't need need
-	 * to be compared should go beyond this point,
-	 * ceph_compare_options() should be updated accordingly
-	 */
-
-	struct ceph_entity_addr *mon_addr; /* should be the first
-					      pointer type of args */
-	int num_mon;
-	char *name;
-	char *secret;
-};
-
-/*
- * defaults
- */
-#define CEPH_MOUNT_TIMEOUT_DEFAULT  60
-#define CEPH_OSD_TIMEOUT_DEFAULT    60  /* seconds */
-#define CEPH_OSD_KEEPALIVE_DEFAULT  5
-#define CEPH_OSD_IDLE_TTL_DEFAULT    60
-#define CEPH_MOUNT_RSIZE_DEFAULT    (512*1024) /* readahead */
-
-#define CEPH_MSG_MAX_FRONT_LEN	(16*1024*1024)
-#define CEPH_MSG_MAX_DATA_LEN	(16*1024*1024)
-
-#define CEPH_AUTH_NAME_DEFAULT   "guest"
-
-/*
- * Delay telling the MDS we no longer want caps, in case we reopen
- * the file.  Delay a minimum amount of time, even if we send a cap
- * message for some other reason.  Otherwise, take the oppotunity to
- * update the mds to avoid sending another message later.
- */
-#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT      5  /* cap release delay */
-#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT     60  /* cap release delay */
-
-#define CEPH_CAP_RELEASE_SAFETY_DEFAULT        (CEPH_CAPS_PER_RELEASE * 4)
-
-/* mount state */
-enum {
-	CEPH_MOUNT_MOUNTING,
-	CEPH_MOUNT_MOUNTED,
-	CEPH_MOUNT_UNMOUNTING,
-	CEPH_MOUNT_UNMOUNTED,
-	CEPH_MOUNT_SHUTDOWN,
-};
-
-/*
- * subtract jiffies
- */
-static inline unsigned long time_sub(unsigned long a, unsigned long b)
-{
-	BUG_ON(time_after(b, a));
-	return (long)a - (long)b;
-}
-
-struct ceph_mds_client;
-
-/*
- * per client state
- *
- * possibly shared by multiple mount points, if they are
- * mounting the same ceph filesystem/cluster.
- */
-struct ceph_client {
-	struct ceph_fsid fsid;
-	bool have_fsid;
-
-	void *private;
-
-	struct ceph_options *options;
-
-	struct mutex mount_mutex;      /* serialize mount attempts */
-	wait_queue_head_t auth_wq;
-	int auth_err;
-
-	int (*extra_mon_dispatch)(struct ceph_client *, struct ceph_msg *);
-
-	u32 supported_features;
-	u32 required_features;
-
-	struct ceph_messenger *msgr;   /* messenger instance */
-	struct ceph_mon_client monc;
-	struct ceph_osd_client osdc;
-
-#ifdef CONFIG_DEBUG_FS
-	struct dentry *debugfs_dir;
-	struct dentry *debugfs_monmap;
-	struct dentry *debugfs_osdmap;
-#endif
-};
-
-
-
-/*
- * snapshots
- */
-
-/*
- * A "snap context" is the set of existing snapshots when we
- * write data.  It is used by the OSD to guide its COW behavior.
- *
- * The ceph_snap_context is refcounted, and attached to each dirty
- * page, indicating which context the dirty data belonged when it was
- * dirtied.
- */
-struct ceph_snap_context {
-	atomic_t nref;
-	u64 seq;
-	int num_snaps;
-	u64 snaps[];
-};
-
-static inline struct ceph_snap_context *
-ceph_get_snap_context(struct ceph_snap_context *sc)
-{
-	/*
-	printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref),
-	       atomic_read(&sc->nref)+1);
-	*/
-	if (sc)
-		atomic_inc(&sc->nref);
-	return sc;
-}
-
-static inline void ceph_put_snap_context(struct ceph_snap_context *sc)
-{
-	if (!sc)
-		return;
-	/*
-	printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref),
-	       atomic_read(&sc->nref)-1);
-	*/
-	if (atomic_dec_and_test(&sc->nref)) {
-		/*printk(" deleting snap_context %p\n", sc);*/
-		kfree(sc);
-	}
-}
-
-/*
- * calculate the number of pages a given length and offset map onto,
- * if we align the data.
- */
-static inline int calc_pages_for(u64 off, u64 len)
-{
-	return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) -
-		(off >> PAGE_CACHE_SHIFT);
-}
-
-/* ceph_common.c */
-extern const char *ceph_msg_type_name(int type);
-extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
-extern struct kmem_cache *ceph_inode_cachep;
-extern struct kmem_cache *ceph_cap_cachep;
-extern struct kmem_cache *ceph_dentry_cachep;
-extern struct kmem_cache *ceph_file_cachep;
-
-extern int ceph_parse_options(struct ceph_options **popt, char *options,
-			      const char *dev_name, const char *dev_name_end,
-			      int (*parse_extra_token)(char *c, void *private),
-			      void *private);
-extern void ceph_destroy_options(struct ceph_options *opt);
-extern int ceph_compare_options(struct ceph_options *new_opt,
-				struct ceph_client *client);
-extern struct ceph_client *ceph_create_client(struct ceph_options *opt,
-					      void *private);
-extern u64 ceph_client_id(struct ceph_client *client);
-extern void ceph_destroy_client(struct ceph_client *client);
-extern int __ceph_open_session(struct ceph_client *client,
-			       unsigned long started);
-extern int ceph_open_session(struct ceph_client *client);
-
-/* pagevec.c */
-extern void ceph_release_page_vector(struct page **pages, int num_pages);
-
-extern struct page **ceph_get_direct_page_vector(const char __user *data,
-					    int num_pages,
-					    loff_t off, size_t len);
-extern void ceph_put_page_vector(struct page **pages, int num_pages);
-extern void ceph_release_page_vector(struct page **pages, int num_pages);
-extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
-extern int ceph_copy_user_to_page_vector(struct page **pages,
-					 const char __user *data,
-					 loff_t off, size_t len);
-extern int ceph_copy_to_page_vector(struct page **pages,
-				    const char *data,
-				    loff_t off, size_t len);
-extern int ceph_copy_from_page_vector(struct page **pages,
-				    char *data,
-				    loff_t off, size_t len);
-extern int ceph_copy_page_vector_to_user(struct page **pages, char __user *data,
-				    loff_t off, size_t len);
-extern void ceph_zero_page_vector_range(int off, int len, struct page **pages);
-
-
-#endif /* _FS_CEPH_SUPER_H */
diff --git a/trunk/net/Kconfig b/trunk/net/Kconfig
index 55fd82e9ffd9..e926884c1675 100644
--- a/trunk/net/Kconfig
+++ b/trunk/net/Kconfig
@@ -293,7 +293,6 @@ source "net/wimax/Kconfig"
 source "net/rfkill/Kconfig"
 source "net/9p/Kconfig"
 source "net/caif/Kconfig"
-source "net/ceph/Kconfig"
 
 
 endif   # if NET
diff --git a/trunk/net/Makefile b/trunk/net/Makefile
index 6b7bfd7f1416..ea60fbce9b1b 100644
--- a/trunk/net/Makefile
+++ b/trunk/net/Makefile
@@ -68,4 +68,3 @@ obj-$(CONFIG_SYSCTL)		+= sysctl_net.o
 endif
 obj-$(CONFIG_WIMAX)		+= wimax/
 obj-$(CONFIG_DNS_RESOLVER)	+= dns_resolver/
-obj-$(CONFIG_CEPH_LIB)		+= ceph/
diff --git a/trunk/net/ceph/Kconfig b/trunk/net/ceph/Kconfig
deleted file mode 100644
index ad424049b0cf..000000000000
--- a/trunk/net/ceph/Kconfig
+++ /dev/null
@@ -1,28 +0,0 @@
-config CEPH_LIB
-        tristate "Ceph core library (EXPERIMENTAL)"
-	depends on INET && EXPERIMENTAL
-	select LIBCRC32C
-	select CRYPTO_AES
-	select CRYPTO
-	default n
-	help
-	  Choose Y or M here to include cephlib, which provides the
-	  common functionality to both the Ceph filesystem and
-	  to the rados block device (rbd).
-
-	  More information at http://ceph.newdream.net/.
-
-	  If unsure, say N.
-
-config CEPH_LIB_PRETTYDEBUG
-	bool "Include file:line in ceph debug output"
-	depends on CEPH_LIB
-	default n
-	help
-	  If you say Y here, debug output will include a filename and
-	  line to aid debugging.  This increases kernel size and slows
-	  execution slightly when debug call sites are enabled (e.g.,
-	  via CONFIG_DYNAMIC_DEBUG).
-
-	  If unsure, say N.
-
diff --git a/trunk/net/ceph/Makefile b/trunk/net/ceph/Makefile
deleted file mode 100644
index aab1cabb8035..000000000000
--- a/trunk/net/ceph/Makefile
+++ /dev/null
@@ -1,37 +0,0 @@
-#
-# Makefile for CEPH filesystem.
-#
-
-ifneq ($(KERNELRELEASE),)
-
-obj-$(CONFIG_CEPH_LIB) += libceph.o
-
-libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
-	mon_client.o \
-	osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
-	debugfs.o \
-	auth.o auth_none.o \
-	crypto.o armor.o \
-	auth_x.o \
-	ceph_fs.o ceph_strings.o ceph_hash.o \
-	pagevec.o
-
-else
-#Otherwise we were called directly from the command
-# line; invoke the kernel build system.
-
-KERNELDIR ?= /lib/modules/$(shell uname -r)/build
-PWD := $(shell pwd)
-
-default: all
-
-all:
-	$(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules
-
-modules_install:
-	$(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules_install
-
-clean:
-	$(MAKE) -C $(KERNELDIR) M=$(PWD) clean
-
-endif
diff --git a/trunk/net/ceph/ceph_common.c b/trunk/net/ceph/ceph_common.c
deleted file mode 100644
index f3e4a13fea0c..000000000000
--- a/trunk/net/ceph/ceph_common.c
+++ /dev/null
@@ -1,529 +0,0 @@
-
-#include <linux/ceph/ceph_debug.h>
-#include <linux/backing-dev.h>
-#include <linux/ctype.h>
-#include <linux/fs.h>
-#include <linux/inet.h>
-#include <linux/in6.h>
-#include <linux/module.h>
-#include <linux/mount.h>
-#include <linux/parser.h>
-#include <linux/sched.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <linux/statfs.h>
-#include <linux/string.h>
-
-
-#include <linux/ceph/libceph.h>
-#include <linux/ceph/debugfs.h>
-#include <linux/ceph/decode.h>
-#include <linux/ceph/mon_client.h>
-#include <linux/ceph/auth.h>
-
-
-
-/*
- * find filename portion of a path (/foo/bar/baz -> baz)
- */
-const char *ceph_file_part(const char *s, int len)
-{
-	const char *e = s + len;
-
-	while (e != s && *(e-1) != '/')
-		e--;
-	return e;
-}
-EXPORT_SYMBOL(ceph_file_part);
-
-const char *ceph_msg_type_name(int type)
-{
-	switch (type) {
-	case CEPH_MSG_SHUTDOWN: return "shutdown";
-	case CEPH_MSG_PING: return "ping";
-	case CEPH_MSG_AUTH: return "auth";
-	case CEPH_MSG_AUTH_REPLY: return "auth_reply";
-	case CEPH_MSG_MON_MAP: return "mon_map";
-	case CEPH_MSG_MON_GET_MAP: return "mon_get_map";
-	case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe";
-	case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack";
-	case CEPH_MSG_STATFS: return "statfs";
-	case CEPH_MSG_STATFS_REPLY: return "statfs_reply";
-	case CEPH_MSG_MDS_MAP: return "mds_map";
-	case CEPH_MSG_CLIENT_SESSION: return "client_session";
-	case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect";
-	case CEPH_MSG_CLIENT_REQUEST: return "client_request";
-	case CEPH_MSG_CLIENT_REQUEST_FORWARD: return "client_request_forward";
-	case CEPH_MSG_CLIENT_REPLY: return "client_reply";
-	case CEPH_MSG_CLIENT_CAPS: return "client_caps";
-	case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release";
-	case CEPH_MSG_CLIENT_SNAP: return "client_snap";
-	case CEPH_MSG_CLIENT_LEASE: return "client_lease";
-	case CEPH_MSG_OSD_MAP: return "osd_map";
-	case CEPH_MSG_OSD_OP: return "osd_op";
-	case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
-	default: return "unknown";
-	}
-}
-EXPORT_SYMBOL(ceph_msg_type_name);
-
-/*
- * Initially learn our fsid, or verify an fsid matches.
- */
-int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid)
-{
-	if (client->have_fsid) {
-		if (ceph_fsid_compare(&client->fsid, fsid)) {
-			pr_err("bad fsid, had %pU got %pU",
-			       &client->fsid, fsid);
-			return -1;
-		}
-	} else {
-		pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid);
-		memcpy(&client->fsid, fsid, sizeof(*fsid));
-		ceph_debugfs_client_init(client);
-		client->have_fsid = true;
-	}
-	return 0;
-}
-EXPORT_SYMBOL(ceph_check_fsid);
-
-static int strcmp_null(const char *s1, const char *s2)
-{
-	if (!s1 && !s2)
-		return 0;
-	if (s1 && !s2)
-		return -1;
-	if (!s1 && s2)
-		return 1;
-	return strcmp(s1, s2);
-}
-
-int ceph_compare_options(struct ceph_options *new_opt,
-			 struct ceph_client *client)
-{
-	struct ceph_options *opt1 = new_opt;
-	struct ceph_options *opt2 = client->options;
-	int ofs = offsetof(struct ceph_options, mon_addr);
-	int i;
-	int ret;
-
-	ret = memcmp(opt1, opt2, ofs);
-	if (ret)
-		return ret;
-
-	ret = strcmp_null(opt1->name, opt2->name);
-	if (ret)
-		return ret;
-
-	ret = strcmp_null(opt1->secret, opt2->secret);
-	if (ret)
-		return ret;
-
-	/* any matching mon ip implies a match */
-	for (i = 0; i < opt1->num_mon; i++) {
-		if (ceph_monmap_contains(client->monc.monmap,
-				 &opt1->mon_addr[i]))
-			return 0;
-	}
-	return -1;
-}
-EXPORT_SYMBOL(ceph_compare_options);
-
-
-static int parse_fsid(const char *str, struct ceph_fsid *fsid)
-{
-	int i = 0;
-	char tmp[3];
-	int err = -EINVAL;
-	int d;
-
-	dout("parse_fsid '%s'\n", str);
-	tmp[2] = 0;
-	while (*str && i < 16) {
-		if (ispunct(*str)) {
-			str++;
-			continue;
-		}
-		if (!isxdigit(str[0]) || !isxdigit(str[1]))
-			break;
-		tmp[0] = str[0];
-		tmp[1] = str[1];
-		if (sscanf(tmp, "%x", &d) < 1)
-			break;
-		fsid->fsid[i] = d & 0xff;
-		i++;
-		str += 2;
-	}
-
-	if (i == 16)
-		err = 0;
-	dout("parse_fsid ret %d got fsid %pU", err, fsid);
-	return err;
-}
-
-/*
- * ceph options
- */
-enum {
-	Opt_osdtimeout,
-	Opt_osdkeepalivetimeout,
-	Opt_mount_timeout,
-	Opt_osd_idle_ttl,
-	Opt_last_int,
-	/* int args above */
-	Opt_fsid,
-	Opt_name,
-	Opt_secret,
-	Opt_ip,
-	Opt_last_string,
-	/* string args above */
-	Opt_noshare,
-	Opt_nocrc,
-};
-
-static match_table_t opt_tokens = {
-	{Opt_osdtimeout, "osdtimeout=%d"},
-	{Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
-	{Opt_mount_timeout, "mount_timeout=%d"},
-	{Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
-	/* int args above */
-	{Opt_fsid, "fsid=%s"},
-	{Opt_name, "name=%s"},
-	{Opt_secret, "secret=%s"},
-	{Opt_ip, "ip=%s"},
-	/* string args above */
-	{Opt_noshare, "noshare"},
-	{Opt_nocrc, "nocrc"},
-	{-1, NULL}
-};
-
-void ceph_destroy_options(struct ceph_options *opt)
-{
-	dout("destroy_options %p\n", opt);
-	kfree(opt->name);
-	kfree(opt->secret);
-	kfree(opt);
-}
-EXPORT_SYMBOL(ceph_destroy_options);
-
-int ceph_parse_options(struct ceph_options **popt, char *options,
-		       const char *dev_name, const char *dev_name_end,
-		       int (*parse_extra_token)(char *c, void *private),
-		       void *private)
-{
-	struct ceph_options *opt;
-	const char *c;
-	int err = -ENOMEM;
-	substring_t argstr[MAX_OPT_ARGS];
-
-	opt = kzalloc(sizeof(*opt), GFP_KERNEL);
-	if (!opt)
-		return err;
-	opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr),
-				GFP_KERNEL);
-	if (!opt->mon_addr)
-		goto out;
-
-	dout("parse_options %p options '%s' dev_name '%s'\n", opt, options,
-	     dev_name);
-
-	/* start with defaults */
-	opt->flags = CEPH_OPT_DEFAULT;
-	opt->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT;
-	opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
-	opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */
-	opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;   /* seconds */
-
-	/* get mon ip(s) */
-	/* ip1[:port1][,ip2[:port2]...] */
-	err = ceph_parse_ips(dev_name, dev_name_end, opt->mon_addr,
-			     CEPH_MAX_MON, &opt->num_mon);
-	if (err < 0)
-		goto out;
-
-	/* parse mount options */
-	while ((c = strsep(&options, ",")) != NULL) {
-		int token, intval, ret;
-		if (!*c)
-			continue;
-		err = -EINVAL;
-		token = match_token((char *)c, opt_tokens, argstr);
-		if (token < 0 && parse_extra_token) {
-			/* extra? */
-			err = parse_extra_token((char *)c, private);
-			if (err < 0) {
-				pr_err("bad option at '%s'\n", c);
-				goto out;
-			}
-			continue;
-		}
-		if (token < Opt_last_int) {
-			ret = match_int(&argstr[0], &intval);
-			if (ret < 0) {
-				pr_err("bad mount option arg (not int) "
-				       "at '%s'\n", c);
-				continue;
-			}
-			dout("got int token %d val %d\n", token, intval);
-		} else if (token > Opt_last_int && token < Opt_last_string) {
-			dout("got string token %d val %s\n", token,
-			     argstr[0].from);
-		} else {
-			dout("got token %d\n", token);
-		}
-		switch (token) {
-		case Opt_ip:
-			err = ceph_parse_ips(argstr[0].from,
-					     argstr[0].to,
-					     &opt->my_addr,
-					     1, NULL);
-			if (err < 0)
-				goto out;
-			opt->flags |= CEPH_OPT_MYIP;
-			break;
-
-		case Opt_fsid:
-			err = parse_fsid(argstr[0].from, &opt->fsid);
-			if (err == 0)
-				opt->flags |= CEPH_OPT_FSID;
-			break;
-		case Opt_name:
-			opt->name = kstrndup(argstr[0].from,
-					      argstr[0].to-argstr[0].from,
-					      GFP_KERNEL);
-			break;
-		case Opt_secret:
-			opt->secret = kstrndup(argstr[0].from,
-						argstr[0].to-argstr[0].from,
-						GFP_KERNEL);
-			break;
-
-			/* misc */
-		case Opt_osdtimeout:
-			opt->osd_timeout = intval;
-			break;
-		case Opt_osdkeepalivetimeout:
-			opt->osd_keepalive_timeout = intval;
-			break;
-		case Opt_osd_idle_ttl:
-			opt->osd_idle_ttl = intval;
-			break;
-		case Opt_mount_timeout:
-			opt->mount_timeout = intval;
-			break;
-
-		case Opt_noshare:
-			opt->flags |= CEPH_OPT_NOSHARE;
-			break;
-
-		case Opt_nocrc:
-			opt->flags |= CEPH_OPT_NOCRC;
-			break;
-
-		default:
-			BUG_ON(token);
-		}
-	}
-
-	/* success */
-	*popt = opt;
-	return 0;
-
-out:
-	ceph_destroy_options(opt);
-	return err;
-}
-EXPORT_SYMBOL(ceph_parse_options);
-
-u64 ceph_client_id(struct ceph_client *client)
-{
-	return client->monc.auth->global_id;
-}
-EXPORT_SYMBOL(ceph_client_id);
-
-/*
- * create a fresh client instance
- */
-struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private)
-{
-	struct ceph_client *client;
-	int err = -ENOMEM;
-
-	client = kzalloc(sizeof(*client), GFP_KERNEL);
-	if (client == NULL)
-		return ERR_PTR(-ENOMEM);
-
-	client->private = private;
-	client->options = opt;
-
-	mutex_init(&client->mount_mutex);
-	init_waitqueue_head(&client->auth_wq);
-	client->auth_err = 0;
-
-	client->extra_mon_dispatch = NULL;
-	client->supported_features = CEPH_FEATURE_SUPPORTED_DEFAULT;
-	client->required_features = CEPH_FEATURE_REQUIRED_DEFAULT;
-
-	client->msgr = NULL;
-
-	/* subsystems */
-	err = ceph_monc_init(&client->monc, client);
-	if (err < 0)
-		goto fail;
-	err = ceph_osdc_init(&client->osdc, client);
-	if (err < 0)
-		goto fail_monc;
-
-	return client;
-
-fail_monc:
-	ceph_monc_stop(&client->monc);
-fail:
-	kfree(client);
-	return ERR_PTR(err);
-}
-EXPORT_SYMBOL(ceph_create_client);
-
-void ceph_destroy_client(struct ceph_client *client)
-{
-	dout("destroy_client %p\n", client);
-
-	/* unmount */
-	ceph_osdc_stop(&client->osdc);
-
-	/*
-	 * make sure mds and osd connections close out before destroying
-	 * the auth module, which is needed to free those connections'
-	 * ceph_authorizers.
-	 */
-	ceph_msgr_flush();
-
-	ceph_monc_stop(&client->monc);
-
-	ceph_debugfs_client_cleanup(client);
-
-	if (client->msgr)
-		ceph_messenger_destroy(client->msgr);
-
-	ceph_destroy_options(client->options);
-
-	kfree(client);
-	dout("destroy_client %p done\n", client);
-}
-EXPORT_SYMBOL(ceph_destroy_client);
-
-/*
- * true if we have the mon map (and have thus joined the cluster)
- */
-static int have_mon_and_osd_map(struct ceph_client *client)
-{
-	return client->monc.monmap && client->monc.monmap->epoch &&
-	       client->osdc.osdmap && client->osdc.osdmap->epoch;
-}
-
-/*
- * mount: join the ceph cluster, and open root directory.
- */
-int __ceph_open_session(struct ceph_client *client, unsigned long started)
-{
-	struct ceph_entity_addr *myaddr = NULL;
-	int err;
-	unsigned long timeout = client->options->mount_timeout * HZ;
-
-	/* initialize the messenger */
-	if (client->msgr == NULL) {
-		if (ceph_test_opt(client, MYIP))
-			myaddr = &client->options->my_addr;
-		client->msgr = ceph_messenger_create(myaddr,
-					client->supported_features,
-					client->required_features);
-		if (IS_ERR(client->msgr)) {
-			client->msgr = NULL;
-			return PTR_ERR(client->msgr);
-		}
-		client->msgr->nocrc = ceph_test_opt(client, NOCRC);
-	}
-
-	/* open session, and wait for mon and osd maps */
-	err = ceph_monc_open_session(&client->monc);
-	if (err < 0)
-		return err;
-
-	while (!have_mon_and_osd_map(client)) {
-		err = -EIO;
-		if (timeout && time_after_eq(jiffies, started + timeout))
-			return err;
-
-		/* wait */
-		dout("mount waiting for mon_map\n");
-		err = wait_event_interruptible_timeout(client->auth_wq,
-			have_mon_and_osd_map(client) || (client->auth_err < 0),
-			timeout);
-		if (err == -EINTR || err == -ERESTARTSYS)
-			return err;
-		if (client->auth_err < 0)
-			return client->auth_err;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(__ceph_open_session);
-
-
-int ceph_open_session(struct ceph_client *client)
-{
-	int ret;
-	unsigned long started = jiffies;  /* note the start time */
-
-	dout("open_session start\n");
-	mutex_lock(&client->mount_mutex);
-
-	ret = __ceph_open_session(client, started);
-
-	mutex_unlock(&client->mount_mutex);
-	return ret;
-}
-EXPORT_SYMBOL(ceph_open_session);
-
-
-static int __init init_ceph_lib(void)
-{
-	int ret = 0;
-
-	ret = ceph_debugfs_init();
-	if (ret < 0)
-		goto out;
-
-	ret = ceph_msgr_init();
-	if (ret < 0)
-		goto out_debugfs;
-
-	pr_info("loaded (mon/osd proto %d/%d, osdmap %d/%d %d/%d)\n",
-		CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL,
-		CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT,
-		CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT);
-
-	return 0;
-
-out_debugfs:
-	ceph_debugfs_cleanup();
-out:
-	return ret;
-}
-
-static void __exit exit_ceph_lib(void)
-{
-	dout("exit_ceph_lib\n");
-	ceph_msgr_exit();
-	ceph_debugfs_cleanup();
-}
-
-module_init(init_ceph_lib);
-module_exit(exit_ceph_lib);
-
-MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
-MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
-MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
-MODULE_DESCRIPTION("Ceph filesystem for Linux");
-MODULE_LICENSE("GPL");
diff --git a/trunk/net/ceph/ceph_strings.c b/trunk/net/ceph/ceph_strings.c
deleted file mode 100644
index 3fbda04de29c..000000000000
--- a/trunk/net/ceph/ceph_strings.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Ceph string constants
- */
-#include <linux/module.h>
-#include <linux/ceph/types.h>
-
-const char *ceph_entity_type_name(int type)
-{
-	switch (type) {
-	case CEPH_ENTITY_TYPE_MDS: return "mds";
-	case CEPH_ENTITY_TYPE_OSD: return "osd";
-	case CEPH_ENTITY_TYPE_MON: return "mon";
-	case CEPH_ENTITY_TYPE_CLIENT: return "client";
-	case CEPH_ENTITY_TYPE_AUTH: return "auth";
-	default: return "unknown";
-	}
-}
-
-const char *ceph_osd_op_name(int op)
-{
-	switch (op) {
-	case CEPH_OSD_OP_READ: return "read";
-	case CEPH_OSD_OP_STAT: return "stat";
-
-	case CEPH_OSD_OP_MASKTRUNC: return "masktrunc";
-
-	case CEPH_OSD_OP_WRITE: return "write";
-	case CEPH_OSD_OP_DELETE: return "delete";
-	case CEPH_OSD_OP_TRUNCATE: return "truncate";
-	case CEPH_OSD_OP_ZERO: return "zero";
-	case CEPH_OSD_OP_WRITEFULL: return "writefull";
-	case CEPH_OSD_OP_ROLLBACK: return "rollback";
-
-	case CEPH_OSD_OP_APPEND: return "append";
-	case CEPH_OSD_OP_STARTSYNC: return "startsync";
-	case CEPH_OSD_OP_SETTRUNC: return "settrunc";
-	case CEPH_OSD_OP_TRIMTRUNC: return "trimtrunc";
-
-	case CEPH_OSD_OP_TMAPUP: return "tmapup";
-	case CEPH_OSD_OP_TMAPGET: return "tmapget";
-	case CEPH_OSD_OP_TMAPPUT: return "tmapput";
-
-	case CEPH_OSD_OP_GETXATTR: return "getxattr";
-	case CEPH_OSD_OP_GETXATTRS: return "getxattrs";
-	case CEPH_OSD_OP_SETXATTR: return "setxattr";
-	case CEPH_OSD_OP_SETXATTRS: return "setxattrs";
-	case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs";
-	case CEPH_OSD_OP_RMXATTR: return "rmxattr";
-	case CEPH_OSD_OP_CMPXATTR: return "cmpxattr";
-
-	case CEPH_OSD_OP_PULL: return "pull";
-	case CEPH_OSD_OP_PUSH: return "push";
-	case CEPH_OSD_OP_BALANCEREADS: return "balance-reads";
-	case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads";
-	case CEPH_OSD_OP_SCRUB: return "scrub";
-
-	case CEPH_OSD_OP_WRLOCK: return "wrlock";
-	case CEPH_OSD_OP_WRUNLOCK: return "wrunlock";
-	case CEPH_OSD_OP_RDLOCK: return "rdlock";
-	case CEPH_OSD_OP_RDUNLOCK: return "rdunlock";
-	case CEPH_OSD_OP_UPLOCK: return "uplock";
-	case CEPH_OSD_OP_DNLOCK: return "dnlock";
-
-	case CEPH_OSD_OP_CALL: return "call";
-
-	case CEPH_OSD_OP_PGLS: return "pgls";
-	}
-	return "???";
-}
-
-
-const char *ceph_pool_op_name(int op)
-{
-	switch (op) {
-	case POOL_OP_CREATE: return "create";
-	case POOL_OP_DELETE: return "delete";
-	case POOL_OP_AUID_CHANGE: return "auid change";
-	case POOL_OP_CREATE_SNAP: return "create snap";
-	case POOL_OP_DELETE_SNAP: return "delete snap";
-	case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap";
-	case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap";
-	}
-	return "???";
-}
diff --git a/trunk/net/ceph/debugfs.c b/trunk/net/ceph/debugfs.c
deleted file mode 100644
index 27d4ea315d12..000000000000
--- a/trunk/net/ceph/debugfs.c
+++ /dev/null
@@ -1,267 +0,0 @@
-#include <linux/ceph/ceph_debug.h>
-
-#include <linux/device.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/ctype.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-
-#include <linux/ceph/libceph.h>
-#include <linux/ceph/mon_client.h>
-#include <linux/ceph/auth.h>
-#include <linux/ceph/debugfs.h>
-
-#ifdef CONFIG_DEBUG_FS
-
-/*
- * Implement /sys/kernel/debug/ceph fun
- *
- * /sys/kernel/debug/ceph/client*  - an instance of the ceph client
- *      .../osdmap      - current osdmap
- *      .../monmap      - current monmap
- *      .../osdc        - active osd requests
- *      .../monc        - mon client state
- *      .../dentry_lru  - dump contents of dentry lru
- *      .../caps        - expose cap (reservation) stats
- *      .../bdi         - symlink to ../../bdi/something
- */
-
-static struct dentry *ceph_debugfs_dir;
-
-static int monmap_show(struct seq_file *s, void *p)
-{
-	int i;
-	struct ceph_client *client = s->private;
-
-	if (client->monc.monmap == NULL)
-		return 0;
-
-	seq_printf(s, "epoch %d\n", client->monc.monmap->epoch);
-	for (i = 0; i < client->monc.monmap->num_mon; i++) {
-		struct ceph_entity_inst *inst =
-			&client->monc.monmap->mon_inst[i];
-
-		seq_printf(s, "\t%s%lld\t%s\n",
-			   ENTITY_NAME(inst->name),
-			   ceph_pr_addr(&inst->addr.in_addr));
-	}
-	return 0;
-}
-
-static int osdmap_show(struct seq_file *s, void *p)
-{
-	int i;
-	struct ceph_client *client = s->private;
-	struct rb_node *n;
-
-	if (client->osdc.osdmap == NULL)
-		return 0;
-	seq_printf(s, "epoch %d\n", client->osdc.osdmap->epoch);
-	seq_printf(s, "flags%s%s\n",
-		   (client->osdc.osdmap->flags & CEPH_OSDMAP_NEARFULL) ?
-		   " NEARFULL" : "",
-		   (client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ?
-		   " FULL" : "");
-	for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) {
-		struct ceph_pg_pool_info *pool =
-			rb_entry(n, struct ceph_pg_pool_info, node);
-		seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n",
-			   pool->id, pool->v.pg_num, pool->pg_num_mask,
-			   pool->v.lpg_num, pool->lpg_num_mask);
-	}
-	for (i = 0; i < client->osdc.osdmap->max_osd; i++) {
-		struct ceph_entity_addr *addr =
-			&client->osdc.osdmap->osd_addr[i];
-		int state = client->osdc.osdmap->osd_state[i];
-		char sb[64];
-
-		seq_printf(s, "\tosd%d\t%s\t%3d%%\t(%s)\n",
-			   i, ceph_pr_addr(&addr->in_addr),
-			   ((client->osdc.osdmap->osd_weight[i]*100) >> 16),
-			   ceph_osdmap_state_str(sb, sizeof(sb), state));
-	}
-	return 0;
-}
-
-static int monc_show(struct seq_file *s, void *p)
-{
-	struct ceph_client *client = s->private;
-	struct ceph_mon_generic_request *req;
-	struct ceph_mon_client *monc = &client->monc;
-	struct rb_node *rp;
-
-	mutex_lock(&monc->mutex);
-
-	if (monc->have_mdsmap)
-		seq_printf(s, "have mdsmap %u\n", (unsigned)monc->have_mdsmap);
-	if (monc->have_osdmap)
-		seq_printf(s, "have osdmap %u\n", (unsigned)monc->have_osdmap);
-	if (monc->want_next_osdmap)
-		seq_printf(s, "want next osdmap\n");
-
-	for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) {
-		__u16 op;
-		req = rb_entry(rp, struct ceph_mon_generic_request, node);
-		op = le16_to_cpu(req->request->hdr.type);
-		if (op == CEPH_MSG_STATFS)
-			seq_printf(s, "%lld statfs\n", req->tid);
-		else
-			seq_printf(s, "%lld unknown\n", req->tid);
-	}
-
-	mutex_unlock(&monc->mutex);
-	return 0;
-}
-
-static int osdc_show(struct seq_file *s, void *pp)
-{
-	struct ceph_client *client = s->private;
-	struct ceph_osd_client *osdc = &client->osdc;
-	struct rb_node *p;
-
-	mutex_lock(&osdc->request_mutex);
-	for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
-		struct ceph_osd_request *req;
-		struct ceph_osd_request_head *head;
-		struct ceph_osd_op *op;
-		int num_ops;
-		int opcode, olen;
-		int i;
-
-		req = rb_entry(p, struct ceph_osd_request, r_node);
-
-		seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid,
-			   req->r_osd ? req->r_osd->o_osd : -1,
-			   le32_to_cpu(req->r_pgid.pool),
-			   le16_to_cpu(req->r_pgid.ps));
-
-		head = req->r_request->front.iov_base;
-		op = (void *)(head + 1);
-
-		num_ops = le16_to_cpu(head->num_ops);
-		olen = le32_to_cpu(head->object_len);
-		seq_printf(s, "%.*s", olen,
-			   (const char *)(head->ops + num_ops));
-
-		if (req->r_reassert_version.epoch)
-			seq_printf(s, "\t%u'%llu",
-			   (unsigned)le32_to_cpu(req->r_reassert_version.epoch),
-			   le64_to_cpu(req->r_reassert_version.version));
-		else
-			seq_printf(s, "\t");
-
-		for (i = 0; i < num_ops; i++) {
-			opcode = le16_to_cpu(op->op);
-			seq_printf(s, "\t%s", ceph_osd_op_name(opcode));
-			op++;
-		}
-
-		seq_printf(s, "\n");
-	}
-	mutex_unlock(&osdc->request_mutex);
-	return 0;
-}
-
-CEPH_DEFINE_SHOW_FUNC(monmap_show)
-CEPH_DEFINE_SHOW_FUNC(osdmap_show)
-CEPH_DEFINE_SHOW_FUNC(monc_show)
-CEPH_DEFINE_SHOW_FUNC(osdc_show)
-
-int ceph_debugfs_init(void)
-{
-	ceph_debugfs_dir = debugfs_create_dir("ceph", NULL);
-	if (!ceph_debugfs_dir)
-		return -ENOMEM;
-	return 0;
-}
-
-void ceph_debugfs_cleanup(void)
-{
-	debugfs_remove(ceph_debugfs_dir);
-}
-
-int ceph_debugfs_client_init(struct ceph_client *client)
-{
-	int ret = -ENOMEM;
-	char name[80];
-
-	snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid,
-		 client->monc.auth->global_id);
-
-	client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir);
-	if (!client->debugfs_dir)
-		goto out;
-
-	client->monc.debugfs_file = debugfs_create_file("monc",
-						      0600,
-						      client->debugfs_dir,
-						      client,
-						      &monc_show_fops);
-	if (!client->monc.debugfs_file)
-		goto out;
-
-	client->osdc.debugfs_file = debugfs_create_file("osdc",
-						      0600,
-						      client->debugfs_dir,
-						      client,
-						      &osdc_show_fops);
-	if (!client->osdc.debugfs_file)
-		goto out;
-
-	client->debugfs_monmap = debugfs_create_file("monmap",
-					0600,
-					client->debugfs_dir,
-					client,
-					&monmap_show_fops);
-	if (!client->debugfs_monmap)
-		goto out;
-
-	client->debugfs_osdmap = debugfs_create_file("osdmap",
-					0600,
-					client->debugfs_dir,
-					client,
-					&osdmap_show_fops);
-	if (!client->debugfs_osdmap)
-		goto out;
-
-	return 0;
-
-out:
-	ceph_debugfs_client_cleanup(client);
-	return ret;
-}
-
-void ceph_debugfs_client_cleanup(struct ceph_client *client)
-{
-	debugfs_remove(client->debugfs_osdmap);
-	debugfs_remove(client->debugfs_monmap);
-	debugfs_remove(client->osdc.debugfs_file);
-	debugfs_remove(client->monc.debugfs_file);
-	debugfs_remove(client->debugfs_dir);
-}
-
-#else  /* CONFIG_DEBUG_FS */
-
-int ceph_debugfs_init(void)
-{
-	return 0;
-}
-
-void ceph_debugfs_cleanup(void)
-{
-}
-
-int ceph_debugfs_client_init(struct ceph_client *client)
-{
-	return 0;
-}
-
-void ceph_debugfs_client_cleanup(struct ceph_client *client)
-{
-}
-
-#endif  /* CONFIG_DEBUG_FS */
-
-EXPORT_SYMBOL(ceph_debugfs_init);
-EXPORT_SYMBOL(ceph_debugfs_cleanup);
diff --git a/trunk/net/ceph/pagelist.c b/trunk/net/ceph/pagelist.c
deleted file mode 100644
index 13cb409a7bba..000000000000
--- a/trunk/net/ceph/pagelist.c
+++ /dev/null
@@ -1,154 +0,0 @@
-
-#include <linux/module.h>
-#include <linux/gfp.h>
-#include <linux/pagemap.h>
-#include <linux/highmem.h>
-#include <linux/ceph/pagelist.h>
-
-static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
-{
-	if (pl->mapped_tail) {
-		struct page *page = list_entry(pl->head.prev, struct page, lru);
-		kunmap(page);
-		pl->mapped_tail = NULL;
-	}
-}
-
-int ceph_pagelist_release(struct ceph_pagelist *pl)
-{
-	ceph_pagelist_unmap_tail(pl);
-	while (!list_empty(&pl->head)) {
-		struct page *page = list_first_entry(&pl->head, struct page,
-						     lru);
-		list_del(&page->lru);
-		__free_page(page);
-	}
-	ceph_pagelist_free_reserve(pl);
-	return 0;
-}
-EXPORT_SYMBOL(ceph_pagelist_release);
-
-static int ceph_pagelist_addpage(struct ceph_pagelist *pl)
-{
-	struct page *page;
-
-	if (!pl->num_pages_free) {
-		page = __page_cache_alloc(GFP_NOFS);
-	} else {
-		page = list_first_entry(&pl->free_list, struct page, lru);
-		list_del(&page->lru);
-		--pl->num_pages_free;
-	}
-	if (!page)
-		return -ENOMEM;
-	pl->room += PAGE_SIZE;
-	ceph_pagelist_unmap_tail(pl);
-	list_add_tail(&page->lru, &pl->head);
-	pl->mapped_tail = kmap(page);
-	return 0;
-}
-
-int ceph_pagelist_append(struct ceph_pagelist *pl, const void *buf, size_t len)
-{
-	while (pl->room < len) {
-		size_t bit = pl->room;
-		int ret;
-
-		memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK),
-		       buf, bit);
-		pl->length += bit;
-		pl->room -= bit;
-		buf += bit;
-		len -= bit;
-		ret = ceph_pagelist_addpage(pl);
-		if (ret)
-			return ret;
-	}
-
-	memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), buf, len);
-	pl->length += len;
-	pl->room -= len;
-	return 0;
-}
-EXPORT_SYMBOL(ceph_pagelist_append);
-
-/**
- * Allocate enough pages for a pagelist to append the given amount
- * of data without without allocating.
- * Returns: 0 on success, -ENOMEM on error.
- */
-int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space)
-{
-	if (space <= pl->room)
-		return 0;
-	space -= pl->room;
-	space = (space + PAGE_SIZE - 1) >> PAGE_SHIFT;   /* conv to num pages */
-
-	while (space > pl->num_pages_free) {
-		struct page *page = __page_cache_alloc(GFP_NOFS);
-		if (!page)
-			return -ENOMEM;
-		list_add_tail(&page->lru, &pl->free_list);
-		++pl->num_pages_free;
-	}
-	return 0;
-}
-EXPORT_SYMBOL(ceph_pagelist_reserve);
-
-/**
- * Free any pages that have been preallocated.
- */
-int ceph_pagelist_free_reserve(struct ceph_pagelist *pl)
-{
-	while (!list_empty(&pl->free_list)) {
-		struct page *page = list_first_entry(&pl->free_list,
-						     struct page, lru);
-		list_del(&page->lru);
-		__free_page(page);
-		--pl->num_pages_free;
-	}
-	BUG_ON(pl->num_pages_free);
-	return 0;
-}
-EXPORT_SYMBOL(ceph_pagelist_free_reserve);
-
-/**
- * Create a truncation point.
- */
-void ceph_pagelist_set_cursor(struct ceph_pagelist *pl,
-			      struct ceph_pagelist_cursor *c)
-{
-	c->pl = pl;
-	c->page_lru = pl->head.prev;
-	c->room = pl->room;
-}
-EXPORT_SYMBOL(ceph_pagelist_set_cursor);
-
-/**
- * Truncate a pagelist to the given point. Move extra pages to reserve.
- * This won't sleep.
- * Returns: 0 on success,
- *          -EINVAL if the pagelist doesn't match the trunc point pagelist
- */
-int ceph_pagelist_truncate(struct ceph_pagelist *pl,
-			   struct ceph_pagelist_cursor *c)
-{
-	struct page *page;
-
-	if (pl != c->pl)
-		return -EINVAL;
-	ceph_pagelist_unmap_tail(pl);
-	while (pl->head.prev != c->page_lru) {
-		page = list_entry(pl->head.prev, struct page, lru);
-		list_del(&page->lru);                /* remove from pagelist */
-		list_add_tail(&page->lru, &pl->free_list); /* add to reserve */
-		++pl->num_pages_free;
-	}
-	pl->room = c->room;
-	if (!list_empty(&pl->head)) {
-		page = list_entry(pl->head.prev, struct page, lru);
-		pl->mapped_tail = kmap(page);
-	}
-	return 0;
-}
-EXPORT_SYMBOL(ceph_pagelist_truncate);
diff --git a/trunk/net/ceph/pagevec.c b/trunk/net/ceph/pagevec.c
deleted file mode 100644
index 54caf0687155..000000000000
--- a/trunk/net/ceph/pagevec.c
+++ /dev/null
@@ -1,223 +0,0 @@
-#include <linux/ceph/ceph_debug.h>
-
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/file.h>
-#include <linux/namei.h>
-#include <linux/writeback.h>
-
-#include <linux/ceph/libceph.h>
-
-/*
- * build a vector of user pages
- */
-struct page **ceph_get_direct_page_vector(const char __user *data,
-						 int num_pages,
-						 loff_t off, size_t len)
-{
-	struct page **pages;
-	int rc;
-
-	pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS);
-	if (!pages)
-		return ERR_PTR(-ENOMEM);
-
-	down_read(&current->mm->mmap_sem);
-	rc = get_user_pages(current, current->mm, (unsigned long)data,
-			    num_pages, 0, 0, pages, NULL);
-	up_read(&current->mm->mmap_sem);
-	if (rc < 0)
-		goto fail;
-	return pages;
-
-fail:
-	kfree(pages);
-	return ERR_PTR(rc);
-}
-EXPORT_SYMBOL(ceph_get_direct_page_vector);
-
-void ceph_put_page_vector(struct page **pages, int num_pages)
-{
-	int i;
-
-	for (i = 0; i < num_pages; i++)
-		put_page(pages[i]);
-	kfree(pages);
-}
-EXPORT_SYMBOL(ceph_put_page_vector);
-
-void ceph_release_page_vector(struct page **pages, int num_pages)
-{
-	int i;
-
-	for (i = 0; i < num_pages; i++)
-		__free_pages(pages[i], 0);
-	kfree(pages);
-}
-EXPORT_SYMBOL(ceph_release_page_vector);
-
-/*
- * allocate a vector new pages
- */
-struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
-{
-	struct page **pages;
-	int i;
-
-	pages = kmalloc(sizeof(*pages) * num_pages, flags);
-	if (!pages)
-		return ERR_PTR(-ENOMEM);
-	for (i = 0; i < num_pages; i++) {
-		pages[i] = __page_cache_alloc(flags);
-		if (pages[i] == NULL) {
-			ceph_release_page_vector(pages, i);
-			return ERR_PTR(-ENOMEM);
-		}
-	}
-	return pages;
-}
-EXPORT_SYMBOL(ceph_alloc_page_vector);
-
-/*
- * copy user data into a page vector
- */
-int ceph_copy_user_to_page_vector(struct page **pages,
-					 const char __user *data,
-					 loff_t off, size_t len)
-{
-	int i = 0;
-	int po = off & ~PAGE_CACHE_MASK;
-	int left = len;
-	int l, bad;
-
-	while (left > 0) {
-		l = min_t(int, PAGE_CACHE_SIZE-po, left);
-		bad = copy_from_user(page_address(pages[i]) + po, data, l);
-		if (bad == l)
-			return -EFAULT;
-		data += l - bad;
-		left -= l - bad;
-		po += l - bad;
-		if (po == PAGE_CACHE_SIZE) {
-			po = 0;
-			i++;
-		}
-	}
-	return len;
-}
-EXPORT_SYMBOL(ceph_copy_user_to_page_vector);
-
-int ceph_copy_to_page_vector(struct page **pages,
-				    const char *data,
-				    loff_t off, size_t len)
-{
-	int i = 0;
-	size_t po = off & ~PAGE_CACHE_MASK;
-	size_t left = len;
-	size_t l;
-
-	while (left > 0) {
-		l = min_t(size_t, PAGE_CACHE_SIZE-po, left);
-		memcpy(page_address(pages[i]) + po, data, l);
-		data += l;
-		left -= l;
-		po += l;
-		if (po == PAGE_CACHE_SIZE) {
-			po = 0;
-			i++;
-		}
-	}
-	return len;
-}
-EXPORT_SYMBOL(ceph_copy_to_page_vector);
-
-int ceph_copy_from_page_vector(struct page **pages,
-				    char *data,
-				    loff_t off, size_t len)
-{
-	int i = 0;
-	size_t po = off & ~PAGE_CACHE_MASK;
-	size_t left = len;
-	size_t l;
-
-	while (left > 0) {
-		l = min_t(size_t, PAGE_CACHE_SIZE-po, left);
-		memcpy(data, page_address(pages[i]) + po, l);
-		data += l;
-		left -= l;
-		po += l;
-		if (po == PAGE_CACHE_SIZE) {
-			po = 0;
-			i++;
-		}
-	}
-	return len;
-}
-EXPORT_SYMBOL(ceph_copy_from_page_vector);
-
-/*
- * copy user data from a page vector into a user pointer
- */
-int ceph_copy_page_vector_to_user(struct page **pages,
-					 char __user *data,
-					 loff_t off, size_t len)
-{
-	int i = 0;
-	int po = off & ~PAGE_CACHE_MASK;
-	int left = len;
-	int l, bad;
-
-	while (left > 0) {
-		l = min_t(int, left, PAGE_CACHE_SIZE-po);
-		bad = copy_to_user(data, page_address(pages[i]) + po, l);
-		if (bad == l)
-			return -EFAULT;
-		data += l - bad;
-		left -= l - bad;
-		if (po) {
-			po += l - bad;
-			if (po == PAGE_CACHE_SIZE)
-				po = 0;
-		}
-		i++;
-	}
-	return len;
-}
-EXPORT_SYMBOL(ceph_copy_page_vector_to_user);
-
-/*
- * Zero an extent within a page vector.  Offset is relative to the
- * start of the first page.
- */
-void ceph_zero_page_vector_range(int off, int len, struct page **pages)
-{
-	int i = off >> PAGE_CACHE_SHIFT;
-
-	off &= ~PAGE_CACHE_MASK;
-
-	dout("zero_page_vector_page %u~%u\n", off, len);
-
-	/* leading partial page? */
-	if (off) {
-		int end = min((int)PAGE_CACHE_SIZE, off + len);
-		dout("zeroing %d %p head from %d\n", i, pages[i],
-		     (int)off);
-		zero_user_segment(pages[i], off, end);
-		len -= (end - off);
-		i++;
-	}
-	while (len >= PAGE_CACHE_SIZE) {
-		dout("zeroing %d %p len=%d\n", i, pages[i], len);
-		zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
-		len -= PAGE_CACHE_SIZE;
-		i++;
-	}
-	/* trailing partial page? */
-	if (len) {
-		dout("zeroing %d %p tail to %d\n", i, pages[i], (int)len);
-		zero_user_segment(pages[i], 0, len);
-	}
-}
-EXPORT_SYMBOL(ceph_zero_page_vector_range);
-
diff --git a/trunk/security/tomoyo/common.c b/trunk/security/tomoyo/common.c
index c668b447c725..e0a1059aaf3a 100644
--- a/trunk/security/tomoyo/common.c
+++ b/trunk/security/tomoyo/common.c
@@ -768,8 +768,10 @@ static bool tomoyo_select_one(struct tomoyo_io_buffer *head, const char *data)
 		return true; /* Do nothing if open(O_WRONLY). */
 	memset(&head->r, 0, sizeof(head->r));
 	head->r.print_this_domain_only = true;
-	head->r.eof = !domain;
-	head->r.domain = &domain->list;
+	if (domain)
+		head->r.domain = &domain->list;
+	else
+		head->r.eof = 1;
 	tomoyo_io_printf(head, "# select %s\n", data);
 	if (domain && domain->is_deleted)
 		tomoyo_io_printf(head, "# This is a deleted domain.\n");