diff --git a/[refs] b/[refs] index 47f4c8a38296..ca06b971490f 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 94ebd235c493f43681f609b0e02733337053e8f0 +refs/heads/master: 68eda8f59081c74a51d037cc29893bd7c9b3c2d8 diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS index 3d4179fbc526..f2a2b8e647c5 100644 --- a/trunk/MAINTAINERS +++ b/trunk/MAINTAINERS @@ -1527,8 +1527,6 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git S: Supported F: Documentation/filesystems/ceph.txt F: fs/ceph -F: net/ceph -F: include/linux/ceph CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM: M: David Vrabel @@ -4807,15 +4805,6 @@ F: fs/qnx4/ F: include/linux/qnx4_fs.h F: include/linux/qnxtypes.h -RADOS BLOCK DEVICE (RBD) -F: include/linux/qnxtypes.h -M: Yehuda Sadeh -M: Sage Weil -M: ceph-devel@vger.kernel.org -S: Supported -F: drivers/block/rbd.c -F: drivers/block/rbd_types.h - RADEON FRAMEBUFFER DISPLAY DRIVER M: Benjamin Herrenschmidt L: linux-fbdev@vger.kernel.org diff --git a/trunk/drivers/block/Kconfig b/trunk/drivers/block/Kconfig index 4b9359a6f6ca..de277689da61 100644 --- a/trunk/drivers/block/Kconfig +++ b/trunk/drivers/block/Kconfig @@ -488,21 +488,4 @@ config BLK_DEV_HD If unsure, say N. -config BLK_DEV_RBD - tristate "Rados block device (RBD)" - depends on INET && EXPERIMENTAL && BLOCK - select CEPH_LIB - select LIBCRC32C - select CRYPTO_AES - select CRYPTO - default n - help - Say Y here if you want include the Rados block device, which stripes - a block device over objects stored in the Ceph distributed object - store. - - More information at http://ceph.newdream.net/. - - If unsure, say N. 
- endif # BLK_DEV diff --git a/trunk/drivers/block/Makefile b/trunk/drivers/block/Makefile index d7f463d6312d..aff5ac925c34 100644 --- a/trunk/drivers/block/Makefile +++ b/trunk/drivers/block/Makefile @@ -37,6 +37,5 @@ obj-$(CONFIG_BLK_DEV_HD) += hd.o obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ -obj-$(CONFIG_BLK_DEV_RBD) += rbd.o swim_mod-objs := swim.o swim_asm.o diff --git a/trunk/drivers/block/rbd.c b/trunk/drivers/block/rbd.c deleted file mode 100644 index 6ec9d53806c5..000000000000 --- a/trunk/drivers/block/rbd.c +++ /dev/null @@ -1,1841 +0,0 @@ -/* - rbd.c -- Export ceph rados objects as a Linux block device - - - based on drivers/block/osdblk.c: - - Copyright 2009 Red Hat, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to - the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - - - - Instructions for use - -------------------- - - 1) Map a Linux block device to an existing rbd image. - - Usage: [snap name] - - $ echo "192.168.0.1 name=admin rbd foo" > /sys/class/rbd/add - - The snapshot name can be "-" or omitted to map the image read/write. - - 2) List all active blkdev<->object mappings. 
- - In this example, we have performed step #1 twice, creating two blkdevs, - mapped to two separate rados objects in the rados rbd pool - - $ cat /sys/class/rbd/list - #id major client_name pool name snap KB - 0 254 client4143 rbd foo - 1024000 - - The columns, in order, are: - - blkdev unique id - - blkdev assigned major - - rados client id - - rados pool name - - rados block device name - - mapped snapshot ("-" if none) - - device size in KB - - - 3) Create a snapshot. - - Usage: - - $ echo "0 mysnap" > /sys/class/rbd/snap_create - - - 4) Listing a snapshot. - - $ cat /sys/class/rbd/snaps_list - #id snap KB - 0 - 1024000 (*) - 0 foo 1024000 - - The columns, in order, are: - - blkdev unique id - - snapshot name, '-' means none (active read/write version) - - size of device at time of snapshot - - the (*) indicates this is the active version - - 5) Rollback to snapshot. - - Usage: - - $ echo "0 mysnap" > /sys/class/rbd/snap_rollback - - - 6) Mapping an image using snapshot. - - A snapshot mapping is read-only. This is being done by passing - snap= to the options when adding a device. - - $ echo "192.168.0.1 name=admin,snap=mysnap rbd foo" > /sys/class/rbd/add - - - 7) Remove an active blkdev<->rbd image mapping. - - In this example, we remove the mapping with blkdev unique id 1. - - $ echo 1 > /sys/class/rbd/remove - - - NOTE: The actual creation and deletion of rados objects is outside the scope - of this driver. 
- - */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "rbd_types.h" - -#define DRV_NAME "rbd" -#define DRV_NAME_LONG "rbd (rados block device)" - -#define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ - -#define RBD_MAX_MD_NAME_LEN (96 + sizeof(RBD_SUFFIX)) -#define RBD_MAX_POOL_NAME_LEN 64 -#define RBD_MAX_SNAP_NAME_LEN 32 -#define RBD_MAX_OPT_LEN 1024 - -#define RBD_SNAP_HEAD_NAME "-" - -#define DEV_NAME_LEN 32 - -/* - * block device image metadata (in-memory version) - */ -struct rbd_image_header { - u64 image_size; - char block_name[32]; - __u8 obj_order; - __u8 crypt_type; - __u8 comp_type; - struct rw_semaphore snap_rwsem; - struct ceph_snap_context *snapc; - size_t snap_names_len; - u64 snap_seq; - u32 total_snaps; - - char *snap_names; - u64 *snap_sizes; -}; - -/* - * an instance of the client. multiple devices may share a client. - */ -struct rbd_client { - struct ceph_client *client; - struct kref kref; - struct list_head node; -}; - -/* - * a single io request - */ -struct rbd_request { - struct request *rq; /* blk layer request */ - struct bio *bio; /* cloned bio */ - struct page **pages; /* list of used pages */ - u64 len; -}; - -/* - * a single device - */ -struct rbd_device { - int id; /* blkdev unique id */ - - int major; /* blkdev assigned major */ - struct gendisk *disk; /* blkdev's gendisk and rq */ - struct request_queue *q; - - struct ceph_client *client; - struct rbd_client *rbd_client; - - char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ - - spinlock_t lock; /* queue lock */ - - struct rbd_image_header header; - char obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name */ - int obj_len; - char obj_md_name[RBD_MAX_MD_NAME_LEN]; /* hdr nm. 
*/ - char pool_name[RBD_MAX_POOL_NAME_LEN]; - int poolid; - - char snap_name[RBD_MAX_SNAP_NAME_LEN]; - u32 cur_snap; /* index+1 of current snapshot within snap context - 0 - for the head */ - int read_only; - - struct list_head node; -}; - -static spinlock_t node_lock; /* protects client get/put */ - -static struct class *class_rbd; /* /sys/class/rbd */ -static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ -static LIST_HEAD(rbd_dev_list); /* devices */ -static LIST_HEAD(rbd_client_list); /* clients */ - - -static int rbd_open(struct block_device *bdev, fmode_t mode) -{ - struct gendisk *disk = bdev->bd_disk; - struct rbd_device *rbd_dev = disk->private_data; - - set_device_ro(bdev, rbd_dev->read_only); - - if ((mode & FMODE_WRITE) && rbd_dev->read_only) - return -EROFS; - - return 0; -} - -static const struct block_device_operations rbd_bd_ops = { - .owner = THIS_MODULE, - .open = rbd_open, -}; - -/* - * Initialize an rbd client instance. - * We own *opt. - */ -static struct rbd_client *rbd_client_create(struct ceph_options *opt) -{ - struct rbd_client *rbdc; - int ret = -ENOMEM; - - dout("rbd_client_create\n"); - rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL); - if (!rbdc) - goto out_opt; - - kref_init(&rbdc->kref); - INIT_LIST_HEAD(&rbdc->node); - - rbdc->client = ceph_create_client(opt, rbdc); - if (IS_ERR(rbdc->client)) - goto out_rbdc; - opt = NULL; /* Now rbdc->client is responsible for opt */ - - ret = ceph_open_session(rbdc->client); - if (ret < 0) - goto out_err; - - spin_lock(&node_lock); - list_add_tail(&rbdc->node, &rbd_client_list); - spin_unlock(&node_lock); - - dout("rbd_client_create created %p\n", rbdc); - return rbdc; - -out_err: - ceph_destroy_client(rbdc->client); -out_rbdc: - kfree(rbdc); -out_opt: - if (opt) - ceph_destroy_options(opt); - return ERR_PTR(ret); -} - -/* - * Find a ceph client with specific addr and configuration. 
- */ -static struct rbd_client *__rbd_client_find(struct ceph_options *opt) -{ - struct rbd_client *client_node; - - if (opt->flags & CEPH_OPT_NOSHARE) - return NULL; - - list_for_each_entry(client_node, &rbd_client_list, node) - if (ceph_compare_options(opt, client_node->client) == 0) - return client_node; - return NULL; -} - -/* - * Get a ceph client with specific addr and configuration, if one does - * not exist create it. - */ -static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr, - char *options) -{ - struct rbd_client *rbdc; - struct ceph_options *opt; - int ret; - - ret = ceph_parse_options(&opt, options, mon_addr, - mon_addr + strlen(mon_addr), NULL, NULL); - if (ret < 0) - return ret; - - spin_lock(&node_lock); - rbdc = __rbd_client_find(opt); - if (rbdc) { - ceph_destroy_options(opt); - - /* using an existing client */ - kref_get(&rbdc->kref); - rbd_dev->rbd_client = rbdc; - rbd_dev->client = rbdc->client; - spin_unlock(&node_lock); - return 0; - } - spin_unlock(&node_lock); - - rbdc = rbd_client_create(opt); - if (IS_ERR(rbdc)) - return PTR_ERR(rbdc); - - rbd_dev->rbd_client = rbdc; - rbd_dev->client = rbdc->client; - return 0; -} - -/* - * Destroy ceph client - */ -static void rbd_client_release(struct kref *kref) -{ - struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); - - dout("rbd_release_client %p\n", rbdc); - spin_lock(&node_lock); - list_del(&rbdc->node); - spin_unlock(&node_lock); - - ceph_destroy_client(rbdc->client); - kfree(rbdc); -} - -/* - * Drop reference to ceph client node. If it's not referenced anymore, release - * it. - */ -static void rbd_put_client(struct rbd_device *rbd_dev) -{ - kref_put(&rbd_dev->rbd_client->kref, rbd_client_release); - rbd_dev->rbd_client = NULL; - rbd_dev->client = NULL; -} - - -/* - * Create a new header structure, translate header format from the on-disk - * header. 
- */ -static int rbd_header_from_disk(struct rbd_image_header *header, - struct rbd_image_header_ondisk *ondisk, - int allocated_snaps, - gfp_t gfp_flags) -{ - int i; - u32 snap_count = le32_to_cpu(ondisk->snap_count); - int ret = -ENOMEM; - - init_rwsem(&header->snap_rwsem); - - header->snap_names_len = le64_to_cpu(ondisk->snap_names_len); - header->snapc = kmalloc(sizeof(struct ceph_snap_context) + - snap_count * - sizeof(struct rbd_image_snap_ondisk), - gfp_flags); - if (!header->snapc) - return -ENOMEM; - if (snap_count) { - header->snap_names = kmalloc(header->snap_names_len, - GFP_KERNEL); - if (!header->snap_names) - goto err_snapc; - header->snap_sizes = kmalloc(snap_count * sizeof(u64), - GFP_KERNEL); - if (!header->snap_sizes) - goto err_names; - } else { - header->snap_names = NULL; - header->snap_sizes = NULL; - } - memcpy(header->block_name, ondisk->block_name, - sizeof(ondisk->block_name)); - - header->image_size = le64_to_cpu(ondisk->image_size); - header->obj_order = ondisk->options.order; - header->crypt_type = ondisk->options.crypt_type; - header->comp_type = ondisk->options.comp_type; - - atomic_set(&header->snapc->nref, 1); - header->snap_seq = le64_to_cpu(ondisk->snap_seq); - header->snapc->num_snaps = snap_count; - header->total_snaps = snap_count; - - if (snap_count && - allocated_snaps == snap_count) { - for (i = 0; i < snap_count; i++) { - header->snapc->snaps[i] = - le64_to_cpu(ondisk->snaps[i].id); - header->snap_sizes[i] = - le64_to_cpu(ondisk->snaps[i].image_size); - } - - /* copy snapshot names */ - memcpy(header->snap_names, &ondisk->snaps[i], - header->snap_names_len); - } - - return 0; - -err_names: - kfree(header->snap_names); -err_snapc: - kfree(header->snapc); - return ret; -} - -static int snap_index(struct rbd_image_header *header, int snap_num) -{ - return header->total_snaps - snap_num; -} - -static u64 cur_snap_id(struct rbd_device *rbd_dev) -{ - struct rbd_image_header *header = &rbd_dev->header; - - if (!rbd_dev->cur_snap) 
- return 0; - - return header->snapc->snaps[snap_index(header, rbd_dev->cur_snap)]; -} - -static int snap_by_name(struct rbd_image_header *header, const char *snap_name, - u64 *seq, u64 *size) -{ - int i; - char *p = header->snap_names; - - for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) { - if (strcmp(snap_name, p) == 0) - break; - } - if (i == header->total_snaps) - return -ENOENT; - if (seq) - *seq = header->snapc->snaps[i]; - - if (size) - *size = header->snap_sizes[i]; - - return i; -} - -static int rbd_header_set_snap(struct rbd_device *dev, - const char *snap_name, - u64 *size) -{ - struct rbd_image_header *header = &dev->header; - struct ceph_snap_context *snapc = header->snapc; - int ret = -ENOENT; - - down_write(&header->snap_rwsem); - - if (!snap_name || - !*snap_name || - strcmp(snap_name, "-") == 0 || - strcmp(snap_name, RBD_SNAP_HEAD_NAME) == 0) { - if (header->total_snaps) - snapc->seq = header->snap_seq; - else - snapc->seq = 0; - dev->cur_snap = 0; - dev->read_only = 0; - if (size) - *size = header->image_size; - } else { - ret = snap_by_name(header, snap_name, &snapc->seq, size); - if (ret < 0) - goto done; - - dev->cur_snap = header->total_snaps - ret; - dev->read_only = 1; - } - - ret = 0; -done: - up_write(&header->snap_rwsem); - return ret; -} - -static void rbd_header_free(struct rbd_image_header *header) -{ - kfree(header->snapc); - kfree(header->snap_names); - kfree(header->snap_sizes); -} - -/* - * get the actual striped segment name, offset and length - */ -static u64 rbd_get_segment(struct rbd_image_header *header, - const char *block_name, - u64 ofs, u64 len, - char *seg_name, u64 *segofs) -{ - u64 seg = ofs >> header->obj_order; - - if (seg_name) - snprintf(seg_name, RBD_MAX_SEG_NAME_LEN, - "%s.%012llx", block_name, seg); - - ofs = ofs & ((1 << header->obj_order) - 1); - len = min_t(u64, len, (1 << header->obj_order) - ofs); - - if (segofs) - *segofs = ofs; - - return len; -} - -/* - * bio helpers - */ - -static void 
bio_chain_put(struct bio *chain) -{ - struct bio *tmp; - - while (chain) { - tmp = chain; - chain = chain->bi_next; - bio_put(tmp); - } -} - -/* - * zeros a bio chain, starting at specific offset - */ -static void zero_bio_chain(struct bio *chain, int start_ofs) -{ - struct bio_vec *bv; - unsigned long flags; - void *buf; - int i; - int pos = 0; - - while (chain) { - bio_for_each_segment(bv, chain, i) { - if (pos + bv->bv_len > start_ofs) { - int remainder = max(start_ofs - pos, 0); - buf = bvec_kmap_irq(bv, &flags); - memset(buf + remainder, 0, - bv->bv_len - remainder); - bvec_kunmap_irq(buf, &flags); - } - pos += bv->bv_len; - } - - chain = chain->bi_next; - } -} - -/* - * bio_chain_clone - clone a chain of bios up to a certain length. - * might return a bio_pair that will need to be released. - */ -static struct bio *bio_chain_clone(struct bio **old, struct bio **next, - struct bio_pair **bp, - int len, gfp_t gfpmask) -{ - struct bio *tmp, *old_chain = *old, *new_chain = NULL, *tail = NULL; - int total = 0; - - if (*bp) { - bio_pair_release(*bp); - *bp = NULL; - } - - while (old_chain && (total < len)) { - tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); - if (!tmp) - goto err_out; - - if (total + old_chain->bi_size > len) { - struct bio_pair *bp; - - /* - * this split can only happen with a single paged bio, - * split_bio will BUG_ON if this is not the case - */ - dout("bio_chain_clone split! total=%d remaining=%d" - "bi_size=%d\n", - (int)total, (int)len-total, - (int)old_chain->bi_size); - - /* split the bio. 
We'll release it either in the next - call, or it will have to be released outside */ - bp = bio_split(old_chain, (len - total) / 512ULL); - if (!bp) - goto err_out; - - __bio_clone(tmp, &bp->bio1); - - *next = &bp->bio2; - } else { - __bio_clone(tmp, old_chain); - *next = old_chain->bi_next; - } - - tmp->bi_bdev = NULL; - gfpmask &= ~__GFP_WAIT; - tmp->bi_next = NULL; - - if (!new_chain) { - new_chain = tail = tmp; - } else { - tail->bi_next = tmp; - tail = tmp; - } - old_chain = old_chain->bi_next; - - total += tmp->bi_size; - } - - BUG_ON(total < len); - - if (tail) - tail->bi_next = NULL; - - *old = old_chain; - - return new_chain; - -err_out: - dout("bio_chain_clone with err\n"); - bio_chain_put(new_chain); - return NULL; -} - -/* - * helpers for osd request op vectors. - */ -static int rbd_create_rw_ops(struct ceph_osd_req_op **ops, - int num_ops, - int opcode, - u32 payload_len) -{ - *ops = kzalloc(sizeof(struct ceph_osd_req_op) * (num_ops + 1), - GFP_NOIO); - if (!*ops) - return -ENOMEM; - (*ops)[0].op = opcode; - /* - * op extent offset and length will be set later on - * in calc_raw_layout() - */ - (*ops)[0].payload_len = payload_len; - return 0; -} - -static void rbd_destroy_ops(struct ceph_osd_req_op *ops) -{ - kfree(ops); -} - -/* - * Send ceph osd request - */ -static int rbd_do_request(struct request *rq, - struct rbd_device *dev, - struct ceph_snap_context *snapc, - u64 snapid, - const char *obj, u64 ofs, u64 len, - struct bio *bio, - struct page **pages, - int num_pages, - int flags, - struct ceph_osd_req_op *ops, - int num_reply, - void (*rbd_cb)(struct ceph_osd_request *req, - struct ceph_msg *msg)) -{ - struct ceph_osd_request *req; - struct ceph_file_layout *layout; - int ret; - u64 bno; - struct timespec mtime = CURRENT_TIME; - struct rbd_request *req_data; - struct ceph_osd_request_head *reqhead; - struct rbd_image_header *header = &dev->header; - - ret = -ENOMEM; - req_data = kzalloc(sizeof(*req_data), GFP_NOIO); - if (!req_data) - goto 
done; - - dout("rbd_do_request len=%lld ofs=%lld\n", len, ofs); - - down_read(&header->snap_rwsem); - - req = ceph_osdc_alloc_request(&dev->client->osdc, flags, - snapc, - ops, - false, - GFP_NOIO, pages, bio); - if (IS_ERR(req)) { - up_read(&header->snap_rwsem); - ret = PTR_ERR(req); - goto done_pages; - } - - req->r_callback = rbd_cb; - - req_data->rq = rq; - req_data->bio = bio; - req_data->pages = pages; - req_data->len = len; - - req->r_priv = req_data; - - reqhead = req->r_request->front.iov_base; - reqhead->snapid = cpu_to_le64(CEPH_NOSNAP); - - strncpy(req->r_oid, obj, sizeof(req->r_oid)); - req->r_oid_len = strlen(req->r_oid); - - layout = &req->r_file_layout; - memset(layout, 0, sizeof(*layout)); - layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); - layout->fl_stripe_count = cpu_to_le32(1); - layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); - layout->fl_pg_preferred = cpu_to_le32(-1); - layout->fl_pg_pool = cpu_to_le32(dev->poolid); - ceph_calc_raw_layout(&dev->client->osdc, layout, snapid, - ofs, &len, &bno, req, ops); - - ceph_osdc_build_request(req, ofs, &len, - ops, - snapc, - &mtime, - req->r_oid, req->r_oid_len); - up_read(&header->snap_rwsem); - - ret = ceph_osdc_start_request(&dev->client->osdc, req, false); - if (ret < 0) - goto done_err; - - if (!rbd_cb) { - ret = ceph_osdc_wait_request(&dev->client->osdc, req); - ceph_osdc_put_request(req); - } - return ret; - -done_err: - bio_chain_put(req_data->bio); - ceph_osdc_put_request(req); -done_pages: - kfree(req_data); -done: - if (rq) - blk_end_request(rq, ret, len); - return ret; -} - -/* - * Ceph osd op callback - */ -static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) -{ - struct rbd_request *req_data = req->r_priv; - struct ceph_osd_reply_head *replyhead; - struct ceph_osd_op *op; - __s32 rc; - u64 bytes; - int read_op; - - /* parse reply */ - replyhead = msg->front.iov_base; - WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); - op = (void 
*)(replyhead + 1); - rc = le32_to_cpu(replyhead->result); - bytes = le64_to_cpu(op->extent.length); - read_op = (le32_to_cpu(op->op) == CEPH_OSD_OP_READ); - - dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc); - - if (rc == -ENOENT && read_op) { - zero_bio_chain(req_data->bio, 0); - rc = 0; - } else if (rc == 0 && read_op && bytes < req_data->len) { - zero_bio_chain(req_data->bio, bytes); - bytes = req_data->len; - } - - blk_end_request(req_data->rq, rc, bytes); - - if (req_data->bio) - bio_chain_put(req_data->bio); - - ceph_osdc_put_request(req); - kfree(req_data); -} - -/* - * Do a synchronous ceph osd operation - */ -static int rbd_req_sync_op(struct rbd_device *dev, - struct ceph_snap_context *snapc, - u64 snapid, - int opcode, - int flags, - struct ceph_osd_req_op *orig_ops, - int num_reply, - const char *obj, - u64 ofs, u64 len, - char *buf) -{ - int ret; - struct page **pages; - int num_pages; - struct ceph_osd_req_op *ops = orig_ops; - u32 payload_len; - - num_pages = calc_pages_for(ofs , len); - pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); - if (IS_ERR(pages)) - return PTR_ERR(pages); - - if (!orig_ops) { - payload_len = (flags & CEPH_OSD_FLAG_WRITE ? 
len : 0); - ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len); - if (ret < 0) - goto done; - - if ((flags & CEPH_OSD_FLAG_WRITE) && buf) { - ret = ceph_copy_to_page_vector(pages, buf, ofs, len); - if (ret < 0) - goto done_ops; - } - } - - ret = rbd_do_request(NULL, dev, snapc, snapid, - obj, ofs, len, NULL, - pages, num_pages, - flags, - ops, - 2, - NULL); - if (ret < 0) - goto done_ops; - - if ((flags & CEPH_OSD_FLAG_READ) && buf) - ret = ceph_copy_from_page_vector(pages, buf, ofs, ret); - -done_ops: - if (!orig_ops) - rbd_destroy_ops(ops); -done: - ceph_release_page_vector(pages, num_pages); - return ret; -} - -/* - * Do an asynchronous ceph osd operation - */ -static int rbd_do_op(struct request *rq, - struct rbd_device *rbd_dev , - struct ceph_snap_context *snapc, - u64 snapid, - int opcode, int flags, int num_reply, - u64 ofs, u64 len, - struct bio *bio) -{ - char *seg_name; - u64 seg_ofs; - u64 seg_len; - int ret; - struct ceph_osd_req_op *ops; - u32 payload_len; - - seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO); - if (!seg_name) - return -ENOMEM; - - seg_len = rbd_get_segment(&rbd_dev->header, - rbd_dev->header.block_name, - ofs, len, - seg_name, &seg_ofs); - - payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0); - - ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len); - if (ret < 0) - goto done; - - /* we've taken care of segment sizes earlier when we - cloned the bios. 
We should never have a segment - truncated at this point */ - BUG_ON(seg_len < len); - - ret = rbd_do_request(rq, rbd_dev, snapc, snapid, - seg_name, seg_ofs, seg_len, - bio, - NULL, 0, - flags, - ops, - num_reply, - rbd_req_cb); -done: - kfree(seg_name); - return ret; -} - -/* - * Request async osd write - */ -static int rbd_req_write(struct request *rq, - struct rbd_device *rbd_dev, - struct ceph_snap_context *snapc, - u64 ofs, u64 len, - struct bio *bio) -{ - return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP, - CEPH_OSD_OP_WRITE, - CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, - 2, - ofs, len, bio); -} - -/* - * Request async osd read - */ -static int rbd_req_read(struct request *rq, - struct rbd_device *rbd_dev, - u64 snapid, - u64 ofs, u64 len, - struct bio *bio) -{ - return rbd_do_op(rq, rbd_dev, NULL, - (snapid ? snapid : CEPH_NOSNAP), - CEPH_OSD_OP_READ, - CEPH_OSD_FLAG_READ, - 2, - ofs, len, bio); -} - -/* - * Request sync osd read - */ -static int rbd_req_sync_read(struct rbd_device *dev, - struct ceph_snap_context *snapc, - u64 snapid, - const char *obj, - u64 ofs, u64 len, - char *buf) -{ - return rbd_req_sync_op(dev, NULL, - (snapid ? 
snapid : CEPH_NOSNAP), - CEPH_OSD_OP_READ, - CEPH_OSD_FLAG_READ, - NULL, - 1, obj, ofs, len, buf); -} - -/* - * Request sync osd read - */ -static int rbd_req_sync_rollback_obj(struct rbd_device *dev, - u64 snapid, - const char *obj) -{ - struct ceph_osd_req_op *ops; - int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_ROLLBACK, 0); - if (ret < 0) - return ret; - - ops[0].snap.snapid = snapid; - - ret = rbd_req_sync_op(dev, NULL, - CEPH_NOSNAP, - 0, - CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, - ops, - 1, obj, 0, 0, NULL); - - rbd_destroy_ops(ops); - - if (ret < 0) - return ret; - - return ret; -} - -/* - * Request sync osd read - */ -static int rbd_req_sync_exec(struct rbd_device *dev, - const char *obj, - const char *cls, - const char *method, - const char *data, - int len) -{ - struct ceph_osd_req_op *ops; - int cls_len = strlen(cls); - int method_len = strlen(method); - int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_CALL, - cls_len + method_len + len); - if (ret < 0) - return ret; - - ops[0].cls.class_name = cls; - ops[0].cls.class_len = (__u8)cls_len; - ops[0].cls.method_name = method; - ops[0].cls.method_len = (__u8)method_len; - ops[0].cls.argc = 0; - ops[0].cls.indata = data; - ops[0].cls.indata_len = len; - - ret = rbd_req_sync_op(dev, NULL, - CEPH_NOSNAP, - 0, - CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, - ops, - 1, obj, 0, 0, NULL); - - rbd_destroy_ops(ops); - - dout("cls_exec returned %d\n", ret); - return ret; -} - -/* - * block device queue callback - */ -static void rbd_rq_fn(struct request_queue *q) -{ - struct rbd_device *rbd_dev = q->queuedata; - struct request *rq; - struct bio_pair *bp = NULL; - - rq = blk_fetch_request(q); - - while (1) { - struct bio *bio; - struct bio *rq_bio, *next_bio = NULL; - bool do_write; - int size, op_size = 0; - u64 ofs; - - /* peek at request from block layer */ - if (!rq) - break; - - dout("fetched request\n"); - - /* filter out block requests we don't understand */ - if ((rq->cmd_type != REQ_TYPE_FS)) { - 
__blk_end_request_all(rq, 0); - goto next; - } - - /* deduce our operation (read, write) */ - do_write = (rq_data_dir(rq) == WRITE); - - size = blk_rq_bytes(rq); - ofs = blk_rq_pos(rq) * 512ULL; - rq_bio = rq->bio; - if (do_write && rbd_dev->read_only) { - __blk_end_request_all(rq, -EROFS); - goto next; - } - - spin_unlock_irq(q->queue_lock); - - dout("%s 0x%x bytes at 0x%llx\n", - do_write ? "write" : "read", - size, blk_rq_pos(rq) * 512ULL); - - do { - /* a bio clone to be passed down to OSD req */ - dout("rq->bio->bi_vcnt=%d\n", rq->bio->bi_vcnt); - op_size = rbd_get_segment(&rbd_dev->header, - rbd_dev->header.block_name, - ofs, size, - NULL, NULL); - bio = bio_chain_clone(&rq_bio, &next_bio, &bp, - op_size, GFP_ATOMIC); - if (!bio) { - spin_lock_irq(q->queue_lock); - __blk_end_request_all(rq, -ENOMEM); - goto next; - } - - /* init OSD command: write or read */ - if (do_write) - rbd_req_write(rq, rbd_dev, - rbd_dev->header.snapc, - ofs, - op_size, bio); - else - rbd_req_read(rq, rbd_dev, - cur_snap_id(rbd_dev), - ofs, - op_size, bio); - - size -= op_size; - ofs += op_size; - - rq_bio = next_bio; - } while (size > 0); - - if (bp) - bio_pair_release(bp); - - spin_lock_irq(q->queue_lock); -next: - rq = blk_fetch_request(q); - } -} - -/* - * a queue callback. Makes sure that we don't create a bio that spans across - * multiple osd objects. 
One exception would be with a single page bios, - * which we handle later at bio_chain_clone - */ -static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd, - struct bio_vec *bvec) -{ - struct rbd_device *rbd_dev = q->queuedata; - unsigned int chunk_sectors = 1 << (rbd_dev->header.obj_order - 9); - sector_t sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev); - unsigned int bio_sectors = bmd->bi_size >> 9; - int max; - - max = (chunk_sectors - ((sector & (chunk_sectors - 1)) - + bio_sectors)) << 9; - if (max < 0) - max = 0; /* bio_add cannot handle a negative return */ - if (max <= bvec->bv_len && bio_sectors == 0) - return bvec->bv_len; - return max; -} - -static void rbd_free_disk(struct rbd_device *rbd_dev) -{ - struct gendisk *disk = rbd_dev->disk; - - if (!disk) - return; - - rbd_header_free(&rbd_dev->header); - - if (disk->flags & GENHD_FL_UP) - del_gendisk(disk); - if (disk->queue) - blk_cleanup_queue(disk->queue); - put_disk(disk); -} - -/* - * reload the ondisk the header - */ -static int rbd_read_header(struct rbd_device *rbd_dev, - struct rbd_image_header *header) -{ - ssize_t rc; - struct rbd_image_header_ondisk *dh; - int snap_count = 0; - u64 snap_names_len = 0; - - while (1) { - int len = sizeof(*dh) + - snap_count * sizeof(struct rbd_image_snap_ondisk) + - snap_names_len; - - rc = -ENOMEM; - dh = kmalloc(len, GFP_KERNEL); - if (!dh) - return -ENOMEM; - - rc = rbd_req_sync_read(rbd_dev, - NULL, CEPH_NOSNAP, - rbd_dev->obj_md_name, - 0, len, - (char *)dh); - if (rc < 0) - goto out_dh; - - rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL); - if (rc < 0) - goto out_dh; - - if (snap_count != header->total_snaps) { - snap_count = header->total_snaps; - snap_names_len = header->snap_names_len; - rbd_header_free(header); - kfree(dh); - continue; - } - break; - } - -out_dh: - kfree(dh); - return rc; -} - -/* - * create a snapshot - */ -static int rbd_header_add_snap(struct rbd_device *dev, - const char *snap_name, - gfp_t 
gfp_flags) -{ - int name_len = strlen(snap_name); - u64 new_snapid; - int ret; - void *data, *data_start, *data_end; - - /* we should create a snapshot only if we're pointing at the head */ - if (dev->cur_snap) - return -EINVAL; - - ret = ceph_monc_create_snapid(&dev->client->monc, dev->poolid, - &new_snapid); - dout("created snapid=%lld\n", new_snapid); - if (ret < 0) - return ret; - - data = kmalloc(name_len + 16, gfp_flags); - if (!data) - return -ENOMEM; - - data_start = data; - data_end = data + name_len + 16; - - ceph_encode_string_safe(&data, data_end, snap_name, name_len, bad); - ceph_encode_64_safe(&data, data_end, new_snapid, bad); - - ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add", - data_start, data - data_start); - - kfree(data_start); - - if (ret < 0) - return ret; - - dev->header.snapc->seq = new_snapid; - - return 0; -bad: - return -ERANGE; -} - -/* - * only read the first part of the ondisk header, without the snaps info - */ -static int rbd_update_snaps(struct rbd_device *rbd_dev) -{ - int ret; - struct rbd_image_header h; - u64 snap_seq; - - ret = rbd_read_header(rbd_dev, &h); - if (ret < 0) - return ret; - - down_write(&rbd_dev->header.snap_rwsem); - - snap_seq = rbd_dev->header.snapc->seq; - - kfree(rbd_dev->header.snapc); - kfree(rbd_dev->header.snap_names); - kfree(rbd_dev->header.snap_sizes); - - rbd_dev->header.total_snaps = h.total_snaps; - rbd_dev->header.snapc = h.snapc; - rbd_dev->header.snap_names = h.snap_names; - rbd_dev->header.snap_sizes = h.snap_sizes; - rbd_dev->header.snapc->seq = snap_seq; - - up_write(&rbd_dev->header.snap_rwsem); - - return 0; -} - -static int rbd_init_disk(struct rbd_device *rbd_dev) -{ - struct gendisk *disk; - struct request_queue *q; - int rc; - u64 total_size = 0; - - /* contact OSD, request size info about the object being mapped */ - rc = rbd_read_header(rbd_dev, &rbd_dev->header); - if (rc) - return rc; - - rc = rbd_header_set_snap(rbd_dev, rbd_dev->snap_name, &total_size); - if (rc) 
- return rc; - - /* create gendisk info */ - rc = -ENOMEM; - disk = alloc_disk(RBD_MINORS_PER_MAJOR); - if (!disk) - goto out; - - sprintf(disk->disk_name, DRV_NAME "%d", rbd_dev->id); - disk->major = rbd_dev->major; - disk->first_minor = 0; - disk->fops = &rbd_bd_ops; - disk->private_data = rbd_dev; - - /* init rq */ - rc = -ENOMEM; - q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock); - if (!q) - goto out_disk; - blk_queue_merge_bvec(q, rbd_merge_bvec); - disk->queue = q; - - q->queuedata = rbd_dev; - - rbd_dev->disk = disk; - rbd_dev->q = q; - - /* finally, announce the disk to the world */ - set_capacity(disk, total_size / 512ULL); - add_disk(disk); - - pr_info("%s: added with size 0x%llx\n", - disk->disk_name, (unsigned long long)total_size); - return 0; - -out_disk: - put_disk(disk); -out: - return rc; -} - -/******************************************************************** - * /sys/class/rbd/ - * add map rados objects to blkdev - * remove unmap rados objects - * list show mappings - *******************************************************************/ - -static void class_rbd_release(struct class *cls) -{ - kfree(cls); -} - -static ssize_t class_rbd_list(struct class *c, - struct class_attribute *attr, - char *data) -{ - int n = 0; - struct list_head *tmp; - int max = PAGE_SIZE; - - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - - n += snprintf(data, max, - "#id\tmajor\tclient_name\tpool\tname\tsnap\tKB\n"); - - list_for_each(tmp, &rbd_dev_list) { - struct rbd_device *rbd_dev; - - rbd_dev = list_entry(tmp, struct rbd_device, node); - n += snprintf(data+n, max-n, - "%d\t%d\tclient%lld\t%s\t%s\t%s\t%lld\n", - rbd_dev->id, - rbd_dev->major, - ceph_client_id(rbd_dev->client), - rbd_dev->pool_name, - rbd_dev->obj, rbd_dev->snap_name, - rbd_dev->header.image_size >> 10); - if (n == max) - break; - } - - mutex_unlock(&ctl_mutex); - return n; -} - -static ssize_t class_rbd_add(struct class *c, - struct class_attribute *attr, - const char *buf, size_t count) -{ 
- struct ceph_osd_client *osdc; - struct rbd_device *rbd_dev; - ssize_t rc = -ENOMEM; - int irc, new_id = 0; - struct list_head *tmp; - char *mon_dev_name; - char *options; - - if (!try_module_get(THIS_MODULE)) - return -ENODEV; - - mon_dev_name = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL); - if (!mon_dev_name) - goto err_out_mod; - - options = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL); - if (!options) - goto err_mon_dev; - - /* new rbd_device object */ - rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); - if (!rbd_dev) - goto err_out_opt; - - /* static rbd_device initialization */ - spin_lock_init(&rbd_dev->lock); - INIT_LIST_HEAD(&rbd_dev->node); - - /* generate unique id: find highest unique id, add one */ - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - - list_for_each(tmp, &rbd_dev_list) { - struct rbd_device *rbd_dev; - - rbd_dev = list_entry(tmp, struct rbd_device, node); - if (rbd_dev->id >= new_id) - new_id = rbd_dev->id + 1; - } - - rbd_dev->id = new_id; - - /* add to global list */ - list_add_tail(&rbd_dev->node, &rbd_dev_list); - - /* parse add command */ - if (sscanf(buf, "%" __stringify(RBD_MAX_OPT_LEN) "s " - "%" __stringify(RBD_MAX_OPT_LEN) "s " - "%" __stringify(RBD_MAX_POOL_NAME_LEN) "s " - "%" __stringify(RBD_MAX_OBJ_NAME_LEN) "s" - "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s", - mon_dev_name, options, rbd_dev->pool_name, - rbd_dev->obj, rbd_dev->snap_name) < 4) { - rc = -EINVAL; - goto err_out_slot; - } - - if (rbd_dev->snap_name[0] == 0) - rbd_dev->snap_name[0] = '-'; - - rbd_dev->obj_len = strlen(rbd_dev->obj); - snprintf(rbd_dev->obj_md_name, sizeof(rbd_dev->obj_md_name), "%s%s", - rbd_dev->obj, RBD_SUFFIX); - - /* initialize rest of new object */ - snprintf(rbd_dev->name, DEV_NAME_LEN, DRV_NAME "%d", rbd_dev->id); - rc = rbd_get_client(rbd_dev, mon_dev_name, options); - if (rc < 0) - goto err_out_slot; - - mutex_unlock(&ctl_mutex); - - /* pick the pool */ - osdc = &rbd_dev->client->osdc; - rc = ceph_pg_poolid_by_name(osdc->osdmap, 
rbd_dev->pool_name); - if (rc < 0) - goto err_out_client; - rbd_dev->poolid = rc; - - /* register our block device */ - irc = register_blkdev(0, rbd_dev->name); - if (irc < 0) { - rc = irc; - goto err_out_client; - } - rbd_dev->major = irc; - - /* set up and announce blkdev mapping */ - rc = rbd_init_disk(rbd_dev); - if (rc) - goto err_out_blkdev; - - return count; - -err_out_blkdev: - unregister_blkdev(rbd_dev->major, rbd_dev->name); -err_out_client: - rbd_put_client(rbd_dev); - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); -err_out_slot: - list_del_init(&rbd_dev->node); - mutex_unlock(&ctl_mutex); - - kfree(rbd_dev); -err_out_opt: - kfree(options); -err_mon_dev: - kfree(mon_dev_name); -err_out_mod: - dout("Error adding device %s\n", buf); - module_put(THIS_MODULE); - return rc; -} - -static struct rbd_device *__rbd_get_dev(unsigned long id) -{ - struct list_head *tmp; - struct rbd_device *rbd_dev; - - list_for_each(tmp, &rbd_dev_list) { - rbd_dev = list_entry(tmp, struct rbd_device, node); - if (rbd_dev->id == id) - return rbd_dev; - } - return NULL; -} - -static ssize_t class_rbd_remove(struct class *c, - struct class_attribute *attr, - const char *buf, - size_t count) -{ - struct rbd_device *rbd_dev = NULL; - int target_id, rc; - unsigned long ul; - - rc = strict_strtoul(buf, 10, &ul); - if (rc) - return rc; - - /* convert to int; abort if we lost anything in the conversion */ - target_id = (int) ul; - if (target_id != ul) - return -EINVAL; - - /* remove object from list immediately */ - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - - rbd_dev = __rbd_get_dev(target_id); - if (rbd_dev) - list_del_init(&rbd_dev->node); - - mutex_unlock(&ctl_mutex); - - if (!rbd_dev) - return -ENOENT; - - rbd_put_client(rbd_dev); - - /* clean up and free blkdev */ - rbd_free_disk(rbd_dev); - unregister_blkdev(rbd_dev->major, rbd_dev->name); - kfree(rbd_dev); - - /* release module ref */ - module_put(THIS_MODULE); - - return count; -} - -static ssize_t 
class_rbd_snaps_list(struct class *c, - struct class_attribute *attr, - char *data) -{ - struct rbd_device *rbd_dev = NULL; - struct list_head *tmp; - struct rbd_image_header *header; - int i, n = 0, max = PAGE_SIZE; - int ret; - - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - - n += snprintf(data, max, "#id\tsnap\tKB\n"); - - list_for_each(tmp, &rbd_dev_list) { - char *names, *p; - struct ceph_snap_context *snapc; - - rbd_dev = list_entry(tmp, struct rbd_device, node); - header = &rbd_dev->header; - - down_read(&header->snap_rwsem); - - names = header->snap_names; - snapc = header->snapc; - - n += snprintf(data + n, max - n, "%d\t%s\t%lld%s\n", - rbd_dev->id, RBD_SNAP_HEAD_NAME, - header->image_size >> 10, - (!rbd_dev->cur_snap ? " (*)" : "")); - if (n == max) - break; - - p = names; - for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) { - n += snprintf(data + n, max - n, "%d\t%s\t%lld%s\n", - rbd_dev->id, p, header->snap_sizes[i] >> 10, - (rbd_dev->cur_snap && - (snap_index(header, i) == rbd_dev->cur_snap) ? 
- " (*)" : "")); - if (n == max) - break; - } - - up_read(&header->snap_rwsem); - } - - - ret = n; - mutex_unlock(&ctl_mutex); - return ret; -} - -static ssize_t class_rbd_snaps_refresh(struct class *c, - struct class_attribute *attr, - const char *buf, - size_t count) -{ - struct rbd_device *rbd_dev = NULL; - int target_id, rc; - unsigned long ul; - int ret = count; - - rc = strict_strtoul(buf, 10, &ul); - if (rc) - return rc; - - /* convert to int; abort if we lost anything in the conversion */ - target_id = (int) ul; - if (target_id != ul) - return -EINVAL; - - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - - rbd_dev = __rbd_get_dev(target_id); - if (!rbd_dev) { - ret = -ENOENT; - goto done; - } - - rc = rbd_update_snaps(rbd_dev); - if (rc < 0) - ret = rc; - -done: - mutex_unlock(&ctl_mutex); - return ret; -} - -static ssize_t class_rbd_snap_create(struct class *c, - struct class_attribute *attr, - const char *buf, - size_t count) -{ - struct rbd_device *rbd_dev = NULL; - int target_id, ret; - char *name; - - name = kmalloc(RBD_MAX_SNAP_NAME_LEN + 1, GFP_KERNEL); - if (!name) - return -ENOMEM; - - /* parse snaps add command */ - if (sscanf(buf, "%d " - "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s", - &target_id, - name) != 2) { - ret = -EINVAL; - goto done; - } - - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - - rbd_dev = __rbd_get_dev(target_id); - if (!rbd_dev) { - ret = -ENOENT; - goto done_unlock; - } - - ret = rbd_header_add_snap(rbd_dev, - name, GFP_KERNEL); - if (ret < 0) - goto done_unlock; - - ret = rbd_update_snaps(rbd_dev); - if (ret < 0) - goto done_unlock; - - ret = count; -done_unlock: - mutex_unlock(&ctl_mutex); -done: - kfree(name); - return ret; -} - -static ssize_t class_rbd_rollback(struct class *c, - struct class_attribute *attr, - const char *buf, - size_t count) -{ - struct rbd_device *rbd_dev = NULL; - int target_id, ret; - u64 snapid; - char snap_name[RBD_MAX_SNAP_NAME_LEN]; - u64 cur_ofs; - char *seg_name; - - /* parse 
snaps add command */ - if (sscanf(buf, "%d " - "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s", - &target_id, - snap_name) != 2) { - return -EINVAL; - } - - ret = -ENOMEM; - seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO); - if (!seg_name) - return ret; - - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - - rbd_dev = __rbd_get_dev(target_id); - if (!rbd_dev) { - ret = -ENOENT; - goto done_unlock; - } - - ret = snap_by_name(&rbd_dev->header, snap_name, &snapid, NULL); - if (ret < 0) - goto done_unlock; - - dout("snapid=%lld\n", snapid); - - cur_ofs = 0; - while (cur_ofs < rbd_dev->header.image_size) { - cur_ofs += rbd_get_segment(&rbd_dev->header, - rbd_dev->obj, - cur_ofs, (u64)-1, - seg_name, NULL); - dout("seg_name=%s\n", seg_name); - - ret = rbd_req_sync_rollback_obj(rbd_dev, snapid, seg_name); - if (ret < 0) - pr_warning("could not roll back obj %s err=%d\n", - seg_name, ret); - } - - ret = rbd_update_snaps(rbd_dev); - if (ret < 0) - goto done_unlock; - - ret = count; - -done_unlock: - mutex_unlock(&ctl_mutex); - kfree(seg_name); - - return ret; -} - -static struct class_attribute class_rbd_attrs[] = { - __ATTR(add, 0200, NULL, class_rbd_add), - __ATTR(remove, 0200, NULL, class_rbd_remove), - __ATTR(list, 0444, class_rbd_list, NULL), - __ATTR(snaps_refresh, 0200, NULL, class_rbd_snaps_refresh), - __ATTR(snap_create, 0200, NULL, class_rbd_snap_create), - __ATTR(snaps_list, 0444, class_rbd_snaps_list, NULL), - __ATTR(snap_rollback, 0200, NULL, class_rbd_rollback), - __ATTR_NULL -}; - -/* - * create control files in sysfs - * /sys/class/rbd/... 
- */ -static int rbd_sysfs_init(void) -{ - int ret = -ENOMEM; - - class_rbd = kzalloc(sizeof(*class_rbd), GFP_KERNEL); - if (!class_rbd) - goto out; - - class_rbd->name = DRV_NAME; - class_rbd->owner = THIS_MODULE; - class_rbd->class_release = class_rbd_release; - class_rbd->class_attrs = class_rbd_attrs; - - ret = class_register(class_rbd); - if (ret) - goto out_class; - return 0; - -out_class: - kfree(class_rbd); - class_rbd = NULL; - pr_err(DRV_NAME ": failed to create class rbd\n"); -out: - return ret; -} - -static void rbd_sysfs_cleanup(void) -{ - if (class_rbd) - class_destroy(class_rbd); - class_rbd = NULL; -} - -int __init rbd_init(void) -{ - int rc; - - rc = rbd_sysfs_init(); - if (rc) - return rc; - spin_lock_init(&node_lock); - pr_info("loaded " DRV_NAME_LONG "\n"); - return 0; -} - -void __exit rbd_exit(void) -{ - rbd_sysfs_cleanup(); -} - -module_init(rbd_init); -module_exit(rbd_exit); - -MODULE_AUTHOR("Sage Weil "); -MODULE_AUTHOR("Yehuda Sadeh "); -MODULE_DESCRIPTION("rados block device"); - -/* following authorship retained from original osdblk.c */ -MODULE_AUTHOR("Jeff Garzik "); - -MODULE_LICENSE("GPL"); diff --git a/trunk/drivers/block/rbd_types.h b/trunk/drivers/block/rbd_types.h deleted file mode 100644 index fc6c678aa2cb..000000000000 --- a/trunk/drivers/block/rbd_types.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2004-2010 Sage Weil - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. - * - */ - -#ifndef CEPH_RBD_TYPES_H -#define CEPH_RBD_TYPES_H - -#include - -/* - * rbd image 'foo' consists of objects - * foo.rbd - image metadata - * foo.00000000 - * foo.00000001 - * ... 
- data - */ - -#define RBD_SUFFIX ".rbd" -#define RBD_DIRECTORY "rbd_directory" -#define RBD_INFO "rbd_info" - -#define RBD_DEFAULT_OBJ_ORDER 22 /* 4MB */ -#define RBD_MIN_OBJ_ORDER 16 -#define RBD_MAX_OBJ_ORDER 30 - -#define RBD_MAX_OBJ_NAME_LEN 96 -#define RBD_MAX_SEG_NAME_LEN 128 - -#define RBD_COMP_NONE 0 -#define RBD_CRYPT_NONE 0 - -#define RBD_HEADER_TEXT "<<< Rados Block Device Image >>>\n" -#define RBD_HEADER_SIGNATURE "RBD" -#define RBD_HEADER_VERSION "001.005" - -struct rbd_info { - __le64 max_id; -} __attribute__ ((packed)); - -struct rbd_image_snap_ondisk { - __le64 id; - __le64 image_size; -} __attribute__((packed)); - -struct rbd_image_header_ondisk { - char text[40]; - char block_name[24]; - char signature[4]; - char version[8]; - struct { - __u8 order; - __u8 crypt_type; - __u8 comp_type; - __u8 unused; - } __attribute__((packed)) options; - __le64 image_size; - __le64 snap_seq; - __le32 snap_count; - __le32 reserved; - __le64 snap_names_len; - struct rbd_image_snap_ondisk snaps[0]; -} __attribute__((packed)); - - -#endif diff --git a/trunk/drivers/block/virtio_blk.c b/trunk/drivers/block/virtio_blk.c index 8320490226b7..1101e251a629 100644 --- a/trunk/drivers/block/virtio_blk.c +++ b/trunk/drivers/block/virtio_blk.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -221,8 +222,8 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str) return err; } -static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long data) +static int virtblk_locked_ioctl(struct block_device *bdev, fmode_t mode, + unsigned cmd, unsigned long data) { struct gendisk *disk = bdev->bd_disk; struct virtio_blk *vblk = disk->private_data; @@ -237,6 +238,18 @@ static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, (void __user *)data); } +static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long param) +{ + int ret; + + lock_kernel(); + ret = 
virtblk_locked_ioctl(bdev, mode, cmd, param); + unlock_kernel(); + + return ret; +} + /* We provide getgeo only to please some old bootloader/partitioning tools */ static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) { diff --git a/trunk/drivers/char/virtio_console.c b/trunk/drivers/char/virtio_console.c index 6c1b676643a9..0f69c5ec0ecd 100644 --- a/trunk/drivers/char/virtio_console.c +++ b/trunk/drivers/char/virtio_console.c @@ -48,9 +48,6 @@ struct ports_driver_data { /* Used for exporting per-port information to debugfs */ struct dentry *debugfs_dir; - /* List of all the devices we're handling */ - struct list_head portdevs; - /* Number of devices this driver is handling */ unsigned int index; @@ -111,9 +108,6 @@ struct port_buffer { * ports for that device (vdev->priv). */ struct ports_device { - /* Next portdev in the list, head is in the pdrvdata struct */ - struct list_head list; - /* * Workqueue handlers where we process deferred work after * notification @@ -184,21 +178,15 @@ struct port { struct console cons; /* Each port associates with a separate char device */ - struct cdev *cdev; + struct cdev cdev; struct device *dev; - /* Reference-counting to handle port hot-unplugs and file operations */ - struct kref kref; - /* A waitqueue for poll() or blocking read operations */ wait_queue_head_t waitqueue; /* The 'name' of the port that we expose via sysfs properties */ char *name; - /* We can notify apps of host connect / disconnect events via SIGIO */ - struct fasync_struct *async_queue; - /* The 'id' to identify the port with the Host */ u32 id; @@ -233,41 +221,6 @@ static struct port *find_port_by_vtermno(u32 vtermno) return port; } -static struct port *find_port_by_devt_in_portdev(struct ports_device *portdev, - dev_t dev) -{ - struct port *port; - unsigned long flags; - - spin_lock_irqsave(&portdev->ports_lock, flags); - list_for_each_entry(port, &portdev->ports, list) - if (port->cdev->dev == dev) - goto out; - port = NULL; -out: 
- spin_unlock_irqrestore(&portdev->ports_lock, flags); - - return port; -} - -static struct port *find_port_by_devt(dev_t dev) -{ - struct ports_device *portdev; - struct port *port; - unsigned long flags; - - spin_lock_irqsave(&pdrvdata_lock, flags); - list_for_each_entry(portdev, &pdrvdata.portdevs, list) { - port = find_port_by_devt_in_portdev(portdev, dev); - if (port) - goto out; - } - port = NULL; -out: - spin_unlock_irqrestore(&pdrvdata_lock, flags); - return port; -} - static struct port *find_port_by_id(struct ports_device *portdev, u32 id) { struct port *port; @@ -457,10 +410,7 @@ static ssize_t __send_control_msg(struct ports_device *portdev, u32 port_id, static ssize_t send_control_msg(struct port *port, unsigned int event, unsigned int value) { - /* Did the port get unplugged before userspace closed it? */ - if (port->portdev) - return __send_control_msg(port->portdev, port->id, event, value); - return 0; + return __send_control_msg(port->portdev, port->id, event, value); } /* Callers must take the port->outvq_lock */ @@ -575,10 +525,6 @@ static ssize_t fill_readbuf(struct port *port, char *out_buf, size_t out_count, /* The condition that must be true for polling to end */ static bool will_read_block(struct port *port) { - if (!port->guest_connected) { - /* Port got hot-unplugged. Let's exit. */ - return false; - } return !port_has_data(port) && port->host_connected; } @@ -629,9 +575,6 @@ static ssize_t port_fops_read(struct file *filp, char __user *ubuf, if (ret < 0) return ret; } - /* Port got hot-unplugged. */ - if (!port->guest_connected) - return -ENODEV; /* * We could've received a disconnection message while we were * waiting for more data. @@ -673,9 +616,6 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf, if (ret < 0) return ret; } - /* Port got hot-unplugged. 
*/ - if (!port->guest_connected) - return -ENODEV; count = min((size_t)(32 * 1024), count); @@ -716,10 +656,6 @@ static unsigned int port_fops_poll(struct file *filp, poll_table *wait) port = filp->private_data; poll_wait(filp, &port->waitqueue, wait); - if (!port->guest_connected) { - /* Port got unplugged */ - return POLLHUP; - } ret = 0; if (!will_read_block(port)) ret |= POLLIN | POLLRDNORM; @@ -731,8 +667,6 @@ static unsigned int port_fops_poll(struct file *filp, poll_table *wait) return ret; } -static void remove_port(struct kref *kref); - static int port_fops_release(struct inode *inode, struct file *filp) { struct port *port; @@ -753,16 +687,6 @@ static int port_fops_release(struct inode *inode, struct file *filp) reclaim_consumed_buffers(port); spin_unlock_irq(&port->outvq_lock); - /* - * Locks aren't necessary here as a port can't be opened after - * unplug, and if a port isn't unplugged, a kref would already - * exist for the port. Plus, taking ports_lock here would - * create a dependency on other locks taken by functions - * inside remove_port if we're the last holder of the port, - * creating many problems. 
- */ - kref_put(&port->kref, remove_port); - return 0; } @@ -770,31 +694,22 @@ static int port_fops_open(struct inode *inode, struct file *filp) { struct cdev *cdev = inode->i_cdev; struct port *port; - int ret; - port = find_port_by_devt(cdev->dev); + port = container_of(cdev, struct port, cdev); filp->private_data = port; - /* Prevent against a port getting hot-unplugged at the same time */ - spin_lock_irq(&port->portdev->ports_lock); - kref_get(&port->kref); - spin_unlock_irq(&port->portdev->ports_lock); - /* * Don't allow opening of console port devices -- that's done * via /dev/hvc */ - if (is_console_port(port)) { - ret = -ENXIO; - goto out; - } + if (is_console_port(port)) + return -ENXIO; /* Allow only one process to open a particular port at a time */ spin_lock_irq(&port->inbuf_lock); if (port->guest_connected) { spin_unlock_irq(&port->inbuf_lock); - ret = -EMFILE; - goto out; + return -EMFILE; } port->guest_connected = true; @@ -809,23 +724,10 @@ static int port_fops_open(struct inode *inode, struct file *filp) reclaim_consumed_buffers(port); spin_unlock_irq(&port->outvq_lock); - nonseekable_open(inode, filp); - /* Notify host of port being opened */ send_control_msg(filp->private_data, VIRTIO_CONSOLE_PORT_OPEN, 1); return 0; -out: - kref_put(&port->kref, remove_port); - return ret; -} - -static int port_fops_fasync(int fd, struct file *filp, int mode) -{ - struct port *port; - - port = filp->private_data; - return fasync_helper(fd, filp, mode, &port->async_queue); } /* @@ -841,8 +743,6 @@ static const struct file_operations port_fops = { .write = port_fops_write, .poll = port_fops_poll, .release = port_fops_release, - .fasync = port_fops_fasync, - .llseek = no_llseek, }; /* @@ -1101,12 +1001,6 @@ static unsigned int fill_queue(struct virtqueue *vq, spinlock_t *lock) return nr_added_bufs; } -static void send_sigio_to_port(struct port *port) -{ - if (port->async_queue && port->guest_connected) - kill_fasync(&port->async_queue, SIGIO, POLL_OUT); -} - static 
int add_port(struct ports_device *portdev, u32 id) { char debugfs_name[16]; @@ -1121,7 +1015,6 @@ static int add_port(struct ports_device *portdev, u32 id) err = -ENOMEM; goto fail; } - kref_init(&port->kref); port->portdev = portdev; port->id = id; @@ -1129,7 +1022,6 @@ static int add_port(struct ports_device *portdev, u32 id) port->name = NULL; port->inbuf = NULL; port->cons.hvc = NULL; - port->async_queue = NULL; port->cons.ws.ws_row = port->cons.ws.ws_col = 0; @@ -1140,20 +1032,14 @@ static int add_port(struct ports_device *portdev, u32 id) port->in_vq = portdev->in_vqs[port->id]; port->out_vq = portdev->out_vqs[port->id]; - port->cdev = cdev_alloc(); - if (!port->cdev) { - dev_err(&port->portdev->vdev->dev, "Error allocating cdev\n"); - err = -ENOMEM; - goto free_port; - } - port->cdev->ops = &port_fops; + cdev_init(&port->cdev, &port_fops); devt = MKDEV(portdev->chr_major, id); - err = cdev_add(port->cdev, devt, 1); + err = cdev_add(&port->cdev, devt, 1); if (err < 0) { dev_err(&port->portdev->vdev->dev, "Error %d adding cdev for port %u\n", err, id); - goto free_cdev; + goto free_port; } port->dev = device_create(pdrvdata.class, &port->portdev->vdev->dev, devt, port, "vport%up%u", @@ -1218,7 +1104,7 @@ static int add_port(struct ports_device *portdev, u32 id) free_device: device_destroy(pdrvdata.class, port->dev->devt); free_cdev: - cdev_del(port->cdev); + cdev_del(&port->cdev); free_port: kfree(port); fail: @@ -1227,46 +1113,22 @@ static int add_port(struct ports_device *portdev, u32 id) return err; } -/* No users remain, remove all port-specific data. */ -static void remove_port(struct kref *kref) -{ - struct port *port; - - port = container_of(kref, struct port, kref); - - sysfs_remove_group(&port->dev->kobj, &port_attribute_group); - device_destroy(pdrvdata.class, port->dev->devt); - cdev_del(port->cdev); - - kfree(port->name); - - debugfs_remove(port->debugfs_file); - - kfree(port); -} - -/* - * Port got unplugged. 
Remove port from portdev's list and drop the - * kref reference. If no userspace has this port opened, it will - * result in immediate removal the port. - */ -static void unplug_port(struct port *port) +/* Remove all port-specific data. */ +static int remove_port(struct port *port) { struct port_buffer *buf; - spin_lock_irq(&port->portdev->ports_lock); - list_del(&port->list); - spin_unlock_irq(&port->portdev->ports_lock); - if (port->guest_connected) { port->guest_connected = false; port->host_connected = false; wake_up_interruptible(&port->waitqueue); - - /* Let the app know the port is going down. */ - send_sigio_to_port(port); + send_control_msg(port, VIRTIO_CONSOLE_PORT_OPEN, 0); } + spin_lock_irq(&port->portdev->ports_lock); + list_del(&port->list); + spin_unlock_irq(&port->portdev->ports_lock); + if (is_console_port(port)) { spin_lock_irq(&pdrvdata_lock); list_del(&port->cons.list); @@ -1284,6 +1146,9 @@ static void unplug_port(struct port *port) hvc_remove(port->cons.hvc); #endif } + sysfs_remove_group(&port->dev->kobj, &port_attribute_group); + device_destroy(pdrvdata.class, port->dev->devt); + cdev_del(&port->cdev); /* Remove unused data this port might have received. */ discard_port_data(port); @@ -1294,19 +1159,12 @@ static void unplug_port(struct port *port) while ((buf = virtqueue_detach_unused_buf(port->in_vq))) free_buf(buf); - /* - * We should just assume the device itself has gone off -- - * else a close on an open port later will try to send out a - * control message. - */ - port->portdev = NULL; + kfree(port->name); - /* - * Locks around here are not necessary - a port can't be - * opened after we removed the port struct from ports_list - * above. 
- */ - kref_put(&port->kref, remove_port); + debugfs_remove(port->debugfs_file); + + kfree(port); + return 0; } /* Any private messages that the Host and Guest want to share */ @@ -1345,7 +1203,7 @@ static void handle_control_message(struct ports_device *portdev, add_port(portdev, cpkt->id); break; case VIRTIO_CONSOLE_PORT_REMOVE: - unplug_port(port); + remove_port(port); break; case VIRTIO_CONSOLE_CONSOLE_PORT: if (!cpkt->value) @@ -1387,12 +1245,6 @@ static void handle_control_message(struct ports_device *portdev, spin_lock_irq(&port->outvq_lock); reclaim_consumed_buffers(port); spin_unlock_irq(&port->outvq_lock); - - /* - * If the guest is connected, it'll be interested in - * knowing the host connection state changed. - */ - send_sigio_to_port(port); break; case VIRTIO_CONSOLE_PORT_NAME: /* @@ -1489,9 +1341,6 @@ static void in_intr(struct virtqueue *vq) wake_up_interruptible(&port->waitqueue); - /* Send a SIGIO indicating new data in case the process asked for it */ - send_sigio_to_port(port); - if (is_console_port(port) && hvc_poll(port->cons.hvc)) hvc_kick(); } @@ -1728,10 +1577,6 @@ static int __devinit virtcons_probe(struct virtio_device *vdev) add_port(portdev, 0); } - spin_lock_irq(&pdrvdata_lock); - list_add_tail(&portdev->list, &pdrvdata.portdevs); - spin_unlock_irq(&pdrvdata_lock); - __send_control_msg(portdev, VIRTIO_CONSOLE_BAD_ID, VIRTIO_CONSOLE_DEVICE_READY, 1); return 0; @@ -1755,41 +1600,23 @@ static void virtcons_remove(struct virtio_device *vdev) { struct ports_device *portdev; struct port *port, *port2; + struct port_buffer *buf; + unsigned int len; portdev = vdev->priv; - spin_lock_irq(&pdrvdata_lock); - list_del(&portdev->list); - spin_unlock_irq(&pdrvdata_lock); - - /* Disable interrupts for vqs */ - vdev->config->reset(vdev); - /* Finish up work that's lined up */ cancel_work_sync(&portdev->control_work); list_for_each_entry_safe(port, port2, &portdev->ports, list) - unplug_port(port); + remove_port(port); 
unregister_chrdev(portdev->chr_major, "virtio-portsdev"); - /* - * When yanking out a device, we immediately lose the - * (device-side) queues. So there's no point in keeping the - * guest side around till we drop our final reference. This - * also means that any ports which are in an open state will - * have to just stop using the port, as the vqs are going - * away. - */ - if (use_multiport(portdev)) { - struct port_buffer *buf; - unsigned int len; - - while ((buf = virtqueue_get_buf(portdev->c_ivq, &len))) - free_buf(buf); + while ((buf = virtqueue_get_buf(portdev->c_ivq, &len))) + free_buf(buf); - while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq))) - free_buf(buf); - } + while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq))) + free_buf(buf); vdev->config->del_vqs(vdev); kfree(portdev->in_vqs); @@ -1836,7 +1663,6 @@ static int __init init(void) PTR_ERR(pdrvdata.debugfs_dir)); } INIT_LIST_HEAD(&pdrvdata.consoles); - INIT_LIST_HEAD(&pdrvdata.portdevs); return register_virtio_driver(&virtio_console); } diff --git a/trunk/fs/ceph/Kconfig b/trunk/fs/ceph/Kconfig index 9eb134ea6eb2..0fcd2640c23f 100644 --- a/trunk/fs/ceph/Kconfig +++ b/trunk/fs/ceph/Kconfig @@ -1,11 +1,9 @@ config CEPH_FS tristate "Ceph distributed file system (EXPERIMENTAL)" depends on INET && EXPERIMENTAL - select CEPH_LIB select LIBCRC32C select CRYPTO_AES select CRYPTO - default n help Choose Y or M here to include support for mounting the experimental Ceph distributed file system. Ceph is an extremely @@ -16,3 +14,15 @@ config CEPH_FS If unsure, say N. +config CEPH_FS_PRETTYDEBUG + bool "Include file:line in ceph debug output" + depends on CEPH_FS + default n + help + If you say Y here, debug output will include a filename and + line to aid debugging. This icnreases kernel size and slows + execution slightly when debug call sites are enabled (e.g., + via CONFIG_DYNAMIC_DEBUG). + + If unsure, say N. 
+ diff --git a/trunk/fs/ceph/Makefile b/trunk/fs/ceph/Makefile index 9e6c4f2e8ff1..278e1172600d 100644 --- a/trunk/fs/ceph/Makefile +++ b/trunk/fs/ceph/Makefile @@ -8,8 +8,15 @@ obj-$(CONFIG_CEPH_FS) += ceph.o ceph-objs := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ export.o caps.o snap.o xattr.o \ - mds_client.o mdsmap.o strings.o ceph_frag.o \ - debugfs.o + messenger.o msgpool.o buffer.o pagelist.o \ + mds_client.o mdsmap.o \ + mon_client.o \ + osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ + debugfs.o \ + auth.o auth_none.o \ + crypto.o armor.o \ + auth_x.o \ + ceph_fs.o ceph_strings.o ceph_hash.o ceph_frag.o else #Otherwise we were called directly from the command diff --git a/trunk/fs/ceph/README b/trunk/fs/ceph/README new file mode 100644 index 000000000000..18352fab37c0 --- /dev/null +++ b/trunk/fs/ceph/README @@ -0,0 +1,20 @@ +# +# The following files are shared by (and manually synchronized +# between) the Ceph userland and kernel client. +# +# userland kernel +src/include/ceph_fs.h fs/ceph/ceph_fs.h +src/include/ceph_fs.cc fs/ceph/ceph_fs.c +src/include/msgr.h fs/ceph/msgr.h +src/include/rados.h fs/ceph/rados.h +src/include/ceph_strings.cc fs/ceph/ceph_strings.c +src/include/ceph_frag.h fs/ceph/ceph_frag.h +src/include/ceph_frag.cc fs/ceph/ceph_frag.c +src/include/ceph_hash.h fs/ceph/ceph_hash.h +src/include/ceph_hash.cc fs/ceph/ceph_hash.c +src/crush/crush.c fs/ceph/crush/crush.c +src/crush/crush.h fs/ceph/crush/crush.h +src/crush/mapper.c fs/ceph/crush/mapper.c +src/crush/mapper.h fs/ceph/crush/mapper.h +src/crush/hash.h fs/ceph/crush/hash.h +src/crush/hash.c fs/ceph/crush/hash.c diff --git a/trunk/fs/ceph/addr.c b/trunk/fs/ceph/addr.c index 51bcc5ce3230..efbc604001c8 100644 --- a/trunk/fs/ceph/addr.c +++ b/trunk/fs/ceph/addr.c @@ -1,4 +1,4 @@ -#include +#include "ceph_debug.h" #include #include @@ -10,8 +10,7 @@ #include #include "super.h" -#include "mds_client.h" -#include +#include "osd_client.h" /* * Ceph address space 
ops. @@ -194,8 +193,7 @@ static int readpage_nounlock(struct file *filp, struct page *page) { struct inode *inode = filp->f_dentry->d_inode; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_osd_client *osdc = - &ceph_inode_to_client(inode)->client->osdc; + struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc; int err = 0; u64 len = PAGE_CACHE_SIZE; @@ -267,8 +265,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, { struct inode *inode = file->f_dentry->d_inode; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_osd_client *osdc = - &ceph_inode_to_client(inode)->client->osdc; + struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc; int rc = 0; struct page **pages; loff_t offset; @@ -368,7 +365,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) { struct inode *inode; struct ceph_inode_info *ci; - struct ceph_fs_client *fsc; + struct ceph_client *client; struct ceph_osd_client *osdc; loff_t page_off = page->index << PAGE_CACHE_SHIFT; int len = PAGE_CACHE_SIZE; @@ -386,8 +383,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) } inode = page->mapping->host; ci = ceph_inode(inode); - fsc = ceph_inode_to_client(inode); - osdc = &fsc->client->osdc; + client = ceph_inode_to_client(inode); + osdc = &client->osdc; /* verify this is a writeable snap context */ snapc = (void *)page->private; @@ -417,10 +414,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) dout("writepage %p page %p index %lu on %llu~%u snapc %p\n", inode, page, page->index, page_off, len, snapc); - writeback_stat = atomic_long_inc_return(&fsc->writeback_count); + writeback_stat = atomic_long_inc_return(&client->writeback_count); if (writeback_stat > - CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb)) - set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC); + CONGESTION_ON_THRESH(client->mount_args->congestion_kb)) + 
set_bdi_congested(&client->backing_dev_info, BLK_RW_ASYNC); set_page_writeback(page); err = ceph_osdc_writepages(osdc, ceph_vino(inode), @@ -499,7 +496,7 @@ static void writepages_finish(struct ceph_osd_request *req, struct address_space *mapping = inode->i_mapping; __s32 rc = -EIO; u64 bytes = 0; - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_client *client = ceph_inode_to_client(inode); long writeback_stat; unsigned issued = ceph_caps_issued(ci); @@ -532,10 +529,10 @@ static void writepages_finish(struct ceph_osd_request *req, WARN_ON(!PageUptodate(page)); writeback_stat = - atomic_long_dec_return(&fsc->writeback_count); + atomic_long_dec_return(&client->writeback_count); if (writeback_stat < - CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb)) - clear_bdi_congested(&fsc->backing_dev_info, + CONGESTION_OFF_THRESH(client->mount_args->congestion_kb)) + clear_bdi_congested(&client->backing_dev_info, BLK_RW_ASYNC); ceph_put_snap_context((void *)page->private); @@ -572,13 +569,13 @@ static void writepages_finish(struct ceph_osd_request *req, * mempool. we avoid the mempool if we can because req->r_num_pages * may be less than the maximum write size. 
*/ -static void alloc_page_vec(struct ceph_fs_client *fsc, +static void alloc_page_vec(struct ceph_client *client, struct ceph_osd_request *req) { req->r_pages = kmalloc(sizeof(struct page *) * req->r_num_pages, GFP_NOFS); if (!req->r_pages) { - req->r_pages = mempool_alloc(fsc->wb_pagevec_pool, GFP_NOFS); + req->r_pages = mempool_alloc(client->wb_pagevec_pool, GFP_NOFS); req->r_pages_from_pool = 1; WARN_ON(!req->r_pages); } @@ -593,7 +590,7 @@ static int ceph_writepages_start(struct address_space *mapping, struct inode *inode = mapping->host; struct backing_dev_info *bdi = mapping->backing_dev_info; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc; + struct ceph_client *client; pgoff_t index, start, end; int range_whole = 0; int should_loop = 1; @@ -620,13 +617,13 @@ static int ceph_writepages_start(struct address_space *mapping, wbc->sync_mode == WB_SYNC_NONE ? "NONE" : (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); - fsc = ceph_inode_to_client(inode); - if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { + client = ceph_inode_to_client(inode); + if (client->mount_state == CEPH_MOUNT_SHUTDOWN) { pr_warning("writepage_start %p on forced umount\n", inode); return -EIO; /* we're in a forced umount, don't write! 
*/ } - if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize) - wsize = fsc->mount_options->wsize; + if (client->mount_args->wsize && client->mount_args->wsize < wsize) + wsize = client->mount_args->wsize; if (wsize < PAGE_CACHE_SIZE) wsize = PAGE_CACHE_SIZE; max_pages_ever = wsize >> PAGE_CACHE_SHIFT; @@ -772,7 +769,7 @@ static int ceph_writepages_start(struct address_space *mapping, offset = (unsigned long long)page->index << PAGE_CACHE_SHIFT; len = wsize; - req = ceph_osdc_new_request(&fsc->client->osdc, + req = ceph_osdc_new_request(&client->osdc, &ci->i_layout, ceph_vino(inode), offset, &len, @@ -785,7 +782,7 @@ static int ceph_writepages_start(struct address_space *mapping, &inode->i_mtime, true, 1); max_pages = req->r_num_pages; - alloc_page_vec(fsc, req); + alloc_page_vec(client, req); req->r_callback = writepages_finish; req->r_inode = inode; } @@ -797,10 +794,10 @@ static int ceph_writepages_start(struct address_space *mapping, inode, page, page->index); writeback_stat = - atomic_long_inc_return(&fsc->writeback_count); + atomic_long_inc_return(&client->writeback_count); if (writeback_stat > CONGESTION_ON_THRESH( - fsc->mount_options->congestion_kb)) { - set_bdi_congested(&fsc->backing_dev_info, + client->mount_args->congestion_kb)) { + set_bdi_congested(&client->backing_dev_info, BLK_RW_ASYNC); } @@ -849,7 +846,7 @@ static int ceph_writepages_start(struct address_space *mapping, op->payload_len = cpu_to_le32(len); req->r_request->hdr.data_len = cpu_to_le32(len); - ceph_osdc_start_request(&fsc->client->osdc, req, true); + ceph_osdc_start_request(&client->osdc, req, true); req = NULL; /* continue? 
*/ @@ -918,7 +915,7 @@ static int ceph_update_writeable_page(struct file *file, { struct inode *inode = file->f_dentry->d_inode; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; loff_t page_off = pos & PAGE_CACHE_MASK; int pos_in_page = pos & ~PAGE_CACHE_MASK; int end_in_page = pos_in_page + len; @@ -1056,8 +1053,8 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata) { struct inode *inode = file->f_dentry->d_inode; - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *client = ceph_inode_to_client(inode); + struct ceph_mds_client *mdsc = &client->mdsc; unsigned from = pos & (PAGE_CACHE_SIZE - 1); int check_cap = 0; @@ -1126,7 +1123,7 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct inode *inode = vma->vm_file->f_dentry->d_inode; struct page *page = vmf->page; - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; loff_t off = page->index << PAGE_CACHE_SHIFT; loff_t size, len; int ret; diff --git a/trunk/net/ceph/armor.c b/trunk/fs/ceph/armor.c similarity index 100% rename from trunk/net/ceph/armor.c rename to trunk/fs/ceph/armor.c diff --git a/trunk/net/ceph/auth.c b/trunk/fs/ceph/auth.c similarity index 97% rename from trunk/net/ceph/auth.c rename to trunk/fs/ceph/auth.c index 549c1f43e1d5..6d2e30600627 100644 --- a/trunk/net/ceph/auth.c +++ b/trunk/fs/ceph/auth.c @@ -1,16 +1,16 @@ -#include +#include "ceph_debug.h" #include #include #include -#include -#include -#include -#include +#include "types.h" #include "auth_none.h" #include "auth_x.h" +#include "decode.h" +#include "super.h" +#include "messenger.h" /* * get protocol handler diff --git a/trunk/include/linux/ceph/auth.h 
b/trunk/fs/ceph/auth.h similarity index 97% rename from trunk/include/linux/ceph/auth.h rename to trunk/fs/ceph/auth.h index 7fff521d7eb5..d38a2fb4a137 100644 --- a/trunk/include/linux/ceph/auth.h +++ b/trunk/fs/ceph/auth.h @@ -1,8 +1,8 @@ #ifndef _FS_CEPH_AUTH_H #define _FS_CEPH_AUTH_H -#include -#include +#include "types.h" +#include "buffer.h" /* * Abstract interface for communicating with the authenticate module. diff --git a/trunk/net/ceph/auth_none.c b/trunk/fs/ceph/auth_none.c similarity index 96% rename from trunk/net/ceph/auth_none.c rename to trunk/fs/ceph/auth_none.c index 214c2bb43d62..ad1dc21286c7 100644 --- a/trunk/net/ceph/auth_none.c +++ b/trunk/fs/ceph/auth_none.c @@ -1,15 +1,14 @@ -#include +#include "ceph_debug.h" #include #include #include #include -#include -#include - #include "auth_none.h" +#include "auth.h" +#include "decode.h" static void reset(struct ceph_auth_client *ac) { diff --git a/trunk/net/ceph/auth_none.h b/trunk/fs/ceph/auth_none.h similarity index 94% rename from trunk/net/ceph/auth_none.h rename to trunk/fs/ceph/auth_none.h index ed7d088b1bc9..8164df1a08be 100644 --- a/trunk/net/ceph/auth_none.h +++ b/trunk/fs/ceph/auth_none.h @@ -2,7 +2,8 @@ #define _FS_CEPH_AUTH_NONE_H #include -#include + +#include "auth.h" /* * null security mode. 
diff --git a/trunk/net/ceph/auth_x.c b/trunk/fs/ceph/auth_x.c similarity index 99% rename from trunk/net/ceph/auth_x.c rename to trunk/fs/ceph/auth_x.c index 7fd5dfcf6e18..a2d002cbdec2 100644 --- a/trunk/net/ceph/auth_x.c +++ b/trunk/fs/ceph/auth_x.c @@ -1,17 +1,16 @@ -#include +#include "ceph_debug.h" #include #include #include #include -#include -#include - -#include "crypto.h" #include "auth_x.h" #include "auth_x_protocol.h" +#include "crypto.h" +#include "auth.h" +#include "decode.h" #define TEMP_TICKET_BUF_LEN 256 diff --git a/trunk/net/ceph/auth_x.h b/trunk/fs/ceph/auth_x.h similarity index 96% rename from trunk/net/ceph/auth_x.h rename to trunk/fs/ceph/auth_x.h index e02da7a5c5a1..ff6f8180e681 100644 --- a/trunk/net/ceph/auth_x.h +++ b/trunk/fs/ceph/auth_x.h @@ -3,9 +3,8 @@ #include -#include - #include "crypto.h" +#include "auth.h" #include "auth_x_protocol.h" /* diff --git a/trunk/net/ceph/auth_x_protocol.h b/trunk/fs/ceph/auth_x_protocol.h similarity index 100% rename from trunk/net/ceph/auth_x_protocol.h rename to trunk/fs/ceph/auth_x_protocol.h diff --git a/trunk/net/ceph/buffer.c b/trunk/fs/ceph/buffer.c similarity index 86% rename from trunk/net/ceph/buffer.c rename to trunk/fs/ceph/buffer.c index 53d8abfa25d5..cd39f17021de 100644 --- a/trunk/net/ceph/buffer.c +++ b/trunk/fs/ceph/buffer.c @@ -1,11 +1,10 @@ -#include +#include "ceph_debug.h" -#include #include -#include -#include +#include "buffer.h" +#include "decode.h" struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) { @@ -33,7 +32,6 @@ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) dout("buffer_new %p\n", b); return b; } -EXPORT_SYMBOL(ceph_buffer_new); void ceph_buffer_release(struct kref *kref) { @@ -48,7 +46,6 @@ void ceph_buffer_release(struct kref *kref) } kfree(b); } -EXPORT_SYMBOL(ceph_buffer_release); int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end) { diff --git a/trunk/include/linux/ceph/buffer.h b/trunk/fs/ceph/buffer.h similarity index 100% rename 
from trunk/include/linux/ceph/buffer.h rename to trunk/fs/ceph/buffer.h diff --git a/trunk/fs/ceph/caps.c b/trunk/fs/ceph/caps.c index 98ab13e2b71d..5e9da996a151 100644 --- a/trunk/fs/ceph/caps.c +++ b/trunk/fs/ceph/caps.c @@ -1,4 +1,4 @@ -#include +#include "ceph_debug.h" #include #include @@ -9,9 +9,8 @@ #include #include "super.h" -#include "mds_client.h" -#include -#include +#include "decode.h" +#include "messenger.h" /* * Capability management @@ -288,11 +287,11 @@ void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap) spin_unlock(&mdsc->caps_list_lock); } -void ceph_reservation_status(struct ceph_fs_client *fsc, +void ceph_reservation_status(struct ceph_client *client, int *total, int *avail, int *used, int *reserved, int *min) { - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_client *mdsc = &client->mdsc; if (total) *total = mdsc->caps_total_count; @@ -400,7 +399,7 @@ static void __insert_cap_node(struct ceph_inode_info *ci, static void __cap_set_timeouts(struct ceph_mds_client *mdsc, struct ceph_inode_info *ci) { - struct ceph_mount_options *ma = mdsc->fsc->mount_options; + struct ceph_mount_args *ma = mdsc->client->mount_args; ci->i_hold_caps_min = round_jiffies(jiffies + ma->caps_wanted_delay_min * HZ); @@ -516,7 +515,7 @@ int ceph_add_cap(struct inode *inode, unsigned seq, unsigned mseq, u64 realmino, int flags, struct ceph_cap_reservation *caps_reservation) { - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_cap *new_cap = NULL; struct ceph_cap *cap; @@ -874,7 +873,7 @@ void __ceph_remove_cap(struct ceph_cap *cap) struct ceph_mds_session *session = cap->session; struct ceph_inode_info *ci = cap->ci; struct ceph_mds_client *mdsc = - ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; + &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; int removed = 0; dout("__ceph_remove_cap %p from %p\n", 
cap, &ci->vfs_inode); @@ -1211,7 +1210,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci, int mds; struct ceph_cap_snap *capsnap; u32 mseq; - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; struct ceph_mds_session *session = NULL; /* if session != NULL, we hold session->s_mutex */ u64 next_follows = 0; /* keep track of how far we've gotten through the @@ -1337,7 +1336,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) { struct ceph_mds_client *mdsc = - ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; + &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; struct inode *inode = &ci->vfs_inode; int was = ci->i_dirty_caps; int dirty = 0; @@ -1379,7 +1378,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) static int __mark_caps_flushing(struct inode *inode, struct ceph_mds_session *session) { - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); int flushing; @@ -1417,6 +1416,17 @@ static int __mark_caps_flushing(struct inode *inode, /* * try to invalidate mapping pages without blocking. */ +static int mapping_is_empty(struct address_space *mapping) +{ + struct page *page = find_get_page(mapping, 0); + + if (!page) + return 1; + + put_page(page); + return 0; +} + static int try_nonblocking_invalidate(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); @@ -1426,7 +1436,7 @@ static int try_nonblocking_invalidate(struct inode *inode) invalidate_mapping_pages(&inode->i_data, 0, -1); spin_lock(&inode->i_lock); - if (inode->i_data.nrpages == 0 && + if (mapping_is_empty(&inode->i_data) && invalidating_gen == ci->i_rdcache_gen) { /* success. 
*/ dout("try_nonblocking_invalidate %p success\n", inode); @@ -1452,8 +1462,8 @@ static int try_nonblocking_invalidate(struct inode *inode) void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_mds_session *session) { - struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); + struct ceph_mds_client *mdsc = &client->mdsc; struct inode *inode = &ci->vfs_inode; struct ceph_cap *cap; int file_wanted, used; @@ -1523,7 +1533,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, */ if ((!is_delayed || mdsc->stopping) && ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ - inode->i_data.nrpages && /* have cached pages */ + ci->i_rdcache_gen && /* may have cached pages */ (file_wanted == 0 || /* no open files */ (revoking & (CEPH_CAP_FILE_CACHE| CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */ @@ -1696,7 +1706,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, unsigned *flush_tid) { - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); int unlock_session = session ? 
0 : 1; int flushing = 0; @@ -1862,7 +1872,7 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) caps_are_flushed(inode, flush_tid)); } else { struct ceph_mds_client *mdsc = - ceph_sb_to_client(inode->i_sb)->mdsc; + &ceph_sb_to_client(inode->i_sb)->mdsc; spin_lock(&inode->i_lock); if (__ceph_caps_dirty(ci)) @@ -2455,7 +2465,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, __releases(inode->i_lock) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; unsigned seq = le32_to_cpu(m->seq); int dirty = le32_to_cpu(m->dirty); int cleaned = 0; @@ -2703,7 +2713,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, struct ceph_msg *msg) { struct ceph_mds_client *mdsc = session->s_mdsc; - struct super_block *sb = mdsc->fsc->sb; + struct super_block *sb = mdsc->client->sb; struct inode *inode; struct ceph_cap *cap; struct ceph_mds_caps *h; diff --git a/trunk/include/linux/ceph/ceph_debug.h b/trunk/fs/ceph/ceph_debug.h similarity index 86% rename from trunk/include/linux/ceph/ceph_debug.h rename to trunk/fs/ceph/ceph_debug.h index aa2e19182d99..1818c2305610 100644 --- a/trunk/include/linux/ceph/ceph_debug.h +++ b/trunk/fs/ceph/ceph_debug.h @@ -3,7 +3,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#ifdef CONFIG_CEPH_LIB_PRETTYDEBUG +#ifdef CONFIG_CEPH_FS_PRETTYDEBUG /* * wrap pr_debug to include a filename:lineno prefix on each line. @@ -14,8 +14,7 @@ # if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) extern const char *ceph_file_part(const char *s, int len); # define dout(fmt, ...) 
\ - pr_debug("%.*s %12.12s:%-4d : " fmt, \ - 8 - (int)sizeof(KBUILD_MODNAME), " ", \ + pr_debug(" %12.12s:%-4d : " fmt, \ ceph_file_part(__FILE__, sizeof(__FILE__)), \ __LINE__, ##__VA_ARGS__) # else diff --git a/trunk/fs/ceph/ceph_frag.c b/trunk/fs/ceph/ceph_frag.c index bdce8b1fbd06..ab6cf35c4091 100644 --- a/trunk/fs/ceph/ceph_frag.c +++ b/trunk/fs/ceph/ceph_frag.c @@ -1,8 +1,7 @@ /* * Ceph 'frag' type */ -#include -#include +#include "types.h" int ceph_frag_compare(__u32 a, __u32 b) { diff --git a/trunk/include/linux/ceph/ceph_frag.h b/trunk/fs/ceph/ceph_frag.h similarity index 100% rename from trunk/include/linux/ceph/ceph_frag.h rename to trunk/fs/ceph/ceph_frag.h diff --git a/trunk/net/ceph/ceph_fs.c b/trunk/fs/ceph/ceph_fs.c similarity index 92% rename from trunk/net/ceph/ceph_fs.c rename to trunk/fs/ceph/ceph_fs.c index a3a3a31d3c37..3ac6cc7c1156 100644 --- a/trunk/net/ceph/ceph_fs.c +++ b/trunk/fs/ceph/ceph_fs.c @@ -1,8 +1,7 @@ /* * Some non-inline ceph helpers */ -#include -#include +#include "types.h" /* * return true if @layout appears to be valid @@ -53,7 +52,6 @@ int ceph_flags_to_mode(int flags) return mode; } -EXPORT_SYMBOL(ceph_flags_to_mode); int ceph_caps_for_mode(int mode) { @@ -72,4 +70,3 @@ int ceph_caps_for_mode(int mode) return caps; } -EXPORT_SYMBOL(ceph_caps_for_mode); diff --git a/trunk/include/linux/ceph/ceph_fs.h b/trunk/fs/ceph/ceph_fs.h similarity index 99% rename from trunk/include/linux/ceph/ceph_fs.h rename to trunk/fs/ceph/ceph_fs.h index c3c74aef289d..d5619ac86711 100644 --- a/trunk/include/linux/ceph/ceph_fs.h +++ b/trunk/fs/ceph/ceph_fs.h @@ -299,7 +299,6 @@ enum { CEPH_MDS_OP_SETATTR = 0x01108, CEPH_MDS_OP_SETFILELOCK= 0x01109, CEPH_MDS_OP_GETFILELOCK= 0x00110, - CEPH_MDS_OP_SETDIRLAYOUT=0x0110a, CEPH_MDS_OP_MKNOD = 0x01201, CEPH_MDS_OP_LINK = 0x01202, diff --git a/trunk/net/ceph/ceph_hash.c b/trunk/fs/ceph/ceph_hash.c similarity index 98% rename from trunk/net/ceph/ceph_hash.c rename to trunk/fs/ceph/ceph_hash.c index 
815ef8826796..bd570015d147 100644 --- a/trunk/net/ceph/ceph_hash.c +++ b/trunk/fs/ceph/ceph_hash.c @@ -1,5 +1,5 @@ -#include +#include "types.h" /* * Robert Jenkin's hash function. diff --git a/trunk/include/linux/ceph/ceph_hash.h b/trunk/fs/ceph/ceph_hash.h similarity index 100% rename from trunk/include/linux/ceph/ceph_hash.h rename to trunk/fs/ceph/ceph_hash.h diff --git a/trunk/fs/ceph/strings.c b/trunk/fs/ceph/ceph_strings.c similarity index 59% rename from trunk/fs/ceph/strings.c rename to trunk/fs/ceph/ceph_strings.c index cd5097d7c804..c6179d3a26a2 100644 --- a/trunk/fs/ceph/strings.c +++ b/trunk/fs/ceph/ceph_strings.c @@ -1,9 +1,71 @@ /* - * Ceph fs string constants + * Ceph string constants */ -#include -#include +#include "types.h" +const char *ceph_entity_type_name(int type) +{ + switch (type) { + case CEPH_ENTITY_TYPE_MDS: return "mds"; + case CEPH_ENTITY_TYPE_OSD: return "osd"; + case CEPH_ENTITY_TYPE_MON: return "mon"; + case CEPH_ENTITY_TYPE_CLIENT: return "client"; + case CEPH_ENTITY_TYPE_AUTH: return "auth"; + default: return "unknown"; + } +} + +const char *ceph_osd_op_name(int op) +{ + switch (op) { + case CEPH_OSD_OP_READ: return "read"; + case CEPH_OSD_OP_STAT: return "stat"; + + case CEPH_OSD_OP_MASKTRUNC: return "masktrunc"; + + case CEPH_OSD_OP_WRITE: return "write"; + case CEPH_OSD_OP_DELETE: return "delete"; + case CEPH_OSD_OP_TRUNCATE: return "truncate"; + case CEPH_OSD_OP_ZERO: return "zero"; + case CEPH_OSD_OP_WRITEFULL: return "writefull"; + case CEPH_OSD_OP_ROLLBACK: return "rollback"; + + case CEPH_OSD_OP_APPEND: return "append"; + case CEPH_OSD_OP_STARTSYNC: return "startsync"; + case CEPH_OSD_OP_SETTRUNC: return "settrunc"; + case CEPH_OSD_OP_TRIMTRUNC: return "trimtrunc"; + + case CEPH_OSD_OP_TMAPUP: return "tmapup"; + case CEPH_OSD_OP_TMAPGET: return "tmapget"; + case CEPH_OSD_OP_TMAPPUT: return "tmapput"; + + case CEPH_OSD_OP_GETXATTR: return "getxattr"; + case CEPH_OSD_OP_GETXATTRS: return "getxattrs"; + case 
CEPH_OSD_OP_SETXATTR: return "setxattr"; + case CEPH_OSD_OP_SETXATTRS: return "setxattrs"; + case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs"; + case CEPH_OSD_OP_RMXATTR: return "rmxattr"; + case CEPH_OSD_OP_CMPXATTR: return "cmpxattr"; + + case CEPH_OSD_OP_PULL: return "pull"; + case CEPH_OSD_OP_PUSH: return "push"; + case CEPH_OSD_OP_BALANCEREADS: return "balance-reads"; + case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads"; + case CEPH_OSD_OP_SCRUB: return "scrub"; + + case CEPH_OSD_OP_WRLOCK: return "wrlock"; + case CEPH_OSD_OP_WRUNLOCK: return "wrunlock"; + case CEPH_OSD_OP_RDLOCK: return "rdlock"; + case CEPH_OSD_OP_RDUNLOCK: return "rdunlock"; + case CEPH_OSD_OP_UPLOCK: return "uplock"; + case CEPH_OSD_OP_DNLOCK: return "dnlock"; + + case CEPH_OSD_OP_CALL: return "call"; + + case CEPH_OSD_OP_PGLS: return "pgls"; + } + return "???"; +} const char *ceph_mds_state_name(int s) { @@ -115,3 +177,17 @@ const char *ceph_snap_op_name(int o) } return "???"; } + +const char *ceph_pool_op_name(int op) +{ + switch (op) { + case POOL_OP_CREATE: return "create"; + case POOL_OP_DELETE: return "delete"; + case POOL_OP_AUID_CHANGE: return "auid change"; + case POOL_OP_CREATE_SNAP: return "create snap"; + case POOL_OP_DELETE_SNAP: return "delete snap"; + case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap"; + case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap"; + } + return "???"; +} diff --git a/trunk/net/ceph/crush/crush.c b/trunk/fs/ceph/crush/crush.c similarity index 99% rename from trunk/net/ceph/crush/crush.c rename to trunk/fs/ceph/crush/crush.c index d6ebb13a18a4..fabd302e5779 100644 --- a/trunk/net/ceph/crush/crush.c +++ b/trunk/fs/ceph/crush/crush.c @@ -8,7 +8,7 @@ # define BUG_ON(x) assert(!(x)) #endif -#include +#include "crush.h" const char *crush_bucket_alg_name(int alg) { diff --git a/trunk/include/linux/crush/crush.h b/trunk/fs/ceph/crush/crush.h similarity index 100% rename from trunk/include/linux/crush/crush.h rename to 
trunk/fs/ceph/crush/crush.h diff --git a/trunk/net/ceph/crush/hash.c b/trunk/fs/ceph/crush/hash.c similarity index 99% rename from trunk/net/ceph/crush/hash.c rename to trunk/fs/ceph/crush/hash.c index 5bb63e37a8a1..5873aed694bf 100644 --- a/trunk/net/ceph/crush/hash.c +++ b/trunk/fs/ceph/crush/hash.c @@ -1,6 +1,6 @@ #include -#include +#include "hash.h" /* * Robert Jenkins' function for mixing 32-bit values diff --git a/trunk/include/linux/crush/hash.h b/trunk/fs/ceph/crush/hash.h similarity index 100% rename from trunk/include/linux/crush/hash.h rename to trunk/fs/ceph/crush/hash.h diff --git a/trunk/net/ceph/crush/mapper.c b/trunk/fs/ceph/crush/mapper.c similarity index 99% rename from trunk/net/ceph/crush/mapper.c rename to trunk/fs/ceph/crush/mapper.c index 42599e31dcad..a4eec133258e 100644 --- a/trunk/net/ceph/crush/mapper.c +++ b/trunk/fs/ceph/crush/mapper.c @@ -18,8 +18,8 @@ # define kfree(x) free(x) #endif -#include -#include +#include "crush.h" +#include "hash.h" /* * Implement the core CRUSH mapping algorithm. 
diff --git a/trunk/include/linux/crush/mapper.h b/trunk/fs/ceph/crush/mapper.h similarity index 100% rename from trunk/include/linux/crush/mapper.h rename to trunk/fs/ceph/crush/mapper.h diff --git a/trunk/net/ceph/crypto.c b/trunk/fs/ceph/crypto.c similarity index 99% rename from trunk/net/ceph/crypto.c rename to trunk/fs/ceph/crypto.c index 7b505b0c983f..a3e627f63293 100644 --- a/trunk/net/ceph/crypto.c +++ b/trunk/fs/ceph/crypto.c @@ -1,13 +1,13 @@ -#include +#include "ceph_debug.h" #include #include #include #include -#include #include "crypto.h" +#include "decode.h" int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end) { diff --git a/trunk/net/ceph/crypto.h b/trunk/fs/ceph/crypto.h similarity index 95% rename from trunk/net/ceph/crypto.h rename to trunk/fs/ceph/crypto.h index f9eccace592b..bdf38607323c 100644 --- a/trunk/net/ceph/crypto.h +++ b/trunk/fs/ceph/crypto.h @@ -1,8 +1,8 @@ #ifndef _FS_CEPH_CRYPTO_H #define _FS_CEPH_CRYPTO_H -#include -#include +#include "types.h" +#include "buffer.h" /* * cryptographic secret diff --git a/trunk/fs/ceph/debugfs.c b/trunk/fs/ceph/debugfs.c index 7ae1b3d55b58..6fd8b20a8611 100644 --- a/trunk/fs/ceph/debugfs.c +++ b/trunk/fs/ceph/debugfs.c @@ -1,4 +1,4 @@ -#include +#include "ceph_debug.h" #include #include @@ -7,49 +7,143 @@ #include #include -#include -#include -#include -#include - #include "super.h" +#include "mds_client.h" +#include "mon_client.h" +#include "auth.h" #ifdef CONFIG_DEBUG_FS -#include "mds_client.h" +/* + * Implement /sys/kernel/debug/ceph fun + * + * /sys/kernel/debug/ceph/client* - an instance of the ceph client + * .../osdmap - current osdmap + * .../mdsmap - current mdsmap + * .../monmap - current monmap + * .../osdc - active osd requests + * .../mdsc - active mds requests + * .../monc - mon client state + * .../dentry_lru - dump contents of dentry lru + * .../caps - expose cap (reservation) stats + * .../bdi - symlink to ../../bdi/something + */ + +static struct dentry 
*ceph_debugfs_dir; + +static int monmap_show(struct seq_file *s, void *p) +{ + int i; + struct ceph_client *client = s->private; + + if (client->monc.monmap == NULL) + return 0; + + seq_printf(s, "epoch %d\n", client->monc.monmap->epoch); + for (i = 0; i < client->monc.monmap->num_mon; i++) { + struct ceph_entity_inst *inst = + &client->monc.monmap->mon_inst[i]; + + seq_printf(s, "\t%s%lld\t%s\n", + ENTITY_NAME(inst->name), + pr_addr(&inst->addr.in_addr)); + } + return 0; +} static int mdsmap_show(struct seq_file *s, void *p) { int i; - struct ceph_fs_client *fsc = s->private; + struct ceph_client *client = s->private; - if (fsc->mdsc == NULL || fsc->mdsc->mdsmap == NULL) + if (client->mdsc.mdsmap == NULL) return 0; - seq_printf(s, "epoch %d\n", fsc->mdsc->mdsmap->m_epoch); - seq_printf(s, "root %d\n", fsc->mdsc->mdsmap->m_root); + seq_printf(s, "epoch %d\n", client->mdsc.mdsmap->m_epoch); + seq_printf(s, "root %d\n", client->mdsc.mdsmap->m_root); seq_printf(s, "session_timeout %d\n", - fsc->mdsc->mdsmap->m_session_timeout); + client->mdsc.mdsmap->m_session_timeout); seq_printf(s, "session_autoclose %d\n", - fsc->mdsc->mdsmap->m_session_autoclose); - for (i = 0; i < fsc->mdsc->mdsmap->m_max_mds; i++) { + client->mdsc.mdsmap->m_session_autoclose); + for (i = 0; i < client->mdsc.mdsmap->m_max_mds; i++) { struct ceph_entity_addr *addr = - &fsc->mdsc->mdsmap->m_info[i].addr; - int state = fsc->mdsc->mdsmap->m_info[i].state; + &client->mdsc.mdsmap->m_info[i].addr; + int state = client->mdsc.mdsmap->m_info[i].state; - seq_printf(s, "\tmds%d\t%s\t(%s)\n", i, - ceph_pr_addr(&addr->in_addr), + seq_printf(s, "\tmds%d\t%s\t(%s)\n", i, pr_addr(&addr->in_addr), ceph_mds_state_name(state)); } return 0; } -/* - * mdsc debugfs - */ +static int osdmap_show(struct seq_file *s, void *p) +{ + int i; + struct ceph_client *client = s->private; + struct rb_node *n; + + if (client->osdc.osdmap == NULL) + return 0; + seq_printf(s, "epoch %d\n", client->osdc.osdmap->epoch); + seq_printf(s, 
"flags%s%s\n", + (client->osdc.osdmap->flags & CEPH_OSDMAP_NEARFULL) ? + " NEARFULL" : "", + (client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ? + " FULL" : ""); + for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) { + struct ceph_pg_pool_info *pool = + rb_entry(n, struct ceph_pg_pool_info, node); + seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n", + pool->id, pool->v.pg_num, pool->pg_num_mask, + pool->v.lpg_num, pool->lpg_num_mask); + } + for (i = 0; i < client->osdc.osdmap->max_osd; i++) { + struct ceph_entity_addr *addr = + &client->osdc.osdmap->osd_addr[i]; + int state = client->osdc.osdmap->osd_state[i]; + char sb[64]; + + seq_printf(s, "\tosd%d\t%s\t%3d%%\t(%s)\n", + i, pr_addr(&addr->in_addr), + ((client->osdc.osdmap->osd_weight[i]*100) >> 16), + ceph_osdmap_state_str(sb, sizeof(sb), state)); + } + return 0; +} + +static int monc_show(struct seq_file *s, void *p) +{ + struct ceph_client *client = s->private; + struct ceph_mon_generic_request *req; + struct ceph_mon_client *monc = &client->monc; + struct rb_node *rp; + + mutex_lock(&monc->mutex); + + if (monc->have_mdsmap) + seq_printf(s, "have mdsmap %u\n", (unsigned)monc->have_mdsmap); + if (monc->have_osdmap) + seq_printf(s, "have osdmap %u\n", (unsigned)monc->have_osdmap); + if (monc->want_next_osdmap) + seq_printf(s, "want next osdmap\n"); + + for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) { + __u16 op; + req = rb_entry(rp, struct ceph_mon_generic_request, node); + op = le16_to_cpu(req->request->hdr.type); + if (op == CEPH_MSG_STATFS) + seq_printf(s, "%lld statfs\n", req->tid); + else + seq_printf(s, "%lld unknown\n", req->tid); + } + + mutex_unlock(&monc->mutex); + return 0; +} + static int mdsc_show(struct seq_file *s, void *p) { - struct ceph_fs_client *fsc = s->private; - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *client = s->private; + struct ceph_mds_client *mdsc = &client->mdsc; struct ceph_mds_request *req; struct rb_node 
*rp; int pathlen; @@ -120,12 +214,61 @@ static int mdsc_show(struct seq_file *s, void *p) return 0; } +static int osdc_show(struct seq_file *s, void *pp) +{ + struct ceph_client *client = s->private; + struct ceph_osd_client *osdc = &client->osdc; + struct rb_node *p; + + mutex_lock(&osdc->request_mutex); + for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { + struct ceph_osd_request *req; + struct ceph_osd_request_head *head; + struct ceph_osd_op *op; + int num_ops; + int opcode, olen; + int i; + + req = rb_entry(p, struct ceph_osd_request, r_node); + + seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid, + req->r_osd ? req->r_osd->o_osd : -1, + le32_to_cpu(req->r_pgid.pool), + le16_to_cpu(req->r_pgid.ps)); + + head = req->r_request->front.iov_base; + op = (void *)(head + 1); + + num_ops = le16_to_cpu(head->num_ops); + olen = le32_to_cpu(head->object_len); + seq_printf(s, "%.*s", olen, + (const char *)(head->ops + num_ops)); + + if (req->r_reassert_version.epoch) + seq_printf(s, "\t%u'%llu", + (unsigned)le32_to_cpu(req->r_reassert_version.epoch), + le64_to_cpu(req->r_reassert_version.version)); + else + seq_printf(s, "\t"); + + for (i = 0; i < num_ops; i++) { + opcode = le16_to_cpu(op->op); + seq_printf(s, "\t%s", ceph_osd_op_name(opcode)); + op++; + } + + seq_printf(s, "\n"); + } + mutex_unlock(&osdc->request_mutex); + return 0; +} + static int caps_show(struct seq_file *s, void *p) { - struct ceph_fs_client *fsc = s->private; + struct ceph_client *client = s->private; int total, avail, used, reserved, min; - ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min); + ceph_reservation_status(client, &total, &avail, &used, &reserved, &min); seq_printf(s, "total\t\t%d\n" "avail\t\t%d\n" "used\t\t%d\n" @@ -137,8 +280,8 @@ static int caps_show(struct seq_file *s, void *p) static int dentry_lru_show(struct seq_file *s, void *ptr) { - struct ceph_fs_client *fsc = s->private; - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *client = 
s->private; + struct ceph_mds_client *mdsc = &client->mdsc; struct ceph_dentry_info *di; spin_lock(&mdsc->dentry_lru_lock); @@ -152,124 +295,199 @@ static int dentry_lru_show(struct seq_file *s, void *ptr) return 0; } -CEPH_DEFINE_SHOW_FUNC(mdsmap_show) -CEPH_DEFINE_SHOW_FUNC(mdsc_show) -CEPH_DEFINE_SHOW_FUNC(caps_show) -CEPH_DEFINE_SHOW_FUNC(dentry_lru_show) - +#define DEFINE_SHOW_FUNC(name) \ +static int name##_open(struct inode *inode, struct file *file) \ +{ \ + struct seq_file *sf; \ + int ret; \ + \ + ret = single_open(file, name, NULL); \ + sf = file->private_data; \ + sf->private = inode->i_private; \ + return ret; \ +} \ + \ +static const struct file_operations name##_fops = { \ + .open = name##_open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ +}; + +DEFINE_SHOW_FUNC(monmap_show) +DEFINE_SHOW_FUNC(mdsmap_show) +DEFINE_SHOW_FUNC(osdmap_show) +DEFINE_SHOW_FUNC(monc_show) +DEFINE_SHOW_FUNC(mdsc_show) +DEFINE_SHOW_FUNC(osdc_show) +DEFINE_SHOW_FUNC(dentry_lru_show) +DEFINE_SHOW_FUNC(caps_show) -/* - * debugfs - */ static int congestion_kb_set(void *data, u64 val) { - struct ceph_fs_client *fsc = (struct ceph_fs_client *)data; + struct ceph_client *client = (struct ceph_client *)data; + + if (client) + client->mount_args->congestion_kb = (int)val; - fsc->mount_options->congestion_kb = (int)val; return 0; } static int congestion_kb_get(void *data, u64 *val) { - struct ceph_fs_client *fsc = (struct ceph_fs_client *)data; + struct ceph_client *client = (struct ceph_client *)data; + + if (client) + *val = (u64)client->mount_args->congestion_kb; - *val = (u64)fsc->mount_options->congestion_kb; return 0; } + DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get, congestion_kb_set, "%llu\n"); +int __init ceph_debugfs_init(void) +{ + ceph_debugfs_dir = debugfs_create_dir("ceph", NULL); + if (!ceph_debugfs_dir) + return -ENOMEM; + return 0; +} -void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) +void 
ceph_debugfs_cleanup(void) { - dout("ceph_fs_debugfs_cleanup\n"); - debugfs_remove(fsc->debugfs_bdi); - debugfs_remove(fsc->debugfs_congestion_kb); - debugfs_remove(fsc->debugfs_mdsmap); - debugfs_remove(fsc->debugfs_caps); - debugfs_remove(fsc->debugfs_mdsc); - debugfs_remove(fsc->debugfs_dentry_lru); + debugfs_remove(ceph_debugfs_dir); } -int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) +int ceph_debugfs_client_init(struct ceph_client *client) { - char name[100]; - int err = -ENOMEM; + int ret = 0; + char name[80]; - dout("ceph_fs_debugfs_init\n"); - fsc->debugfs_congestion_kb = - debugfs_create_file("writeback_congestion_kb", - 0600, - fsc->client->debugfs_dir, - fsc, - &congestion_kb_fops); - if (!fsc->debugfs_congestion_kb) + snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid, + client->monc.auth->global_id); + + client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); + if (!client->debugfs_dir) goto out; - dout("a\n"); + client->monc.debugfs_file = debugfs_create_file("monc", + 0600, + client->debugfs_dir, + client, + &monc_show_fops); + if (!client->monc.debugfs_file) + goto out; + + client->mdsc.debugfs_file = debugfs_create_file("mdsc", + 0600, + client->debugfs_dir, + client, + &mdsc_show_fops); + if (!client->mdsc.debugfs_file) + goto out; - snprintf(name, sizeof(name), "../../bdi/%s", - dev_name(fsc->backing_dev_info.dev)); - fsc->debugfs_bdi = - debugfs_create_symlink("bdi", - fsc->client->debugfs_dir, - name); - if (!fsc->debugfs_bdi) + client->osdc.debugfs_file = debugfs_create_file("osdc", + 0600, + client->debugfs_dir, + client, + &osdc_show_fops); + if (!client->osdc.debugfs_file) goto out; - dout("b\n"); - fsc->debugfs_mdsmap = debugfs_create_file("mdsmap", + client->debugfs_monmap = debugfs_create_file("monmap", 0600, - fsc->client->debugfs_dir, - fsc, + client->debugfs_dir, + client, + &monmap_show_fops); + if (!client->debugfs_monmap) + goto out; + + client->debugfs_mdsmap = debugfs_create_file("mdsmap", + 0600, + 
client->debugfs_dir, + client, &mdsmap_show_fops); - if (!fsc->debugfs_mdsmap) + if (!client->debugfs_mdsmap) + goto out; + + client->debugfs_osdmap = debugfs_create_file("osdmap", + 0600, + client->debugfs_dir, + client, + &osdmap_show_fops); + if (!client->debugfs_osdmap) goto out; - dout("ca\n"); - fsc->debugfs_mdsc = debugfs_create_file("mdsc", - 0600, - fsc->client->debugfs_dir, - fsc, - &mdsc_show_fops); - if (!fsc->debugfs_mdsc) + client->debugfs_dentry_lru = debugfs_create_file("dentry_lru", + 0600, + client->debugfs_dir, + client, + &dentry_lru_show_fops); + if (!client->debugfs_dentry_lru) goto out; - dout("da\n"); - fsc->debugfs_caps = debugfs_create_file("caps", + client->debugfs_caps = debugfs_create_file("caps", 0400, - fsc->client->debugfs_dir, - fsc, + client->debugfs_dir, + client, &caps_show_fops); - if (!fsc->debugfs_caps) + if (!client->debugfs_caps) goto out; - dout("ea\n"); - fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru", - 0600, - fsc->client->debugfs_dir, - fsc, - &dentry_lru_show_fops); - if (!fsc->debugfs_dentry_lru) + client->debugfs_congestion_kb = + debugfs_create_file("writeback_congestion_kb", + 0600, + client->debugfs_dir, + client, + &congestion_kb_fops); + if (!client->debugfs_congestion_kb) goto out; + sprintf(name, "../../bdi/%s", dev_name(client->sb->s_bdi->dev)); + client->debugfs_bdi = debugfs_create_symlink("bdi", client->debugfs_dir, + name); + return 0; out: - ceph_fs_debugfs_cleanup(fsc); - return err; + ceph_debugfs_client_cleanup(client); + return ret; } +void ceph_debugfs_client_cleanup(struct ceph_client *client) +{ + debugfs_remove(client->debugfs_bdi); + debugfs_remove(client->debugfs_caps); + debugfs_remove(client->debugfs_dentry_lru); + debugfs_remove(client->debugfs_osdmap); + debugfs_remove(client->debugfs_mdsmap); + debugfs_remove(client->debugfs_monmap); + debugfs_remove(client->osdc.debugfs_file); + debugfs_remove(client->mdsc.debugfs_file); + debugfs_remove(client->monc.debugfs_file); + 
debugfs_remove(client->debugfs_congestion_kb); + debugfs_remove(client->debugfs_dir); +} #else /* CONFIG_DEBUG_FS */ -int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) +int __init ceph_debugfs_init(void) +{ + return 0; +} + +void ceph_debugfs_cleanup(void) +{ +} + +int ceph_debugfs_client_init(struct ceph_client *client) { return 0; } -void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) +void ceph_debugfs_client_cleanup(struct ceph_client *client) { } diff --git a/trunk/include/linux/ceph/decode.h b/trunk/fs/ceph/decode.h similarity index 96% rename from trunk/include/linux/ceph/decode.h rename to trunk/fs/ceph/decode.h index c5b6939fb32a..3d25415afe63 100644 --- a/trunk/include/linux/ceph/decode.h +++ b/trunk/fs/ceph/decode.h @@ -191,11 +191,6 @@ static inline void ceph_encode_string(void **p, void *end, ceph_encode_need(p, end, n, bad); \ ceph_encode_copy(p, pv, n); \ } while (0) -#define ceph_encode_string_safe(p, end, s, n, bad) \ - do { \ - ceph_encode_need(p, end, n, bad); \ - ceph_encode_string(p, end, s, n); \ - } while (0) #endif diff --git a/trunk/fs/ceph/dir.c b/trunk/fs/ceph/dir.c index e0a2dc6fcafc..a1986eb52045 100644 --- a/trunk/fs/ceph/dir.c +++ b/trunk/fs/ceph/dir.c @@ -1,4 +1,4 @@ -#include +#include "ceph_debug.h" #include #include @@ -7,7 +7,6 @@ #include #include "super.h" -#include "mds_client.h" /* * Directory operations: readdir, lookup, create, link, unlink, @@ -95,7 +94,10 @@ static unsigned fpos_off(loff_t p) */ static int __dcache_readdir(struct file *filp, void *dirent, filldir_t filldir) + __releases(inode->i_lock) + __acquires(inode->i_lock) { + struct inode *inode = filp->f_dentry->d_inode; struct ceph_file_info *fi = filp->private_data; struct dentry *parent = filp->f_dentry; struct inode *dir = parent->d_inode; @@ -151,6 +153,7 @@ static int __dcache_readdir(struct file *filp, atomic_inc(&dentry->d_count); spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, 
filp->f_pos, dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); @@ -168,30 +171,35 @@ static int __dcache_readdir(struct file *filp, } else { dput(last); } + last = NULL; } + + spin_lock(&inode->i_lock); + spin_lock(&dcache_lock); + last = dentry; if (err < 0) - goto out; + goto out_unlock; + p = p->prev; filp->f_pos++; /* make sure a dentry wasn't dropped while we didn't have dcache_lock */ - if (!ceph_i_test(dir, CEPH_I_COMPLETE)) { - dout(" lost I_COMPLETE on %p; falling back to mds\n", dir); - err = -EAGAIN; - goto out; - } - - spin_lock(&dcache_lock); - p = p->prev; /* advance to next dentry */ - goto more; + if ((ceph_inode(dir)->i_ceph_flags & CEPH_I_COMPLETE)) + goto more; + dout(" lost I_COMPLETE on %p; falling back to mds\n", dir); + err = -EAGAIN; out_unlock: spin_unlock(&dcache_lock); -out: - if (last) + + if (last) { + spin_unlock(&inode->i_lock); dput(last); + spin_lock(&inode->i_lock); + } + return err; } @@ -219,15 +227,15 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) struct ceph_file_info *fi = filp->private_data; struct inode *inode = filp->f_dentry->d_inode; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *client = ceph_inode_to_client(inode); + struct ceph_mds_client *mdsc = &client->mdsc; unsigned frag = fpos_frag(filp->f_pos); int off = fpos_off(filp->f_pos); int err; u32 ftype; struct ceph_mds_reply_info_parsed *rinfo; - const int max_entries = fsc->mount_options->max_readdir; - const int max_bytes = fsc->mount_options->max_readdir_bytes; + const int max_entries = client->mount_args->max_readdir; + const int max_bytes = client->mount_args->max_readdir_bytes; dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); if (fi->at_end) @@ -259,17 +267,17 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) /* can we use the dcache? 
*/ spin_lock(&inode->i_lock); if ((filp->f_pos == 2 || fi->dentry) && - !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && + !ceph_test_opt(client, NOASYNCREADDIR) && ceph_snap(inode) != CEPH_SNAPDIR && (ci->i_ceph_flags & CEPH_I_COMPLETE) && __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { - spin_unlock(&inode->i_lock); err = __dcache_readdir(filp, dirent, filldir); - if (err != -EAGAIN) + if (err != -EAGAIN) { + spin_unlock(&inode->i_lock); return err; - } else { - spin_unlock(&inode->i_lock); + } } + spin_unlock(&inode->i_lock); if (fi->dentry) { err = note_last_dentry(fi, fi->dentry->d_name.name, fi->dentry->d_name.len); @@ -479,13 +487,14 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, struct dentry *dentry, int err) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); + struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); struct inode *parent = dentry->d_parent->d_inode; /* .snap dir? 
*/ if (err == -ENOENT && + ceph_vino(parent).ino != CEPH_INO_ROOT && /* no .snap in root dir */ strcmp(dentry->d_name.name, - fsc->mount_options->snapdir_name) == 0) { + client->mount_args->snapdir_name) == 0) { struct inode *inode = ceph_get_snapdir(parent); dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n", dentry, dentry->d_name.len, dentry->d_name.name, inode); @@ -530,8 +539,8 @@ static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry) static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *client = ceph_sb_to_client(dir->i_sb); + struct ceph_mds_client *mdsc = &client->mdsc; struct ceph_mds_request *req; int op; int err; @@ -563,7 +572,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, spin_lock(&dir->i_lock); dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags); if (strncmp(dentry->d_name.name, - fsc->mount_options->snapdir_name, + client->mount_args->snapdir_name, dentry->d_name.len) && !is_root_ceph_dentry(dir, dentry) && (ci->i_ceph_flags & CEPH_I_COMPLETE) && @@ -620,8 +629,8 @@ int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry) static int ceph_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *client = ceph_sb_to_client(dir->i_sb); + struct ceph_mds_client *mdsc = &client->mdsc; struct ceph_mds_request *req; int err; @@ -676,8 +685,8 @@ static int ceph_create(struct inode *dir, struct dentry *dentry, int mode, static int ceph_symlink(struct inode *dir, struct dentry *dentry, const char *dest) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *client = ceph_sb_to_client(dir->i_sb); + 
struct ceph_mds_client *mdsc = &client->mdsc; struct ceph_mds_request *req; int err; @@ -707,8 +716,8 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *client = ceph_sb_to_client(dir->i_sb); + struct ceph_mds_client *mdsc = &client->mdsc; struct ceph_mds_request *req; int err = -EROFS; int op; @@ -749,8 +758,8 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode) static int ceph_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *client = ceph_sb_to_client(dir->i_sb); + struct ceph_mds_client *mdsc = &client->mdsc; struct ceph_mds_request *req; int err; @@ -804,8 +813,8 @@ static int drop_caps_for_unlink(struct inode *inode) */ static int ceph_unlink(struct inode *dir, struct dentry *dentry) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *client = ceph_sb_to_client(dir->i_sb); + struct ceph_mds_client *mdsc = &client->mdsc; struct inode *inode = dentry->d_inode; struct ceph_mds_request *req; int err = -EROFS; @@ -845,8 +854,8 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { - struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *client = ceph_sb_to_client(old_dir->i_sb); + struct ceph_mds_client *mdsc = &client->mdsc; struct ceph_mds_request *req; int err; @@ -1067,7 +1076,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, struct ceph_inode_info *ci 
= ceph_inode(inode); int left; - if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) + if (!ceph_test_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) return -EISDIR; if (!cf->dir_info) { @@ -1168,7 +1177,7 @@ void ceph_dentry_lru_add(struct dentry *dn) dout("dentry_lru_add %p %p '%.*s'\n", di, dn, dn->d_name.len, dn->d_name.name); if (di) { - mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; + mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; spin_lock(&mdsc->dentry_lru_lock); list_add_tail(&di->lru, &mdsc->dentry_lru); mdsc->num_dentry++; @@ -1184,7 +1193,7 @@ void ceph_dentry_lru_touch(struct dentry *dn) dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn, dn->d_name.len, dn->d_name.name, di->offset); if (di) { - mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; + mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; spin_lock(&mdsc->dentry_lru_lock); list_move_tail(&di->lru, &mdsc->dentry_lru); spin_unlock(&mdsc->dentry_lru_lock); @@ -1199,7 +1208,7 @@ void ceph_dentry_lru_del(struct dentry *dn) dout("dentry_lru_del %p %p '%.*s'\n", di, dn, dn->d_name.len, dn->d_name.name); if (di) { - mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; + mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; spin_lock(&mdsc->dentry_lru_lock); list_del_init(&di->lru); mdsc->num_dentry--; diff --git a/trunk/fs/ceph/export.c b/trunk/fs/ceph/export.c index 2297d9426992..e38423e82f2e 100644 --- a/trunk/fs/ceph/export.c +++ b/trunk/fs/ceph/export.c @@ -1,11 +1,10 @@ -#include +#include "ceph_debug.h" #include #include #include #include "super.h" -#include "mds_client.h" /* * NFS export support @@ -121,7 +120,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, static struct dentry *__cfh_to_dentry(struct super_block *sb, struct ceph_nfs_confh *cfh) { - struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; + struct ceph_mds_client *mdsc = &ceph_sb_to_client(sb)->mdsc; struct inode *inode; struct dentry *dentry; struct ceph_vino vino; diff --git a/trunk/fs/ceph/file.c b/trunk/fs/ceph/file.c index 
e77c28cf3690..66e4da6dba22 100644 --- a/trunk/fs/ceph/file.c +++ b/trunk/fs/ceph/file.c @@ -1,6 +1,5 @@ -#include +#include "ceph_debug.h" -#include #include #include #include @@ -39,8 +38,8 @@ static struct ceph_mds_request * prepare_open_request(struct super_block *sb, int flags, int create_mode) { - struct ceph_fs_client *fsc = ceph_sb_to_client(sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *client = ceph_sb_to_client(sb); + struct ceph_mds_client *mdsc = &client->mdsc; struct ceph_mds_request *req; int want_auth = USE_ANY_MDS; int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN; @@ -118,8 +117,8 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) int ceph_open(struct inode *inode, struct file *file) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *client = ceph_sb_to_client(inode->i_sb); + struct ceph_mds_client *mdsc = &client->mdsc; struct ceph_mds_request *req; struct ceph_file_info *cf = file->private_data; struct inode *parent_inode = file->f_dentry->d_parent->d_inode; @@ -217,8 +216,8 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd, int mode, int locked_dir) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *client = ceph_sb_to_client(dir->i_sb); + struct ceph_mds_client *mdsc = &client->mdsc; struct file *file = nd->intent.open.file; struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry); struct ceph_mds_request *req; @@ -270,6 +269,163 @@ int ceph_release(struct inode *inode, struct file *file) return 0; } +/* + * build a vector of user pages + */ +static struct page **get_direct_page_vector(const char __user *data, + int num_pages, + loff_t off, size_t len) +{ + struct page **pages; + int rc; + + pages = 
kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); + if (!pages) + return ERR_PTR(-ENOMEM); + + down_read(&current->mm->mmap_sem); + rc = get_user_pages(current, current->mm, (unsigned long)data, + num_pages, 0, 0, pages, NULL); + up_read(&current->mm->mmap_sem); + if (rc < 0) + goto fail; + return pages; + +fail: + kfree(pages); + return ERR_PTR(rc); +} + +static void put_page_vector(struct page **pages, int num_pages) +{ + int i; + + for (i = 0; i < num_pages; i++) + put_page(pages[i]); + kfree(pages); +} + +void ceph_release_page_vector(struct page **pages, int num_pages) +{ + int i; + + for (i = 0; i < num_pages; i++) + __free_pages(pages[i], 0); + kfree(pages); +} + +/* + * allocate a vector new pages + */ +static struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) +{ + struct page **pages; + int i; + + pages = kmalloc(sizeof(*pages) * num_pages, flags); + if (!pages) + return ERR_PTR(-ENOMEM); + for (i = 0; i < num_pages; i++) { + pages[i] = __page_cache_alloc(flags); + if (pages[i] == NULL) { + ceph_release_page_vector(pages, i); + return ERR_PTR(-ENOMEM); + } + } + return pages; +} + +/* + * copy user data into a page vector + */ +static int copy_user_to_page_vector(struct page **pages, + const char __user *data, + loff_t off, size_t len) +{ + int i = 0; + int po = off & ~PAGE_CACHE_MASK; + int left = len; + int l, bad; + + while (left > 0) { + l = min_t(int, PAGE_CACHE_SIZE-po, left); + bad = copy_from_user(page_address(pages[i]) + po, data, l); + if (bad == l) + return -EFAULT; + data += l - bad; + left -= l - bad; + po += l - bad; + if (po == PAGE_CACHE_SIZE) { + po = 0; + i++; + } + } + return len; +} + +/* + * copy user data from a page vector into a user pointer + */ +static int copy_page_vector_to_user(struct page **pages, char __user *data, + loff_t off, size_t len) +{ + int i = 0; + int po = off & ~PAGE_CACHE_MASK; + int left = len; + int l, bad; + + while (left > 0) { + l = min_t(int, left, PAGE_CACHE_SIZE-po); + bad = copy_to_user(data, 
page_address(pages[i]) + po, l); + if (bad == l) + return -EFAULT; + data += l - bad; + left -= l - bad; + if (po) { + po += l - bad; + if (po == PAGE_CACHE_SIZE) + po = 0; + } + i++; + } + return len; +} + +/* + * Zero an extent within a page vector. Offset is relative to the + * start of the first page. + */ +static void zero_page_vector_range(int off, int len, struct page **pages) +{ + int i = off >> PAGE_CACHE_SHIFT; + + off &= ~PAGE_CACHE_MASK; + + dout("zero_page_vector_page %u~%u\n", off, len); + + /* leading partial page? */ + if (off) { + int end = min((int)PAGE_CACHE_SIZE, off + len); + dout("zeroing %d %p head from %d\n", i, pages[i], + (int)off); + zero_user_segment(pages[i], off, end); + len -= (end - off); + i++; + } + while (len >= PAGE_CACHE_SIZE) { + dout("zeroing %d %p len=%d\n", i, pages[i], len); + zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE); + len -= PAGE_CACHE_SIZE; + i++; + } + /* trailing partial page? */ + if (len) { + dout("zeroing %d %p tail to %d\n", i, pages[i], (int)len); + zero_user_segment(pages[i], 0, len); + } +} + + /* * Read a range of bytes striped over one or more objects. Iterate over * objects we stripe over. (That's not atomic, but good enough for now.) 
@@ -282,7 +438,7 @@ static int striped_read(struct inode *inode, struct page **pages, int num_pages, int *checkeof) { - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_client *client = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); u64 pos, this_len; int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ @@ -303,7 +459,7 @@ static int striped_read(struct inode *inode, more: this_len = left; - ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), + ret = ceph_osdc_readpages(&client->osdc, ceph_vino(inode), &ci->i_layout, pos, &this_len, ci->i_truncate_seq, ci->i_truncate_size, @@ -321,8 +477,8 @@ static int striped_read(struct inode *inode, if (read < pos - off) { dout(" zero gap %llu to %llu\n", off + read, pos); - ceph_zero_page_vector_range(page_off + read, - pos - off - read, pages); + zero_page_vector_range(page_off + read, + pos - off - read, pages); } pos += ret; read = pos - off; @@ -339,8 +495,8 @@ static int striped_read(struct inode *inode, /* was original extent fully inside i_size? */ if (pos + left <= inode->i_size) { dout("zero tail\n"); - ceph_zero_page_vector_range(page_off + read, len - read, - pages); + zero_page_vector_range(page_off + read, len - read, + pages); read = len; goto out; } @@ -375,7 +531,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); if (file->f_flags & O_DIRECT) { - pages = ceph_get_direct_page_vector(data, num_pages, off, len); + pages = get_direct_page_vector(data, num_pages, off, len); /* * flush any page cache pages in this range. 
this @@ -396,13 +552,13 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, ret = striped_read(inode, off, len, pages, num_pages, checkeof); if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) - ret = ceph_copy_page_vector_to_user(pages, data, off, ret); + ret = copy_page_vector_to_user(pages, data, off, ret); if (ret >= 0) *poff = off + ret; done: if (file->f_flags & O_DIRECT) - ceph_put_page_vector(pages, num_pages); + put_page_vector(pages, num_pages); else ceph_release_page_vector(pages, num_pages); dout("sync_read result %d\n", ret); @@ -438,7 +594,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, { struct inode *inode = file->f_dentry->d_inode; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_client *client = ceph_inode_to_client(inode); struct ceph_osd_request *req; struct page **pages; int num_pages; @@ -486,7 +642,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, */ more: len = left; - req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, + req = ceph_osdc_new_request(&client->osdc, &ci->i_layout, ceph_vino(inode), pos, &len, CEPH_OSD_OP_WRITE, flags, ci->i_snap_realm->cached_context, @@ -499,7 +655,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, num_pages = calc_pages_for(pos, len); if (file->f_flags & O_DIRECT) { - pages = ceph_get_direct_page_vector(data, num_pages, pos, len); + pages = get_direct_page_vector(data, num_pages, pos, len); if (IS_ERR(pages)) { ret = PTR_ERR(pages); goto out; @@ -517,7 +673,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, ret = PTR_ERR(pages); goto out; } - ret = ceph_copy_user_to_page_vector(pages, data, pos, len); + ret = copy_user_to_page_vector(pages, data, pos, len); if (ret < 0) { ceph_release_page_vector(pages, num_pages); goto out; @@ -533,7 +689,7 @@ static ssize_t ceph_sync_write(struct file 
*file, const char __user *data, req->r_num_pages = num_pages; req->r_inode = inode; - ret = ceph_osdc_start_request(&fsc->client->osdc, req, false); + ret = ceph_osdc_start_request(&client->osdc, req, false); if (!ret) { if (req->r_safe_callback) { /* @@ -545,11 +701,11 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, spin_unlock(&ci->i_unsafe_lock); ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); } - ret = ceph_osdc_wait_request(&fsc->client->osdc, req); + ret = ceph_osdc_wait_request(&client->osdc, req); } if (file->f_flags & O_DIRECT) - ceph_put_page_vector(pages, num_pages); + put_page_vector(pages, num_pages); else if (file->f_flags & O_SYNC) ceph_release_page_vector(pages, num_pages); @@ -658,8 +814,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, struct ceph_file_info *fi = file->private_data; struct inode *inode = file->f_dentry->d_inode; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_osd_client *osdc = - &ceph_sb_to_client(inode->i_sb)->client->osdc; + struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; loff_t endoff = pos + iov->iov_len; int want, got = 0; int ret, err; diff --git a/trunk/fs/ceph/inode.c b/trunk/fs/ceph/inode.c index 1d6a45b5a04c..62377ec37edf 100644 --- a/trunk/fs/ceph/inode.c +++ b/trunk/fs/ceph/inode.c @@ -1,4 +1,4 @@ -#include +#include "ceph_debug.h" #include #include @@ -13,8 +13,7 @@ #include #include "super.h" -#include "mds_client.h" -#include +#include "decode.h" /* * Ceph inode operations @@ -385,7 +384,7 @@ void ceph_destroy_inode(struct inode *inode) */ if (ci->i_snap_realm) { struct ceph_mds_client *mdsc = - ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; + &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; struct ceph_snap_realm *realm = ci->i_snap_realm; dout(" dropping residual ref to snap realm %p\n", realm); @@ -686,7 +685,7 @@ static int fill_inode(struct inode *inode, } /* it may be better to set st_size in getattr instead? 
*/ - if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), RBYTES)) + if (ceph_test_opt(ceph_sb_to_client(inode->i_sb), RBYTES)) inode->i_size = ci->i_rbytes; break; default: @@ -902,7 +901,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, struct inode *in = NULL; struct ceph_mds_reply_inode *ininfo; struct ceph_vino vino; - struct ceph_fs_client *fsc = ceph_sb_to_client(sb); + struct ceph_client *client = ceph_sb_to_client(sb); int i = 0; int err = 0; @@ -966,7 +965,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, */ if (rinfo->head->is_dentry && !req->r_aborted && (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, - fsc->mount_options->snapdir_name, + client->mount_args->snapdir_name, req->r_dentry->d_name.len))) { /* * lookup link rename : null -> possibly existing inode @@ -1534,7 +1533,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) struct inode *parent_inode = dentry->d_parent->d_inode; const unsigned int ia_valid = attr->ia_valid; struct ceph_mds_request *req; - struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; + struct ceph_mds_client *mdsc = &ceph_sb_to_client(dentry->d_sb)->mdsc; int issued; int release = 0, dirtied = 0; int mask = 0; @@ -1729,8 +1728,8 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) */ int ceph_do_getattr(struct inode *inode, int mask) { - struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *client = ceph_sb_to_client(inode->i_sb); + struct ceph_mds_client *mdsc = &client->mdsc; struct ceph_mds_request *req; int err; diff --git a/trunk/fs/ceph/ioctl.c b/trunk/fs/ceph/ioctl.c index 8888c9ba68db..76e307d2aba1 100644 --- a/trunk/fs/ceph/ioctl.c +++ b/trunk/fs/ceph/ioctl.c @@ -1,10 +1,8 @@ #include -#include "super.h" -#include "mds_client.h" -#include - #include "ioctl.h" +#include "super.h" +#include "ceph_debug.h" /* @@ -39,7 +37,7 @@ static 
long ceph_ioctl_set_layout(struct file *file, void __user *arg) { struct inode *inode = file->f_dentry->d_inode; struct inode *parent_inode = file->f_dentry->d_parent->d_inode; - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_mds_request *req; struct ceph_ioctl_layout l; int err, i; @@ -91,68 +89,6 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) return err; } -/* - * Set a layout policy on a directory inode. All items in the tree - * rooted at this inode will inherit this layout on creation, - * (It doesn't apply retroactively ) - * unless a subdirectory has its own layout policy. - */ -static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg) -{ - struct inode *inode = file->f_dentry->d_inode; - struct ceph_mds_request *req; - struct ceph_ioctl_layout l; - int err, i; - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; - - /* copy and validate */ - if (copy_from_user(&l, arg, sizeof(l))) - return -EFAULT; - - if ((l.object_size & ~PAGE_MASK) || - (l.stripe_unit & ~PAGE_MASK) || - !l.stripe_unit || - (l.object_size && - (unsigned)l.object_size % (unsigned)l.stripe_unit)) - return -EINVAL; - - /* make sure it's a valid data pool */ - if (l.data_pool > 0) { - mutex_lock(&mdsc->mutex); - err = -EINVAL; - for (i = 0; i < mdsc->mdsmap->m_num_data_pg_pools; i++) - if (mdsc->mdsmap->m_data_pg_pools[i] == l.data_pool) { - err = 0; - break; - } - mutex_unlock(&mdsc->mutex); - if (err) - return err; - } - - req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETDIRLAYOUT, - USE_AUTH_MDS); - - if (IS_ERR(req)) - return PTR_ERR(req); - req->r_inode = igrab(inode); - - req->r_args.setlayout.layout.fl_stripe_unit = - cpu_to_le32(l.stripe_unit); - req->r_args.setlayout.layout.fl_stripe_count = - cpu_to_le32(l.stripe_count); - req->r_args.setlayout.layout.fl_object_size = - cpu_to_le32(l.object_size); - 
req->r_args.setlayout.layout.fl_pg_pool = - cpu_to_le32(l.data_pool); - req->r_args.setlayout.layout.fl_pg_preferred = - cpu_to_le32(l.preferred_osd); - - err = ceph_mdsc_do_request(mdsc, inode, req); - ceph_mdsc_put_request(req); - return err; -} - /* * Return object name, size/offset information, and location (OSD * number, network address) for a given file offset. @@ -162,8 +98,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) struct ceph_ioctl_dataloc dl; struct inode *inode = file->f_dentry->d_inode; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_osd_client *osdc = - &ceph_sb_to_client(inode->i_sb)->client->osdc; + struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; u64 len = 1, olen; u64 tmp; struct ceph_object_layout ol; @@ -239,15 +174,11 @@ long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case CEPH_IOC_SET_LAYOUT: return ceph_ioctl_set_layout(file, (void __user *)arg); - case CEPH_IOC_SET_LAYOUT_POLICY: - return ceph_ioctl_set_layout_policy(file, (void __user *)arg); - case CEPH_IOC_GET_DATALOC: return ceph_ioctl_get_dataloc(file, (void __user *)arg); case CEPH_IOC_LAZYIO: return ceph_ioctl_lazyio(file); } - return -ENOTTY; } diff --git a/trunk/fs/ceph/ioctl.h b/trunk/fs/ceph/ioctl.h index a6ce54e94eb5..88451a3b6857 100644 --- a/trunk/fs/ceph/ioctl.h +++ b/trunk/fs/ceph/ioctl.h @@ -4,7 +4,7 @@ #include #include -#define CEPH_IOCTL_MAGIC 0x98 +#define CEPH_IOCTL_MAGIC 0x97 /* just use u64 to align sanely on all archs */ struct ceph_ioctl_layout { @@ -17,8 +17,6 @@ struct ceph_ioctl_layout { struct ceph_ioctl_layout) #define CEPH_IOC_SET_LAYOUT _IOW(CEPH_IOCTL_MAGIC, 2, \ struct ceph_ioctl_layout) -#define CEPH_IOC_SET_LAYOUT_POLICY _IOW(CEPH_IOCTL_MAGIC, 5, \ - struct ceph_ioctl_layout) /* * Extract identity, address of the OSD and object storing a given diff --git a/trunk/fs/ceph/locks.c b/trunk/fs/ceph/locks.c index 40abde93c345..ff4e753aae92 100644 --- 
a/trunk/fs/ceph/locks.c +++ b/trunk/fs/ceph/locks.c @@ -1,11 +1,11 @@ -#include +#include "ceph_debug.h" #include #include #include "super.h" #include "mds_client.h" -#include +#include "pagelist.h" /** * Implement fcntl and flock locking functions. @@ -16,7 +16,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, { struct inode *inode = file->f_dentry->d_inode; struct ceph_mds_client *mdsc = - ceph_sb_to_client(inode->i_sb)->mdsc; + &ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_mds_request *req; int err; @@ -181,9 +181,8 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) * Encode the flock and fcntl locks for the given inode into the pagelist. * Format is: #fcntl locks, sequential fcntl locks, #flock locks, * sequential flock locks. - * Must be called with lock_flocks() already held. - * If we encounter more of a specific lock type than expected, - * we return the value 1. + * Must be called with BLK already held, and the lock numbers should have + * been gathered under the same lock holding window. 
*/ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, int num_fcntl_locks, int num_flock_locks) @@ -191,8 +190,6 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, struct file_lock *lock; struct ceph_filelock cephlock; int err = 0; - int seen_fcntl = 0; - int seen_flock = 0; dout("encoding %d flock and %d fcntl locks", num_flock_locks, num_fcntl_locks); @@ -201,11 +198,6 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, goto fail; for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { if (lock->fl_flags & FL_POSIX) { - ++seen_fcntl; - if (seen_fcntl > num_fcntl_locks) { - err = -ENOSPC; - goto fail; - } err = lock_to_ceph_filelock(lock, &cephlock); if (err) goto fail; @@ -221,11 +213,6 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, goto fail; for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { if (lock->fl_flags & FL_FLOCK) { - ++seen_flock; - if (seen_flock > num_flock_locks) { - err = -ENOSPC; - goto fail; - } err = lock_to_ceph_filelock(lock, &cephlock); if (err) goto fail; diff --git a/trunk/fs/ceph/mds_client.c b/trunk/fs/ceph/mds_client.c index 3142b15940c2..fad95f8f2608 100644 --- a/trunk/fs/ceph/mds_client.c +++ b/trunk/fs/ceph/mds_client.c @@ -1,21 +1,17 @@ -#include +#include "ceph_debug.h" -#include #include #include #include -#include -#include #include -#include "super.h" #include "mds_client.h" - -#include -#include -#include -#include -#include +#include "mon_client.h" +#include "super.h" +#include "messenger.h" +#include "decode.h" +#include "auth.h" +#include "pagelist.h" /* * A cluster of MDS (metadata server) daemons is responsible for @@ -290,9 +286,8 @@ void ceph_put_mds_session(struct ceph_mds_session *s) atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1); if (atomic_dec_and_test(&s->s_ref)) { if (s->s_authorizer) - s->s_mdsc->fsc->client->monc.auth->ops->destroy_authorizer( - s->s_mdsc->fsc->client->monc.auth, 
- s->s_authorizer); + s->s_mdsc->client->monc.auth->ops->destroy_authorizer( + s->s_mdsc->client->monc.auth, s->s_authorizer); kfree(s); } } @@ -349,7 +344,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, s->s_seq = 0; mutex_init(&s->s_mutex); - ceph_con_init(mdsc->fsc->client->msgr, &s->s_con); + ceph_con_init(mdsc->client->msgr, &s->s_con); s->s_con.private = s; s->s_con.ops = &mds_con_ops; s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS; @@ -604,7 +599,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, } else if (req->r_dentry) { struct inode *dir = req->r_dentry->d_parent->d_inode; - if (dir->i_sb != mdsc->fsc->sb) { + if (dir->i_sb != mdsc->client->sb) { /* not this fs! */ inode = req->r_dentry->d_inode; } else if (ceph_snap(dir) != CEPH_NOSNAP) { @@ -889,7 +884,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, __ceph_remove_cap(cap); if (!__ceph_is_any_real_caps(ci)) { struct ceph_mds_client *mdsc = - ceph_sb_to_client(inode->i_sb)->mdsc; + &ceph_sb_to_client(inode->i_sb)->mdsc; spin_lock(&mdsc->cap_dirty_lock); if (!list_empty(&ci->i_dirty_item)) { @@ -1151,7 +1146,7 @@ int ceph_add_cap_releases(struct ceph_mds_client *mdsc, struct ceph_msg *msg, *partial = NULL; struct ceph_mds_cap_release *head; int err = -ENOMEM; - int extra = mdsc->fsc->mount_options->cap_release_safety; + int extra = mdsc->client->mount_args->cap_release_safety; int num; dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds, @@ -2090,7 +2085,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) /* insert trace into our cache */ mutex_lock(&req->r_fill_mutex); - err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); + err = ceph_fill_trace(mdsc->client->sb, req, req->r_session); if (err == 0) { if (result == 0 && rinfo->dir_nr) ceph_readdir_prepopulate(req, req->r_session); @@ -2366,35 +2361,19 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, 
if (recon_state->flock) { int num_fcntl_locks, num_flock_locks; - struct ceph_pagelist_cursor trunc_point; - - ceph_pagelist_set_cursor(pagelist, &trunc_point); - do { - lock_flocks(); - ceph_count_locks(inode, &num_fcntl_locks, - &num_flock_locks); - rec.v2.flock_len = (2*sizeof(u32) + - (num_fcntl_locks+num_flock_locks) * - sizeof(struct ceph_filelock)); - unlock_flocks(); - - /* pre-alloc pagelist */ - ceph_pagelist_truncate(pagelist, &trunc_point); - err = ceph_pagelist_append(pagelist, &rec, reclen); - if (!err) - err = ceph_pagelist_reserve(pagelist, - rec.v2.flock_len); - - /* encode locks */ - if (!err) { - lock_flocks(); - err = ceph_encode_locks(inode, - pagelist, - num_fcntl_locks, - num_flock_locks); - unlock_flocks(); - } - } while (err == -ENOSPC); + + lock_kernel(); + ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); + rec.v2.flock_len = (2*sizeof(u32) + + (num_fcntl_locks+num_flock_locks) * + sizeof(struct ceph_filelock)); + + err = ceph_pagelist_append(pagelist, &rec, reclen); + if (!err) + err = ceph_encode_locks(inode, pagelist, + num_fcntl_locks, + num_flock_locks); + unlock_kernel(); } else { err = ceph_pagelist_append(pagelist, &rec, reclen); } @@ -2634,7 +2613,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, struct ceph_msg *msg) { - struct super_block *sb = mdsc->fsc->sb; + struct super_block *sb = mdsc->client->sb; struct inode *inode; struct ceph_inode_info *ci; struct dentry *parent, *dentry; @@ -2912,16 +2891,10 @@ static void delayed_work(struct work_struct *work) schedule_delayed(mdsc); } -int ceph_mdsc_init(struct ceph_fs_client *fsc) +int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) { - struct ceph_mds_client *mdsc; - - mdsc = kzalloc(sizeof(struct ceph_mds_client), GFP_NOFS); - if (!mdsc) - return -ENOMEM; - mdsc->fsc = fsc; - fsc->mdsc = mdsc; + mdsc->client = client; mutex_init(&mdsc->mutex); mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); 
if (mdsc->mdsmap == NULL) @@ -2954,7 +2927,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) INIT_LIST_HEAD(&mdsc->dentry_lru); ceph_caps_init(mdsc); - ceph_adjust_min_caps(mdsc, fsc->min_caps); + ceph_adjust_min_caps(mdsc, client->min_caps); return 0; } @@ -2966,7 +2939,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) static void wait_requests(struct ceph_mds_client *mdsc) { struct ceph_mds_request *req; - struct ceph_fs_client *fsc = mdsc->fsc; + struct ceph_client *client = mdsc->client; mutex_lock(&mdsc->mutex); if (__get_oldest_req(mdsc)) { @@ -2974,7 +2947,7 @@ static void wait_requests(struct ceph_mds_client *mdsc) dout("wait_requests waiting for requests\n"); wait_for_completion_timeout(&mdsc->safe_umount_waiters, - fsc->client->options->mount_timeout * HZ); + client->mount_args->mount_timeout * HZ); /* tear down remaining requests */ mutex_lock(&mdsc->mutex); @@ -3057,7 +3030,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) { u64 want_tid, want_flush; - if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) + if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN) return; dout("sync\n"); @@ -3080,7 +3053,7 @@ bool done_closing_sessions(struct ceph_mds_client *mdsc) { int i, n = 0; - if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) + if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN) return true; mutex_lock(&mdsc->mutex); @@ -3098,8 +3071,8 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) { struct ceph_mds_session *session; int i; - struct ceph_fs_client *fsc = mdsc->fsc; - unsigned long timeout = fsc->client->options->mount_timeout * HZ; + struct ceph_client *client = mdsc->client; + unsigned long timeout = client->mount_args->mount_timeout * HZ; dout("close_sessions\n"); @@ -3146,7 +3119,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) dout("stopped\n"); } -static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) +void ceph_mdsc_stop(struct ceph_mds_client *mdsc) { dout("stop\n"); 
cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ @@ -3156,15 +3129,6 @@ static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) ceph_caps_finalize(mdsc); } -void ceph_mdsc_destroy(struct ceph_fs_client *fsc) -{ - struct ceph_mds_client *mdsc = fsc->mdsc; - - ceph_mdsc_stop(mdsc); - fsc->mdsc = NULL; - kfree(mdsc); -} - /* * handle mds map update. @@ -3181,14 +3145,14 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg) ceph_decode_need(&p, end, sizeof(fsid)+2*sizeof(u32), bad); ceph_decode_copy(&p, &fsid, sizeof(fsid)); - if (ceph_check_fsid(mdsc->fsc->client, &fsid) < 0) + if (ceph_check_fsid(mdsc->client, &fsid) < 0) return; epoch = ceph_decode_32(&p); maplen = ceph_decode_32(&p); dout("handle_map epoch %u len %d\n", epoch, (int)maplen); /* do we need it? */ - ceph_monc_got_mdsmap(&mdsc->fsc->client->monc, epoch); + ceph_monc_got_mdsmap(&mdsc->client->monc, epoch); mutex_lock(&mdsc->mutex); if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) { dout("handle_map epoch %u <= our %u\n", @@ -3212,7 +3176,7 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg) } else { mdsc->mdsmap = newmap; /* first mds map */ } - mdsc->fsc->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size; + mdsc->client->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size; __wake_requests(mdsc, &mdsc->waiting_for_map); @@ -3313,7 +3277,7 @@ static int get_authorizer(struct ceph_connection *con, { struct ceph_mds_session *s = con->private; struct ceph_mds_client *mdsc = s->s_mdsc; - struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; + struct ceph_auth_client *ac = mdsc->client->monc.auth; int ret = 0; if (force_new && s->s_authorizer) { @@ -3347,7 +3311,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len) { struct ceph_mds_session *s = con->private; struct ceph_mds_client *mdsc = s->s_mdsc; - struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; + struct ceph_auth_client *ac = 
mdsc->client->monc.auth; return ac->ops->verify_authorizer_reply(ac, s->s_authorizer, len); } @@ -3356,12 +3320,12 @@ static int invalidate_authorizer(struct ceph_connection *con) { struct ceph_mds_session *s = con->private; struct ceph_mds_client *mdsc = s->s_mdsc; - struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; + struct ceph_auth_client *ac = mdsc->client->monc.auth; if (ac->ops->invalidate_authorizer) ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS); - return ceph_monc_validate_auth(&mdsc->fsc->client->monc); + return ceph_monc_validate_auth(&mdsc->client->monc); } static const struct ceph_connection_operations mds_con_ops = { @@ -3374,4 +3338,7 @@ static const struct ceph_connection_operations mds_con_ops = { .peer_reset = peer_reset, }; + + + /* eof */ diff --git a/trunk/fs/ceph/mds_client.h b/trunk/fs/ceph/mds_client.h index d66d63c72355..c98267ce6d2a 100644 --- a/trunk/fs/ceph/mds_client.h +++ b/trunk/fs/ceph/mds_client.h @@ -8,9 +8,9 @@ #include #include -#include -#include -#include +#include "types.h" +#include "messenger.h" +#include "mdsmap.h" /* * Some lock dependencies: @@ -26,7 +26,7 @@ * */ -struct ceph_fs_client; +struct ceph_client; struct ceph_cap; /* @@ -230,7 +230,7 @@ struct ceph_mds_request { * mds client state */ struct ceph_mds_client { - struct ceph_fs_client *fsc; + struct ceph_client *client; struct mutex mutex; /* all nested structures */ struct ceph_mdsmap *mdsmap; @@ -289,6 +289,11 @@ struct ceph_mds_client { int caps_avail_count; /* unused, unreserved */ int caps_min_count; /* keep at least this many (unreserved) */ + +#ifdef CONFIG_DEBUG_FS + struct dentry *debugfs_file; +#endif + spinlock_t dentry_lru_lock; struct list_head dentry_lru; int num_dentry; @@ -311,9 +316,10 @@ extern void ceph_put_mds_session(struct ceph_mds_session *s); extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc, struct ceph_msg *msg, int mds); -extern int ceph_mdsc_init(struct ceph_fs_client *fsc); +extern int ceph_mdsc_init(struct 
ceph_mds_client *mdsc, + struct ceph_client *client); extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc); -extern void ceph_mdsc_destroy(struct ceph_fs_client *fsc); +extern void ceph_mdsc_stop(struct ceph_mds_client *mdsc); extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc); diff --git a/trunk/fs/ceph/mdsmap.c b/trunk/fs/ceph/mdsmap.c index 73b7d44e8a35..040be6d1150b 100644 --- a/trunk/fs/ceph/mdsmap.c +++ b/trunk/fs/ceph/mdsmap.c @@ -1,4 +1,4 @@ -#include +#include "ceph_debug.h" #include #include @@ -6,9 +6,9 @@ #include #include -#include -#include -#include +#include "mdsmap.h" +#include "messenger.h" +#include "decode.h" #include "super.h" @@ -117,8 +117,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) } dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n", - i+1, n, global_id, mds, inc, - ceph_pr_addr(&addr.in_addr), + i+1, n, global_id, mds, inc, pr_addr(&addr.in_addr), ceph_mds_state_name(state)); if (mds >= 0 && mds < m->m_max_mds && state > 0) { m->m_info[mds].global_id = global_id; diff --git a/trunk/include/linux/ceph/mdsmap.h b/trunk/fs/ceph/mdsmap.h similarity index 100% rename from trunk/include/linux/ceph/mdsmap.h rename to trunk/fs/ceph/mdsmap.h diff --git a/trunk/net/ceph/messenger.c b/trunk/fs/ceph/messenger.c similarity index 89% rename from trunk/net/ceph/messenger.c rename to trunk/fs/ceph/messenger.c index 0e8157ee5d43..2502d76fcec1 100644 --- a/trunk/net/ceph/messenger.c +++ b/trunk/fs/ceph/messenger.c @@ -1,4 +1,4 @@ -#include +#include "ceph_debug.h" #include #include @@ -9,14 +9,12 @@ #include #include #include -#include -#include #include -#include -#include -#include -#include +#include "super.h" +#include "messenger.h" +#include "decode.h" +#include "pagelist.h" /* * Ceph uses the messenger to exchange ceph_msg messages with other @@ -50,7 +48,7 @@ static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN]; static DEFINE_SPINLOCK(addr_str_lock); static int last_addr_str; -const char *ceph_pr_addr(const 
struct sockaddr_storage *ss) +const char *pr_addr(const struct sockaddr_storage *ss) { int i; char *s; @@ -81,7 +79,6 @@ const char *ceph_pr_addr(const struct sockaddr_storage *ss) return s; } -EXPORT_SYMBOL(ceph_pr_addr); static void encode_my_addr(struct ceph_messenger *msgr) { @@ -94,7 +91,7 @@ static void encode_my_addr(struct ceph_messenger *msgr) */ struct workqueue_struct *ceph_msgr_wq; -int ceph_msgr_init(void) +int __init ceph_msgr_init(void) { ceph_msgr_wq = create_workqueue("ceph-msgr"); if (IS_ERR(ceph_msgr_wq)) { @@ -105,19 +102,16 @@ int ceph_msgr_init(void) } return 0; } -EXPORT_SYMBOL(ceph_msgr_init); void ceph_msgr_exit(void) { destroy_workqueue(ceph_msgr_wq); } -EXPORT_SYMBOL(ceph_msgr_exit); void ceph_msgr_flush(void) { flush_workqueue(ceph_msgr_wq); } -EXPORT_SYMBOL(ceph_msgr_flush); /* @@ -227,19 +221,19 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con) set_sock_callbacks(sock, con); - dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr)); + dout("connect %s\n", pr_addr(&con->peer_addr.in_addr)); ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), O_NONBLOCK); if (ret == -EINPROGRESS) { dout("connect %s EINPROGRESS sk_state = %u\n", - ceph_pr_addr(&con->peer_addr.in_addr), + pr_addr(&con->peer_addr.in_addr), sock->sk->sk_state); ret = 0; } if (ret < 0) { pr_err("connect %s error %d\n", - ceph_pr_addr(&con->peer_addr.in_addr), ret); + pr_addr(&con->peer_addr.in_addr), ret); sock_release(sock); con->sock = NULL; con->error_msg = "connect error"; @@ -340,8 +334,7 @@ static void reset_connection(struct ceph_connection *con) */ void ceph_con_close(struct ceph_connection *con) { - dout("con_close %p peer %s\n", con, - ceph_pr_addr(&con->peer_addr.in_addr)); + dout("con_close %p peer %s\n", con, pr_addr(&con->peer_addr.in_addr)); set_bit(CLOSED, &con->state); /* in case there's queued work */ clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */ clear_bit(LOSSYTX, &con->state); /* so we retry 
next connect */ @@ -354,21 +347,19 @@ void ceph_con_close(struct ceph_connection *con) mutex_unlock(&con->mutex); queue_con(con); } -EXPORT_SYMBOL(ceph_con_close); /* * Reopen a closed connection, with a new peer address. */ void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr) { - dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr)); + dout("con_open %p %s\n", con, pr_addr(&addr->in_addr)); set_bit(OPENING, &con->state); clear_bit(CLOSED, &con->state); memcpy(&con->peer_addr, addr, sizeof(*addr)); con->delay = 0; /* reset backoff memory */ queue_con(con); } -EXPORT_SYMBOL(ceph_con_open); /* * return true if this connection ever successfully opened @@ -415,7 +406,6 @@ void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con) INIT_LIST_HEAD(&con->out_sent); INIT_DELAYED_WORK(&con->work, con_work); } -EXPORT_SYMBOL(ceph_con_init); /* @@ -539,11 +529,8 @@ static void prepare_write_message(struct ceph_connection *con) if (le32_to_cpu(m->hdr.data_len) > 0) { /* initialize page iterator */ con->out_msg_pos.page = 0; - if (m->pages) - con->out_msg_pos.page_pos = - le16_to_cpu(m->hdr.data_off) & ~PAGE_MASK; - else - con->out_msg_pos.page_pos = 0; + con->out_msg_pos.page_pos = + le16_to_cpu(m->hdr.data_off) & ~PAGE_MASK; con->out_msg_pos.data_pos = 0; con->out_msg_pos.did_page_crc = 0; con->out_more = 1; /* data + footer will follow */ @@ -660,7 +647,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr, dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, con->connect_seq, global_seq, proto); - con->out_connect.features = cpu_to_le64(msgr->supported_features); + con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED); con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); con->out_connect.global_seq = cpu_to_le32(global_seq); @@ -725,31 +712,6 @@ static int write_partial_kvec(struct ceph_connection *con) return ret; 
/* done! */ } -#ifdef CONFIG_BLOCK -static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg) -{ - if (!bio) { - *iter = NULL; - *seg = 0; - return; - } - *iter = bio; - *seg = bio->bi_idx; -} - -static void iter_bio_next(struct bio **bio_iter, int *seg) -{ - if (*bio_iter == NULL) - return; - - BUG_ON(*seg >= (*bio_iter)->bi_vcnt); - - (*seg)++; - if (*seg == (*bio_iter)->bi_vcnt) - init_bio_iter((*bio_iter)->bi_next, bio_iter, seg); -} -#endif - /* * Write as much message data payload as we can. If we finish, queue * up the footer. @@ -764,46 +726,21 @@ static int write_partial_msg_pages(struct ceph_connection *con) size_t len; int crc = con->msgr->nocrc; int ret; - int total_max_write; - int in_trail = 0; - size_t trail_len = (msg->trail ? msg->trail->length : 0); dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n", con, con->out_msg, con->out_msg_pos.page, con->out_msg->nr_pages, con->out_msg_pos.page_pos); -#ifdef CONFIG_BLOCK - if (msg->bio && !msg->bio_iter) - init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg); -#endif - - while (data_len > con->out_msg_pos.data_pos) { + while (con->out_msg_pos.page < con->out_msg->nr_pages) { struct page *page = NULL; void *kaddr = NULL; - int max_write = PAGE_SIZE; - int page_shift = 0; - - total_max_write = data_len - trail_len - - con->out_msg_pos.data_pos; /* * if we are calculating the data crc (the default), we need * to map the page. if our pages[] has been revoked, use the * zero page. */ - - /* have we reached the trail part of the data? 
*/ - if (con->out_msg_pos.data_pos >= data_len - trail_len) { - in_trail = 1; - - total_max_write = data_len - con->out_msg_pos.data_pos; - - page = list_first_entry(&msg->trail->head, - struct page, lru); - if (crc) - kaddr = kmap(page); - max_write = PAGE_SIZE; - } else if (msg->pages) { + if (msg->pages) { page = msg->pages[con->out_msg_pos.page]; if (crc) kaddr = kmap(page); @@ -812,25 +749,13 @@ static int write_partial_msg_pages(struct ceph_connection *con) struct page, lru); if (crc) kaddr = kmap(page); -#ifdef CONFIG_BLOCK - } else if (msg->bio) { - struct bio_vec *bv; - - bv = bio_iovec_idx(msg->bio_iter, msg->bio_seg); - page = bv->bv_page; - page_shift = bv->bv_offset; - if (crc) - kaddr = kmap(page) + page_shift; - max_write = bv->bv_len; -#endif } else { page = con->msgr->zero_page; if (crc) kaddr = page_address(con->msgr->zero_page); } - len = min_t(int, max_write - con->out_msg_pos.page_pos, - total_max_write); - + len = min((int)(PAGE_SIZE - con->out_msg_pos.page_pos), + (int)(data_len - con->out_msg_pos.data_pos)); if (crc && !con->out_msg_pos.did_page_crc) { void *base = kaddr + con->out_msg_pos.page_pos; u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc); @@ -840,14 +765,13 @@ static int write_partial_msg_pages(struct ceph_connection *con) cpu_to_le32(crc32c(tmpcrc, base, len)); con->out_msg_pos.did_page_crc = 1; } + ret = kernel_sendpage(con->sock, page, - con->out_msg_pos.page_pos + page_shift, - len, + con->out_msg_pos.page_pos, len, MSG_DONTWAIT | MSG_NOSIGNAL | MSG_MORE); - if (crc && - (msg->pages || msg->pagelist || msg->bio || in_trail)) + if (crc && (msg->pages || msg->pagelist)) kunmap(page); if (ret <= 0) @@ -859,16 +783,9 @@ static int write_partial_msg_pages(struct ceph_connection *con) con->out_msg_pos.page_pos = 0; con->out_msg_pos.page++; con->out_msg_pos.did_page_crc = 0; - if (in_trail) - list_move_tail(&page->lru, - &msg->trail->head); - else if (msg->pagelist) + if (msg->pagelist) list_move_tail(&page->lru, 
&msg->pagelist->head); -#ifdef CONFIG_BLOCK - else if (msg->bio) - iter_bio_next(&msg->bio_iter, &msg->bio_seg); -#endif } } @@ -1021,7 +938,7 @@ static int verify_hello(struct ceph_connection *con) { if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) { pr_err("connect to %s got bad banner\n", - ceph_pr_addr(&con->peer_addr.in_addr)); + pr_addr(&con->peer_addr.in_addr)); con->error_msg = "protocol error, bad banner"; return -1; } @@ -1124,7 +1041,7 @@ int ceph_parse_ips(const char *c, const char *end, addr_set_port(ss, port); - dout("parse_ips got %s\n", ceph_pr_addr(ss)); + dout("parse_ips got %s\n", pr_addr(ss)); if (p == end) break; @@ -1144,7 +1061,6 @@ int ceph_parse_ips(const char *c, const char *end, pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c); return -EINVAL; } -EXPORT_SYMBOL(ceph_parse_ips); static int process_banner(struct ceph_connection *con) { @@ -1166,9 +1082,9 @@ static int process_banner(struct ceph_connection *con) !(addr_is_blank(&con->actual_peer_addr.in_addr) && con->actual_peer_addr.nonce == con->peer_addr.nonce)) { pr_warning("wrong peer, want %s/%d, got %s/%d\n", - ceph_pr_addr(&con->peer_addr.in_addr), + pr_addr(&con->peer_addr.in_addr), (int)le32_to_cpu(con->peer_addr.nonce), - ceph_pr_addr(&con->actual_peer_addr.in_addr), + pr_addr(&con->actual_peer_addr.in_addr), (int)le32_to_cpu(con->actual_peer_addr.nonce)); con->error_msg = "wrong peer at address"; return -1; @@ -1186,7 +1102,7 @@ static int process_banner(struct ceph_connection *con) addr_set_port(&con->msgr->inst.addr.in_addr, port); encode_my_addr(con->msgr); dout("process_banner learned my addr is %s\n", - ceph_pr_addr(&con->msgr->inst.addr.in_addr)); + pr_addr(&con->msgr->inst.addr.in_addr)); } set_bit(NEGOTIATING, &con->state); @@ -1207,8 +1123,8 @@ static void fail_protocol(struct ceph_connection *con) static int process_connect(struct ceph_connection *con) { - u64 sup_feat = con->msgr->supported_features; - u64 req_feat = con->msgr->required_features; + 
u64 sup_feat = CEPH_FEATURE_SUPPORTED; + u64 req_feat = CEPH_FEATURE_REQUIRED; u64 server_feat = le64_to_cpu(con->in_reply.features); dout("process_connect on %p tag %d\n", con, (int)con->in_tag); @@ -1218,7 +1134,7 @@ static int process_connect(struct ceph_connection *con) pr_err("%s%lld %s feature set mismatch," " my %llx < server's %llx, missing %llx\n", ENTITY_NAME(con->peer_name), - ceph_pr_addr(&con->peer_addr.in_addr), + pr_addr(&con->peer_addr.in_addr), sup_feat, server_feat, server_feat & ~sup_feat); con->error_msg = "missing required protocol features"; fail_protocol(con); @@ -1228,7 +1144,7 @@ static int process_connect(struct ceph_connection *con) pr_err("%s%lld %s protocol version mismatch," " my %d != server's %d\n", ENTITY_NAME(con->peer_name), - ceph_pr_addr(&con->peer_addr.in_addr), + pr_addr(&con->peer_addr.in_addr), le32_to_cpu(con->out_connect.protocol_version), le32_to_cpu(con->in_reply.protocol_version)); con->error_msg = "protocol version mismatch"; @@ -1262,7 +1178,7 @@ static int process_connect(struct ceph_connection *con) le32_to_cpu(con->in_connect.connect_seq)); pr_err("%s%lld %s connection reset\n", ENTITY_NAME(con->peer_name), - ceph_pr_addr(&con->peer_addr.in_addr)); + pr_addr(&con->peer_addr.in_addr)); reset_connection(con); prepare_write_connect(con->msgr, con, 0); prepare_read_connect(con); @@ -1307,7 +1223,7 @@ static int process_connect(struct ceph_connection *con) pr_err("%s%lld %s protocol feature mismatch," " my required %llx > server's %llx, need %llx\n", ENTITY_NAME(con->peer_name), - ceph_pr_addr(&con->peer_addr.in_addr), + pr_addr(&con->peer_addr.in_addr), req_feat, server_feat, req_feat & ~server_feat); con->error_msg = "missing required protocol features"; fail_protocol(con); @@ -1389,7 +1305,8 @@ static int read_partial_message_section(struct ceph_connection *con, struct kvec *section, unsigned int sec_len, u32 *crc) { - int ret, left; + int left; + int ret; BUG_ON(!section); @@ -1412,83 +1329,13 @@ static int 
read_partial_message_section(struct ceph_connection *con, static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, struct ceph_msg_header *hdr, int *skip); - - -static int read_partial_message_pages(struct ceph_connection *con, - struct page **pages, - unsigned data_len, int datacrc) -{ - void *p; - int ret; - int left; - - left = min((int)(data_len - con->in_msg_pos.data_pos), - (int)(PAGE_SIZE - con->in_msg_pos.page_pos)); - /* (page) data */ - BUG_ON(pages == NULL); - p = kmap(pages[con->in_msg_pos.page]); - ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, - left); - if (ret > 0 && datacrc) - con->in_data_crc = - crc32c(con->in_data_crc, - p + con->in_msg_pos.page_pos, ret); - kunmap(pages[con->in_msg_pos.page]); - if (ret <= 0) - return ret; - con->in_msg_pos.data_pos += ret; - con->in_msg_pos.page_pos += ret; - if (con->in_msg_pos.page_pos == PAGE_SIZE) { - con->in_msg_pos.page_pos = 0; - con->in_msg_pos.page++; - } - - return ret; -} - -#ifdef CONFIG_BLOCK -static int read_partial_message_bio(struct ceph_connection *con, - struct bio **bio_iter, int *bio_seg, - unsigned data_len, int datacrc) -{ - struct bio_vec *bv = bio_iovec_idx(*bio_iter, *bio_seg); - void *p; - int ret, left; - - if (IS_ERR(bv)) - return PTR_ERR(bv); - - left = min((int)(data_len - con->in_msg_pos.data_pos), - (int)(bv->bv_len - con->in_msg_pos.page_pos)); - - p = kmap(bv->bv_page) + bv->bv_offset; - - ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, - left); - if (ret > 0 && datacrc) - con->in_data_crc = - crc32c(con->in_data_crc, - p + con->in_msg_pos.page_pos, ret); - kunmap(bv->bv_page); - if (ret <= 0) - return ret; - con->in_msg_pos.data_pos += ret; - con->in_msg_pos.page_pos += ret; - if (con->in_msg_pos.page_pos == bv->bv_len) { - con->in_msg_pos.page_pos = 0; - iter_bio_next(bio_iter, bio_seg); - } - - return ret; -} -#endif - /* * read (part of) a message. 
*/ static int read_partial_message(struct ceph_connection *con) { struct ceph_msg *m = con->in_msg; + void *p; int ret; int to, left; unsigned front_len, middle_len, data_len, data_off; @@ -1534,7 +1381,7 @@ static int read_partial_message(struct ceph_connection *con) if ((s64)seq - (s64)con->in_seq < 1) { pr_info("skipping %s%lld %s seq %lld, expected %lld\n", ENTITY_NAME(con->peer_name), - ceph_pr_addr(&con->peer_addr.in_addr), + pr_addr(&con->peer_addr.in_addr), seq, con->in_seq + 1); con->in_base_pos = -front_len - middle_len - data_len - sizeof(m->footer); @@ -1575,10 +1422,7 @@ static int read_partial_message(struct ceph_connection *con) m->middle->vec.iov_len = 0; con->in_msg_pos.page = 0; - if (m->pages) - con->in_msg_pos.page_pos = data_off & ~PAGE_MASK; - else - con->in_msg_pos.page_pos = 0; + con->in_msg_pos.page_pos = data_off & ~PAGE_MASK; con->in_msg_pos.data_pos = 0; } @@ -1596,29 +1440,27 @@ static int read_partial_message(struct ceph_connection *con) if (ret <= 0) return ret; } -#ifdef CONFIG_BLOCK - if (m->bio && !m->bio_iter) - init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg); -#endif /* (page) data */ while (con->in_msg_pos.data_pos < data_len) { - if (m->pages) { - ret = read_partial_message_pages(con, m->pages, - data_len, datacrc); - if (ret <= 0) - return ret; -#ifdef CONFIG_BLOCK - } else if (m->bio) { - - ret = read_partial_message_bio(con, - &m->bio_iter, &m->bio_seg, - data_len, datacrc); - if (ret <= 0) - return ret; -#endif - } else { - BUG_ON(1); + left = min((int)(data_len - con->in_msg_pos.data_pos), + (int)(PAGE_SIZE - con->in_msg_pos.page_pos)); + BUG_ON(m->pages == NULL); + p = kmap(m->pages[con->in_msg_pos.page]); + ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, + left); + if (ret > 0 && datacrc) + con->in_data_crc = + crc32c(con->in_data_crc, + p + con->in_msg_pos.page_pos, ret); + kunmap(m->pages[con->in_msg_pos.page]); + if (ret <= 0) + return ret; + con->in_msg_pos.data_pos += ret; + con->in_msg_pos.page_pos 
+= ret; + if (con->in_msg_pos.page_pos == PAGE_SIZE) { + con->in_msg_pos.page_pos = 0; + con->in_msg_pos.page++; } } @@ -2032,9 +1874,9 @@ static void con_work(struct work_struct *work) static void ceph_fault(struct ceph_connection *con) { pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), - ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); + pr_addr(&con->peer_addr.in_addr), con->error_msg); dout("fault %p state %lu to peer %s\n", - con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); + con, con->state, pr_addr(&con->peer_addr.in_addr)); if (test_bit(LOSSYTX, &con->state)) { dout("fault on LOSSYTX channel\n"); @@ -2094,9 +1936,7 @@ static void ceph_fault(struct ceph_connection *con) /* * create a new messenger instance */ -struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr, - u32 supported_features, - u32 required_features) +struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr) { struct ceph_messenger *msgr; @@ -2104,9 +1944,6 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr, if (msgr == NULL) return ERR_PTR(-ENOMEM); - msgr->supported_features = supported_features; - msgr->required_features = required_features; - spin_lock_init(&msgr->global_seq_lock); /* the zero page is needed if a request is "canceled" while the message @@ -2129,7 +1966,6 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr, dout("messenger_create %p\n", msgr); return msgr; } -EXPORT_SYMBOL(ceph_messenger_create); void ceph_messenger_destroy(struct ceph_messenger *msgr) { @@ -2139,7 +1975,6 @@ void ceph_messenger_destroy(struct ceph_messenger *msgr) kfree(msgr); dout("destroyed messenger %p\n", msgr); } -EXPORT_SYMBOL(ceph_messenger_destroy); /* * Queue up an outgoing message on the given connection. 
@@ -2176,7 +2011,6 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) queue_con(con); } -EXPORT_SYMBOL(ceph_con_send); /* * Revoke a message that was previously queued for send @@ -2242,7 +2076,6 @@ void ceph_con_keepalive(struct ceph_connection *con) test_and_set_bit(WRITE_PENDING, &con->state) == 0) queue_con(con); } -EXPORT_SYMBOL(ceph_con_keepalive); /* @@ -2303,10 +2136,6 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) m->nr_pages = 0; m->pages = NULL; m->pagelist = NULL; - m->bio = NULL; - m->bio_iter = NULL; - m->bio_seg = 0; - m->trail = NULL; dout("ceph_msg_new %p front %d\n", m, front_len); return m; @@ -2317,7 +2146,6 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) pr_err("msg_new can't create type %d front %d\n", type, front_len); return NULL; } -EXPORT_SYMBOL(ceph_msg_new); /* * Allocate "middle" portion of a message, if it is needed and wasn't @@ -2422,14 +2250,11 @@ void ceph_msg_last_put(struct kref *kref) m->pagelist = NULL; } - m->trail = NULL; - if (m->pool) ceph_msgpool_put(m->pool, m); else ceph_msg_kfree(m); } -EXPORT_SYMBOL(ceph_msg_last_put); void ceph_msg_dump(struct ceph_msg *msg) { @@ -2450,4 +2275,3 @@ void ceph_msg_dump(struct ceph_msg *msg) DUMP_PREFIX_OFFSET, 16, 1, &msg->footer, sizeof(msg->footer), true); } -EXPORT_SYMBOL(ceph_msg_dump); diff --git a/trunk/include/linux/ceph/messenger.h b/trunk/fs/ceph/messenger.h similarity index 95% rename from trunk/include/linux/ceph/messenger.h rename to trunk/fs/ceph/messenger.h index 5956d62c3057..76fbc957bc13 100644 --- a/trunk/include/linux/ceph/messenger.h +++ b/trunk/fs/ceph/messenger.h @@ -65,9 +65,6 @@ struct ceph_messenger { */ u32 global_seq; spinlock_t global_seq_lock; - - u32 supported_features; - u32 required_features; }; /* @@ -85,10 +82,6 @@ struct ceph_msg { struct ceph_pagelist *pagelist; /* instead of pages */ struct list_head list_head; struct kref kref; 
- struct bio *bio; /* instead of pages/pagelist */ - struct bio *bio_iter; /* bio iterator */ - int bio_seg; /* current bio segment */ - struct ceph_pagelist *trail; /* the trailing part of the data */ bool front_is_vmalloc; bool more_to_follow; bool needs_out_seq; @@ -212,7 +205,7 @@ struct ceph_connection { }; -extern const char *ceph_pr_addr(const struct sockaddr_storage *ss); +extern const char *pr_addr(const struct sockaddr_storage *ss); extern int ceph_parse_ips(const char *c, const char *end, struct ceph_entity_addr *addr, int max_count, int *count); @@ -223,8 +216,7 @@ extern void ceph_msgr_exit(void); extern void ceph_msgr_flush(void); extern struct ceph_messenger *ceph_messenger_create( - struct ceph_entity_addr *myaddr, - u32 features, u32 required); + struct ceph_entity_addr *myaddr); extern void ceph_messenger_destroy(struct ceph_messenger *); extern void ceph_con_init(struct ceph_messenger *msgr, diff --git a/trunk/net/ceph/mon_client.c b/trunk/fs/ceph/mon_client.c similarity index 94% rename from trunk/net/ceph/mon_client.c rename to trunk/fs/ceph/mon_client.c index 8a079399174a..b2a5a3e4a671 100644 --- a/trunk/net/ceph/mon_client.c +++ b/trunk/fs/ceph/mon_client.c @@ -1,16 +1,14 @@ -#include +#include "ceph_debug.h" -#include #include #include #include #include -#include -#include -#include - -#include +#include "mon_client.h" +#include "super.h" +#include "auth.h" +#include "decode.h" /* * Interact with Ceph monitor cluster. 
Handle requests for new map @@ -76,7 +74,7 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end) m->num_mon); for (i = 0; i < m->num_mon; i++) dout("monmap_decode mon%d is %s\n", i, - ceph_pr_addr(&m->mon_inst[i].addr.in_addr)); + pr_addr(&m->mon_inst[i].addr.in_addr)); return m; bad: @@ -193,33 +191,30 @@ static void __send_subscribe(struct ceph_mon_client *monc) struct ceph_msg *msg = monc->m_subscribe; struct ceph_mon_subscribe_item *i; void *p, *end; - int num; p = msg->front.iov_base; end = p + msg->front_max; - num = 1 + !!monc->want_next_osdmap + !!monc->want_mdsmap; - ceph_encode_32(&p, num); - + dout("__send_subscribe to 'mdsmap' %u+\n", + (unsigned)monc->have_mdsmap); if (monc->want_next_osdmap) { dout("__send_subscribe to 'osdmap' %u\n", (unsigned)monc->have_osdmap); + ceph_encode_32(&p, 3); ceph_encode_string(&p, end, "osdmap", 6); i = p; i->have = cpu_to_le64(monc->have_osdmap); i->onetime = 1; p += sizeof(*i); monc->want_next_osdmap = 2; /* requested */ + } else { + ceph_encode_32(&p, 2); } - if (monc->want_mdsmap) { - dout("__send_subscribe to 'mdsmap' %u+\n", - (unsigned)monc->have_mdsmap); - ceph_encode_string(&p, end, "mdsmap", 6); - i = p; - i->have = cpu_to_le64(monc->have_mdsmap); - i->onetime = 0; - p += sizeof(*i); - } + ceph_encode_string(&p, end, "mdsmap", 6); + i = p; + i->have = cpu_to_le64(monc->have_mdsmap); + i->onetime = 0; + p += sizeof(*i); ceph_encode_string(&p, end, "monmap", 6); i = p; i->have = 0; @@ -248,8 +243,7 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc, mutex_lock(&monc->mutex); if (monc->hunting) { pr_info("mon%d %s session established\n", - monc->cur_mon, - ceph_pr_addr(&monc->con->peer_addr.in_addr)); + monc->cur_mon, pr_addr(&monc->con->peer_addr.in_addr)); monc->hunting = false; } dout("handle_subscribe_ack after %d seconds\n", seconds); @@ -272,7 +266,6 @@ int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 got) mutex_unlock(&monc->mutex); return 0; } 
-EXPORT_SYMBOL(ceph_monc_got_mdsmap); int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 got) { @@ -317,7 +310,6 @@ int ceph_monc_open_session(struct ceph_mon_client *monc) mutex_unlock(&monc->mutex); return 0; } -EXPORT_SYMBOL(ceph_monc_open_session); /* * The monitor responds with mount ack indicate mount success. The @@ -548,7 +540,6 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) kref_put(&req->kref, release_generic_request); return err; } -EXPORT_SYMBOL(ceph_monc_do_statfs); /* * pool ops @@ -660,7 +651,6 @@ int ceph_monc_create_snapid(struct ceph_mon_client *monc, pool, 0, (char *)snapid, sizeof(*snapid)); } -EXPORT_SYMBOL(ceph_monc_create_snapid); int ceph_monc_delete_snapid(struct ceph_mon_client *monc, u32 pool, u64 snapid) @@ -718,9 +708,9 @@ static void delayed_work(struct work_struct *work) */ static int build_initial_monmap(struct ceph_mon_client *monc) { - struct ceph_options *opt = monc->client->options; - struct ceph_entity_addr *mon_addr = opt->mon_addr; - int num_mon = opt->num_mon; + struct ceph_mount_args *args = monc->client->mount_args; + struct ceph_entity_addr *mon_addr = args->mon_addr; + int num_mon = args->num_mon; int i; /* build initial monmap */ @@ -738,6 +728,11 @@ static int build_initial_monmap(struct ceph_mon_client *monc) } monc->monmap->num_mon = num_mon; monc->have_fsid = false; + + /* release addr memory */ + kfree(args->mon_addr); + args->mon_addr = NULL; + args->num_mon = 0; return 0; } @@ -758,8 +753,8 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) monc->con = NULL; /* authentication */ - monc->auth = ceph_auth_init(cl->options->name, - cl->options->secret); + monc->auth = ceph_auth_init(cl->mount_args->name, + cl->mount_args->secret); if (IS_ERR(monc->auth)) return PTR_ERR(monc->auth); monc->auth->want_keys = @@ -813,7 +808,6 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) out: return err; } -EXPORT_SYMBOL(ceph_monc_init); 
void ceph_monc_stop(struct ceph_mon_client *monc) { @@ -838,7 +832,6 @@ void ceph_monc_stop(struct ceph_mon_client *monc) kfree(monc->monmap); } -EXPORT_SYMBOL(ceph_monc_stop); static void handle_auth_reply(struct ceph_mon_client *monc, struct ceph_msg *msg) @@ -896,7 +889,6 @@ int ceph_monc_validate_auth(struct ceph_mon_client *monc) mutex_unlock(&monc->mutex); return ret; } -EXPORT_SYMBOL(ceph_monc_validate_auth); /* * handle incoming message @@ -930,16 +922,15 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) ceph_monc_handle_map(monc, msg); break; + case CEPH_MSG_MDS_MAP: + ceph_mdsc_handle_map(&monc->client->mdsc, msg); + break; + case CEPH_MSG_OSD_MAP: ceph_osdc_handle_map(&monc->client->osdc, msg); break; default: - /* can the chained handler handle it? */ - if (monc->client->extra_mon_dispatch && - monc->client->extra_mon_dispatch(monc->client, msg) == 0) - break; - pr_err("received unknown message type %d %s\n", type, ceph_msg_type_name(type)); } @@ -1003,7 +994,7 @@ static void mon_fault(struct ceph_connection *con) if (monc->con && !monc->hunting) pr_info("mon%d %s session lost, " "hunting for new mon\n", monc->cur_mon, - ceph_pr_addr(&monc->con->peer_addr.in_addr)); + pr_addr(&monc->con->peer_addr.in_addr)); __close_session(monc); if (!monc->hunting) { diff --git a/trunk/include/linux/ceph/mon_client.h b/trunk/fs/ceph/mon_client.h similarity index 99% rename from trunk/include/linux/ceph/mon_client.h rename to trunk/fs/ceph/mon_client.h index 545f85917780..8e396f2c0963 100644 --- a/trunk/include/linux/ceph/mon_client.h +++ b/trunk/fs/ceph/mon_client.h @@ -79,7 +79,6 @@ struct ceph_mon_client { u64 last_tid; /* mds/osd map */ - int want_mdsmap; int want_next_osdmap; /* 1 = want, 2 = want+asked */ u32 have_osdmap, have_mdsmap; diff --git a/trunk/net/ceph/msgpool.c b/trunk/fs/ceph/msgpool.c similarity index 95% rename from trunk/net/ceph/msgpool.c rename to trunk/fs/ceph/msgpool.c index d5f2d97ac05c..dd65a6438131 100644 --- 
a/trunk/net/ceph/msgpool.c +++ b/trunk/fs/ceph/msgpool.c @@ -1,11 +1,11 @@ -#include +#include "ceph_debug.h" #include #include #include #include -#include +#include "msgpool.h" static void *alloc_fn(gfp_t gfp_mask, void *arg) { diff --git a/trunk/include/linux/ceph/msgpool.h b/trunk/fs/ceph/msgpool.h similarity index 100% rename from trunk/include/linux/ceph/msgpool.h rename to trunk/fs/ceph/msgpool.h diff --git a/trunk/include/linux/ceph/msgr.h b/trunk/fs/ceph/msgr.h similarity index 100% rename from trunk/include/linux/ceph/msgr.h rename to trunk/fs/ceph/msgr.h diff --git a/trunk/net/ceph/osd_client.c b/trunk/fs/ceph/osd_client.c similarity index 84% rename from trunk/net/ceph/osd_client.c rename to trunk/fs/ceph/osd_client.c index 79391994b3ed..3b5571b8ce22 100644 --- a/trunk/net/ceph/osd_client.c +++ b/trunk/fs/ceph/osd_client.c @@ -1,22 +1,17 @@ -#include +#include "ceph_debug.h" -#include #include #include #include #include #include #include -#ifdef CONFIG_BLOCK -#include -#endif -#include -#include -#include -#include -#include -#include +#include "super.h" +#include "osd_client.h" +#include "messenger.h" +#include "decode.h" +#include "auth.h" #define OSD_OP_FRONT_LEN 4096 #define OSD_OPREPLY_FRONT_LEN 512 @@ -27,59 +22,6 @@ static int __kick_requests(struct ceph_osd_client *osdc, static void kick_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd); -static int op_needs_trail(int op) -{ - switch (op) { - case CEPH_OSD_OP_GETXATTR: - case CEPH_OSD_OP_SETXATTR: - case CEPH_OSD_OP_CMPXATTR: - case CEPH_OSD_OP_CALL: - return 1; - default: - return 0; - } -} - -static int op_has_extent(int op) -{ - return (op == CEPH_OSD_OP_READ || - op == CEPH_OSD_OP_WRITE); -} - -void ceph_calc_raw_layout(struct ceph_osd_client *osdc, - struct ceph_file_layout *layout, - u64 snapid, - u64 off, u64 *plen, u64 *bno, - struct ceph_osd_request *req, - struct ceph_osd_req_op *op) -{ - struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; - u64 orig_len = 
*plen; - u64 objoff, objlen; /* extent in object */ - - reqhead->snapid = cpu_to_le64(snapid); - - /* object extent? */ - ceph_calc_file_object_mapping(layout, off, plen, bno, - &objoff, &objlen); - if (*plen < orig_len) - dout(" skipping last %llu, final file extent %llu~%llu\n", - orig_len - *plen, off, *plen); - - if (op_has_extent(op->op)) { - op->extent.offset = objoff; - op->extent.length = objlen; - } - req->r_num_pages = calc_pages_for(off, *plen); - if (op->op == CEPH_OSD_OP_WRITE) - op->payload_len = *plen; - - dout("calc_layout bno=%llx %llu~%llu (%d pages)\n", - *bno, objoff, objlen, req->r_num_pages); - -} -EXPORT_SYMBOL(ceph_calc_raw_layout); - /* * Implement client access to distributed object storage cluster. * @@ -106,19 +48,34 @@ EXPORT_SYMBOL(ceph_calc_raw_layout); * fill osd op in request message. */ static void calc_layout(struct ceph_osd_client *osdc, - struct ceph_vino vino, - struct ceph_file_layout *layout, + struct ceph_vino vino, struct ceph_file_layout *layout, u64 off, u64 *plen, - struct ceph_osd_request *req, - struct ceph_osd_req_op *op) + struct ceph_osd_request *req) { + struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; + struct ceph_osd_op *op = (void *)(reqhead + 1); + u64 orig_len = *plen; + u64 objoff, objlen; /* extent in object */ u64 bno; - ceph_calc_raw_layout(osdc, layout, vino.snap, off, - plen, &bno, req, op); + reqhead->snapid = cpu_to_le64(vino.snap); + + /* object extent? 
*/ + ceph_calc_file_object_mapping(layout, off, plen, &bno, + &objoff, &objlen); + if (*plen < orig_len) + dout(" skipping last %llu, final file extent %llu~%llu\n", + orig_len - *plen, off, *plen); sprintf(req->r_oid, "%llx.%08llx", vino.ino, bno); req->r_oid_len = strlen(req->r_oid); + + op->extent.offset = cpu_to_le64(objoff); + op->extent.length = cpu_to_le64(objlen); + req->r_num_pages = calc_pages_for(off, *plen); + + dout("calc_layout %s (%d) %llu~%llu (%d pages)\n", + req->r_oid, req->r_oid_len, objoff, objlen, req->r_num_pages); } /* @@ -144,66 +101,56 @@ void ceph_osdc_release_request(struct kref *kref) if (req->r_own_pages) ceph_release_page_vector(req->r_pages, req->r_num_pages); -#ifdef CONFIG_BLOCK - if (req->r_bio) - bio_put(req->r_bio); -#endif ceph_put_snap_context(req->r_snapc); - if (req->r_trail) { - ceph_pagelist_release(req->r_trail); - kfree(req->r_trail); - } if (req->r_mempool) mempool_free(req, req->r_osdc->req_mempool); else kfree(req); } -EXPORT_SYMBOL(ceph_osdc_release_request); - -static int get_num_ops(struct ceph_osd_req_op *ops, int *needs_trail) -{ - int i = 0; - - if (needs_trail) - *needs_trail = 0; - while (ops[i].op) { - if (needs_trail && op_needs_trail(ops[i].op)) - *needs_trail = 1; - i++; - } - - return i; -} -struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, - int flags, +/* + * build new request AND message, calculate layout, and adjust file + * extent as needed. + * + * if the file was recently truncated, we include information about its + * old and new size so that the object can be updated appropriately. (we + * avoid synchronously deleting truncated objects because it's slow.) + * + * if @do_sync, include a 'startsync' command so that the osd will flush + * data quickly. 
+ */ +struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, + struct ceph_file_layout *layout, + struct ceph_vino vino, + u64 off, u64 *plen, + int opcode, int flags, struct ceph_snap_context *snapc, - struct ceph_osd_req_op *ops, - bool use_mempool, - gfp_t gfp_flags, - struct page **pages, - struct bio *bio) + int do_sync, + u32 truncate_seq, + u64 truncate_size, + struct timespec *mtime, + bool use_mempool, int num_reply) { struct ceph_osd_request *req; struct ceph_msg *msg; - int needs_trail; - int num_op = get_num_ops(ops, &needs_trail); - size_t msg_size = sizeof(struct ceph_osd_request_head); - - msg_size += num_op*sizeof(struct ceph_osd_op); + struct ceph_osd_request_head *head; + struct ceph_osd_op *op; + void *p; + int num_op = 1 + do_sync; + size_t msg_size = sizeof(*head) + num_op*sizeof(*op); + int i; if (use_mempool) { - req = mempool_alloc(osdc->req_mempool, gfp_flags); + req = mempool_alloc(osdc->req_mempool, GFP_NOFS); memset(req, 0, sizeof(*req)); } else { - req = kzalloc(sizeof(*req), gfp_flags); + req = kzalloc(sizeof(*req), GFP_NOFS); } if (req == NULL) return NULL; req->r_osdc = osdc; req->r_mempool = use_mempool; - kref_init(&req->r_kref); init_completion(&req->r_completion); init_completion(&req->r_safe_completion); @@ -217,22 +164,13 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); else msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, - OSD_OPREPLY_FRONT_LEN, gfp_flags); + OSD_OPREPLY_FRONT_LEN, GFP_NOFS); if (!msg) { ceph_osdc_put_request(req); return NULL; } req->r_reply = msg; - /* allocate space for the trailing data */ - if (needs_trail) { - req->r_trail = kmalloc(sizeof(struct ceph_pagelist), gfp_flags); - if (!req->r_trail) { - ceph_osdc_put_request(req); - return NULL; - } - ceph_pagelist_init(req->r_trail); - } /* create request message; allow space for oid */ msg_size += 40; if (snapc) @@ -240,115 +178,18 @@ struct ceph_osd_request 
*ceph_osdc_alloc_request(struct ceph_osd_client *osdc, if (use_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op, 0); else - msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp_flags); + msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, GFP_NOFS); if (!msg) { ceph_osdc_put_request(req); return NULL; } - msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP); memset(msg->front.iov_base, 0, msg->front.iov_len); - - req->r_request = msg; - req->r_pages = pages; -#ifdef CONFIG_BLOCK - if (bio) { - req->r_bio = bio; - bio_get(req->r_bio); - } -#endif - - return req; -} -EXPORT_SYMBOL(ceph_osdc_alloc_request); - -static void osd_req_encode_op(struct ceph_osd_request *req, - struct ceph_osd_op *dst, - struct ceph_osd_req_op *src) -{ - dst->op = cpu_to_le16(src->op); - - switch (dst->op) { - case CEPH_OSD_OP_READ: - case CEPH_OSD_OP_WRITE: - dst->extent.offset = - cpu_to_le64(src->extent.offset); - dst->extent.length = - cpu_to_le64(src->extent.length); - dst->extent.truncate_size = - cpu_to_le64(src->extent.truncate_size); - dst->extent.truncate_seq = - cpu_to_le32(src->extent.truncate_seq); - break; - - case CEPH_OSD_OP_GETXATTR: - case CEPH_OSD_OP_SETXATTR: - case CEPH_OSD_OP_CMPXATTR: - BUG_ON(!req->r_trail); - - dst->xattr.name_len = cpu_to_le32(src->xattr.name_len); - dst->xattr.value_len = cpu_to_le32(src->xattr.value_len); - dst->xattr.cmp_op = src->xattr.cmp_op; - dst->xattr.cmp_mode = src->xattr.cmp_mode; - ceph_pagelist_append(req->r_trail, src->xattr.name, - src->xattr.name_len); - ceph_pagelist_append(req->r_trail, src->xattr.val, - src->xattr.value_len); - break; - case CEPH_OSD_OP_CALL: - BUG_ON(!req->r_trail); - - dst->cls.class_len = src->cls.class_len; - dst->cls.method_len = src->cls.method_len; - dst->cls.indata_len = cpu_to_le32(src->cls.indata_len); - - ceph_pagelist_append(req->r_trail, src->cls.class_name, - src->cls.class_len); - ceph_pagelist_append(req->r_trail, src->cls.method_name, - src->cls.method_len); - ceph_pagelist_append(req->r_trail, src->cls.indata, - 
src->cls.indata_len); - break; - case CEPH_OSD_OP_ROLLBACK: - dst->snap.snapid = cpu_to_le64(src->snap.snapid); - break; - case CEPH_OSD_OP_STARTSYNC: - break; - default: - pr_err("unrecognized osd opcode %d\n", dst->op); - WARN_ON(1); - break; - } - dst->payload_len = cpu_to_le32(src->payload_len); -} - -/* - * build new request AND message - * - */ -void ceph_osdc_build_request(struct ceph_osd_request *req, - u64 off, u64 *plen, - struct ceph_osd_req_op *src_ops, - struct ceph_snap_context *snapc, - struct timespec *mtime, - const char *oid, - int oid_len) -{ - struct ceph_msg *msg = req->r_request; - struct ceph_osd_request_head *head; - struct ceph_osd_req_op *src_op; - struct ceph_osd_op *op; - void *p; - int num_op = get_num_ops(src_ops, NULL); - size_t msg_size = sizeof(*head) + num_op*sizeof(*op); - int flags = req->r_flags; - u64 data_len = 0; - int i; - head = msg->front.iov_base; op = (void *)(head + 1); p = (void *)(op + num_op); + req->r_request = msg; req->r_snapc = ceph_get_snap_context(snapc); head->client_inc = cpu_to_le32(1); /* always, for now. 
*/ @@ -356,23 +197,29 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, if (flags & CEPH_OSD_FLAG_WRITE) ceph_encode_timespec(&head->mtime, mtime); head->num_ops = cpu_to_le16(num_op); + op->op = cpu_to_le16(opcode); + /* calculate max write size */ + calc_layout(osdc, vino, layout, off, plen, req); + req->r_file_layout = *layout; /* keep a copy */ - /* fill in oid */ - head->object_len = cpu_to_le32(oid_len); - memcpy(p, oid, oid_len); - p += oid_len; - - src_op = src_ops; - while (src_op->op) { - osd_req_encode_op(req, op, src_op); - src_op++; - op++; + if (flags & CEPH_OSD_FLAG_WRITE) { + req->r_request->hdr.data_off = cpu_to_le16(off); + req->r_request->hdr.data_len = cpu_to_le32(*plen); + op->payload_len = cpu_to_le32(*plen); } + op->extent.truncate_size = cpu_to_le64(truncate_size); + op->extent.truncate_seq = cpu_to_le32(truncate_seq); - if (req->r_trail) - data_len += req->r_trail->length; + /* fill in oid */ + head->object_len = cpu_to_le32(req->r_oid_len); + memcpy(p, req->r_oid, req->r_oid_len); + p += req->r_oid_len; + if (do_sync) { + op++; + op->op = cpu_to_le16(CEPH_OSD_OP_STARTSYNC); + } if (snapc) { head->snap_seq = cpu_to_le64(snapc->seq); head->num_snaps = cpu_to_le32(snapc->num_snaps); @@ -382,79 +229,12 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, } } - if (flags & CEPH_OSD_FLAG_WRITE) { - req->r_request->hdr.data_off = cpu_to_le16(off); - req->r_request->hdr.data_len = cpu_to_le32(*plen + data_len); - } else if (data_len) { - req->r_request->hdr.data_off = 0; - req->r_request->hdr.data_len = cpu_to_le32(data_len); - } - BUG_ON(p > msg->front.iov_base + msg->front.iov_len); msg_size = p - msg->front.iov_base; msg->front.iov_len = msg_size; msg->hdr.front_len = cpu_to_le32(msg_size); - return; -} -EXPORT_SYMBOL(ceph_osdc_build_request); - -/* - * build new request AND message, calculate layout, and adjust file - * extent as needed. 
- * - * if the file was recently truncated, we include information about its - * old and new size so that the object can be updated appropriately. (we - * avoid synchronously deleting truncated objects because it's slow.) - * - * if @do_sync, include a 'startsync' command so that the osd will flush - * data quickly. - */ -struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, - struct ceph_file_layout *layout, - struct ceph_vino vino, - u64 off, u64 *plen, - int opcode, int flags, - struct ceph_snap_context *snapc, - int do_sync, - u32 truncate_seq, - u64 truncate_size, - struct timespec *mtime, - bool use_mempool, int num_reply) -{ - struct ceph_osd_req_op ops[3]; - struct ceph_osd_request *req; - - ops[0].op = opcode; - ops[0].extent.truncate_seq = truncate_seq; - ops[0].extent.truncate_size = truncate_size; - ops[0].payload_len = 0; - - if (do_sync) { - ops[1].op = CEPH_OSD_OP_STARTSYNC; - ops[1].payload_len = 0; - ops[2].op = 0; - } else - ops[1].op = 0; - - req = ceph_osdc_alloc_request(osdc, flags, - snapc, ops, - use_mempool, - GFP_NOFS, NULL, NULL); - if (IS_ERR(req)) - return req; - - /* calculate max write size */ - calc_layout(osdc, vino, layout, off, plen, req, ops); - req->r_file_layout = *layout; /* keep a copy */ - - ceph_osdc_build_request(req, off, plen, ops, - snapc, - mtime, - req->r_oid, req->r_oid_len); - return req; } -EXPORT_SYMBOL(ceph_osdc_new_request); /* * We keep osd requests in an rbtree, sorted by ->r_tid. 
@@ -609,7 +389,7 @@ static void __move_osd_to_lru(struct ceph_osd_client *osdc, dout("__move_osd_to_lru %p\n", osd); BUG_ON(!list_empty(&osd->o_osd_lru)); list_add_tail(&osd->o_osd_lru, &osdc->osd_lru); - osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ; + osd->lru_ttl = jiffies + osdc->client->mount_args->osd_idle_ttl * HZ; } static void __remove_osd_from_lru(struct ceph_osd *osd) @@ -703,7 +483,7 @@ static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o) static void __schedule_osd_timeout(struct ceph_osd_client *osdc) { schedule_delayed_work(&osdc->timeout_work, - osdc->client->options->osd_keepalive_timeout * HZ); + osdc->client->mount_args->osd_keepalive_timeout * HZ); } static void __cancel_osd_timeout(struct ceph_osd_client *osdc) @@ -904,9 +684,9 @@ static void handle_timeout(struct work_struct *work) container_of(work, struct ceph_osd_client, timeout_work.work); struct ceph_osd_request *req, *last_req = NULL; struct ceph_osd *osd; - unsigned long timeout = osdc->client->options->osd_timeout * HZ; + unsigned long timeout = osdc->client->mount_args->osd_timeout * HZ; unsigned long keepalive = - osdc->client->options->osd_keepalive_timeout * HZ; + osdc->client->mount_args->osd_keepalive_timeout * HZ; unsigned long last_stamp = 0; struct rb_node *p; struct list_head slow_osds; @@ -993,7 +773,7 @@ static void handle_osds_timeout(struct work_struct *work) container_of(work, struct ceph_osd_client, osds_timeout_work.work); unsigned long delay = - osdc->client->options->osd_idle_ttl * HZ >> 2; + osdc->client->mount_args->osd_idle_ttl * HZ >> 2; dout("osds timeout\n"); down_read(&osdc->map_sem); @@ -1324,10 +1104,6 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, req->r_request->pages = req->r_pages; req->r_request->nr_pages = req->r_num_pages; -#ifdef CONFIG_BLOCK - req->r_request->bio = req->r_bio; -#endif - req->r_request->trail = req->r_trail; register_request(osdc, req); @@ -1355,7 +1131,6 @@ int 
ceph_osdc_start_request(struct ceph_osd_client *osdc, up_read(&osdc->map_sem); return rc; } -EXPORT_SYMBOL(ceph_osdc_start_request); /* * wait for a request to complete @@ -1378,7 +1153,6 @@ int ceph_osdc_wait_request(struct ceph_osd_client *osdc, dout("wait_request tid %llu result %d\n", req->r_tid, req->r_result); return req->r_result; } -EXPORT_SYMBOL(ceph_osdc_wait_request); /* * sync - wait for all in-flight requests to flush. avoid starvation. @@ -1412,7 +1186,6 @@ void ceph_osdc_sync(struct ceph_osd_client *osdc) mutex_unlock(&osdc->request_mutex); dout("sync done (thru tid %llu)\n", last_tid); } -EXPORT_SYMBOL(ceph_osdc_sync); /* * init, shutdown @@ -1438,7 +1211,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout); schedule_delayed_work(&osdc->osds_timeout_work, - round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ)); + round_jiffies_relative(osdc->client->mount_args->osd_idle_ttl * HZ)); err = -ENOMEM; osdc->req_mempool = mempool_create_kmalloc_pool(10, @@ -1464,7 +1237,6 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) out: return err; } -EXPORT_SYMBOL(ceph_osdc_init); void ceph_osdc_stop(struct ceph_osd_client *osdc) { @@ -1479,7 +1251,6 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc) ceph_msgpool_destroy(&osdc->msgpool_op); ceph_msgpool_destroy(&osdc->msgpool_op_reply); } -EXPORT_SYMBOL(ceph_osdc_stop); /* * Read some contiguous pages. 
If we cross a stripe boundary, shorten @@ -1517,7 +1288,6 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, dout("readpages result %d\n", rc); return rc; } -EXPORT_SYMBOL(ceph_osdc_readpages); /* * do a synchronous write on N pages @@ -1560,7 +1330,6 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, dout("writepages result %d\n", rc); return rc; } -EXPORT_SYMBOL(ceph_osdc_writepages); /* * handle incoming message @@ -1651,9 +1420,6 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, } m->pages = req->r_pages; m->nr_pages = req->r_num_pages; -#ifdef CONFIG_BLOCK - m->bio = req->r_bio; -#endif } *skip = 0; req->r_con_filling_msg = ceph_con_get(con); diff --git a/trunk/include/linux/ceph/osd_client.h b/trunk/fs/ceph/osd_client.h similarity index 76% rename from trunk/include/linux/ceph/osd_client.h rename to trunk/fs/ceph/osd_client.h index 6c91fb032c39..ce776989ef6a 100644 --- a/trunk/include/linux/ceph/osd_client.h +++ b/trunk/fs/ceph/osd_client.h @@ -15,7 +15,6 @@ struct ceph_snap_context; struct ceph_osd_request; struct ceph_osd_client; struct ceph_authorizer; -struct ceph_pagelist; /* * completion callback for async writepages @@ -69,7 +68,6 @@ struct ceph_osd_request { struct list_head r_unsafe_item; struct inode *r_inode; /* for use by callbacks */ - void *r_priv; /* ditto */ char r_oid[40]; /* object name */ int r_oid_len; @@ -82,11 +80,6 @@ struct ceph_osd_request { struct page **r_pages; /* pages for data payload */ int r_pages_from_pool; int r_own_pages; /* if true, i own page list */ -#ifdef CONFIG_BLOCK - struct bio *r_bio; /* instead of pages */ -#endif - - struct ceph_pagelist *r_trail; /* trailing part of the data */ }; struct ceph_osd_client { @@ -117,42 +110,6 @@ struct ceph_osd_client { struct ceph_msgpool msgpool_op_reply; }; -struct ceph_osd_req_op { - u16 op; /* CEPH_OSD_OP_* */ - u32 flags; /* CEPH_OSD_FLAG_* */ - union { - struct { - u64 offset, length; - u64 truncate_size; - u32 truncate_seq; 
- } extent; - struct { - const char *name; - u32 name_len; - const char *val; - u32 value_len; - __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ - __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ - } xattr; - struct { - const char *class_name; - __u8 class_len; - const char *method_name; - __u8 method_len; - __u8 argc; - const char *indata; - u32 indata_len; - } cls; - struct { - u64 cookie, count; - } pgls; - struct { - u64 snapid; - } snap; - }; - u32 payload_len; -}; - extern int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client); extern void ceph_osdc_stop(struct ceph_osd_client *osdc); @@ -162,30 +119,6 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg); -extern void ceph_calc_raw_layout(struct ceph_osd_client *osdc, - struct ceph_file_layout *layout, - u64 snapid, - u64 off, u64 *plen, u64 *bno, - struct ceph_osd_request *req, - struct ceph_osd_req_op *op); - -extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, - int flags, - struct ceph_snap_context *snapc, - struct ceph_osd_req_op *ops, - bool use_mempool, - gfp_t gfp_flags, - struct page **pages, - struct bio *bio); - -extern void ceph_osdc_build_request(struct ceph_osd_request *req, - u64 off, u64 *plen, - struct ceph_osd_req_op *src_ops, - struct ceph_snap_context *snapc, - struct timespec *mtime, - const char *oid, - int oid_len); - extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, struct ceph_file_layout *layout, struct ceph_vino vino, diff --git a/trunk/net/ceph/osdmap.c b/trunk/fs/ceph/osdmap.c similarity index 97% rename from trunk/net/ceph/osdmap.c rename to trunk/fs/ceph/osdmap.c index d73f3f6efa36..e31f118f1392 100644 --- a/trunk/net/ceph/osdmap.c +++ b/trunk/fs/ceph/osdmap.c @@ -1,15 +1,14 @@ -#include +#include "ceph_debug.h" -#include #include #include -#include -#include -#include -#include -#include +#include 
"super.h" +#include "osdmap.h" +#include "crush/hash.h" +#include "crush/mapper.h" +#include "decode.h" char *ceph_osdmap_state_str(char *str, int len, int state) { @@ -418,20 +417,6 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) return NULL; } -int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name) -{ - struct rb_node *rbp; - - for (rbp = rb_first(&map->pg_pools); rbp; rbp = rb_next(rbp)) { - struct ceph_pg_pool_info *pi = - rb_entry(rbp, struct ceph_pg_pool_info, node); - if (pi->name && strcmp(pi->name, name) == 0) - return pi->id; - } - return -ENOENT; -} -EXPORT_SYMBOL(ceph_pg_poolid_by_name); - static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) { rb_erase(&pi->node, root); @@ -981,7 +966,6 @@ void ceph_calc_file_object_mapping(struct ceph_file_layout *layout, dout(" obj extent %llu~%llu\n", *oxoff, *oxlen); } -EXPORT_SYMBOL(ceph_calc_file_object_mapping); /* * calculate an object layout (i.e. pgid) from an oid, @@ -1027,7 +1011,6 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol, ol->ol_stripe_unit = fl->fl_object_stripe_unit; return 0; } -EXPORT_SYMBOL(ceph_calc_object_layout); /* * Calculate raw osd vector for the given pgid. 
Return pointer to osd @@ -1125,4 +1108,3 @@ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) return osds[i]; return -1; } -EXPORT_SYMBOL(ceph_calc_pg_primary); diff --git a/trunk/include/linux/ceph/osdmap.h b/trunk/fs/ceph/osdmap.h similarity index 97% rename from trunk/include/linux/ceph/osdmap.h rename to trunk/fs/ceph/osdmap.h index ba4c205cbb01..970b547e510d 100644 --- a/trunk/include/linux/ceph/osdmap.h +++ b/trunk/fs/ceph/osdmap.h @@ -4,7 +4,7 @@ #include #include "types.h" #include "ceph_fs.h" -#include +#include "crush/crush.h" /* * The osd map describes the current membership of the osd cluster and @@ -125,6 +125,4 @@ extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid); -extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name); - #endif diff --git a/trunk/fs/ceph/pagelist.c b/trunk/fs/ceph/pagelist.c new file mode 100644 index 000000000000..46a368b6dce5 --- /dev/null +++ b/trunk/fs/ceph/pagelist.c @@ -0,0 +1,63 @@ + +#include +#include +#include + +#include "pagelist.h" + +static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl) +{ + struct page *page = list_entry(pl->head.prev, struct page, + lru); + kunmap(page); +} + +int ceph_pagelist_release(struct ceph_pagelist *pl) +{ + if (pl->mapped_tail) + ceph_pagelist_unmap_tail(pl); + + while (!list_empty(&pl->head)) { + struct page *page = list_first_entry(&pl->head, struct page, + lru); + list_del(&page->lru); + __free_page(page); + } + return 0; +} + +static int ceph_pagelist_addpage(struct ceph_pagelist *pl) +{ + struct page *page = __page_cache_alloc(GFP_NOFS); + if (!page) + return -ENOMEM; + pl->room += PAGE_SIZE; + list_add_tail(&page->lru, &pl->head); + if (pl->mapped_tail) + ceph_pagelist_unmap_tail(pl); + pl->mapped_tail = kmap(page); + return 0; +} + +int ceph_pagelist_append(struct ceph_pagelist *pl, void *buf, size_t len) +{ + while 
(pl->room < len) { + size_t bit = pl->room; + int ret; + + memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), + buf, bit); + pl->length += bit; + pl->room -= bit; + buf += bit; + len -= bit; + ret = ceph_pagelist_addpage(pl); + if (ret) + return ret; + } + + memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), buf, len); + pl->length += len; + pl->room -= len; + return 0; +} diff --git a/trunk/include/linux/ceph/pagelist.h b/trunk/fs/ceph/pagelist.h similarity index 62% rename from trunk/include/linux/ceph/pagelist.h rename to trunk/fs/ceph/pagelist.h index 9660d6b0a35d..e8a4187e1087 100644 --- a/trunk/include/linux/ceph/pagelist.h +++ b/trunk/fs/ceph/pagelist.h @@ -8,14 +8,6 @@ struct ceph_pagelist { void *mapped_tail; size_t length; size_t room; - struct list_head free_list; - size_t num_pages_free; -}; - -struct ceph_pagelist_cursor { - struct ceph_pagelist *pl; /* pagelist, for error checking */ - struct list_head *page_lru; /* page in list */ - size_t room; /* room remaining to reset to */ }; static inline void ceph_pagelist_init(struct ceph_pagelist *pl) @@ -24,23 +16,10 @@ static inline void ceph_pagelist_init(struct ceph_pagelist *pl) pl->mapped_tail = NULL; pl->length = 0; pl->room = 0; - INIT_LIST_HEAD(&pl->free_list); - pl->num_pages_free = 0; } - extern int ceph_pagelist_release(struct ceph_pagelist *pl); -extern int ceph_pagelist_append(struct ceph_pagelist *pl, const void *d, size_t l); - -extern int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space); - -extern int ceph_pagelist_free_reserve(struct ceph_pagelist *pl); - -extern void ceph_pagelist_set_cursor(struct ceph_pagelist *pl, - struct ceph_pagelist_cursor *c); - -extern int ceph_pagelist_truncate(struct ceph_pagelist *pl, - struct ceph_pagelist_cursor *c); +extern int ceph_pagelist_append(struct ceph_pagelist *pl, void *d, size_t l); static inline int ceph_pagelist_encode_64(struct ceph_pagelist *pl, u64 v) { diff --git a/trunk/include/linux/ceph/rados.h 
b/trunk/fs/ceph/rados.h similarity index 100% rename from trunk/include/linux/ceph/rados.h rename to trunk/fs/ceph/rados.h diff --git a/trunk/fs/ceph/snap.c b/trunk/fs/ceph/snap.c index 39c243acd062..190b6c4a6f2b 100644 --- a/trunk/fs/ceph/snap.c +++ b/trunk/fs/ceph/snap.c @@ -1,12 +1,10 @@ -#include +#include "ceph_debug.h" #include #include #include "super.h" -#include "mds_client.h" - -#include +#include "decode.h" /* * Snapshots in ceph are driven in large part by cooperation from the @@ -528,7 +526,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, struct ceph_cap_snap *capsnap) { struct inode *inode = &ci->vfs_inode; - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; BUG_ON(capsnap->writing); capsnap->size = inode->i_size; @@ -749,7 +747,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, struct ceph_msg *msg) { - struct super_block *sb = mdsc->fsc->sb; + struct super_block *sb = mdsc->client->sb; int mds = session->s_mds; u64 split; int op; diff --git a/trunk/fs/ceph/super.c b/trunk/fs/ceph/super.c index d6e0e0421891..9922628532b2 100644 --- a/trunk/fs/ceph/super.c +++ b/trunk/fs/ceph/super.c @@ -1,5 +1,5 @@ -#include +#include "ceph_debug.h" #include #include @@ -15,13 +15,10 @@ #include #include +#include "decode.h" #include "super.h" -#include "mds_client.h" - -#include -#include -#include -#include +#include "mon_client.h" +#include "auth.h" /* * Ceph superblock operations @@ -29,22 +26,36 @@ * Handle the basics of mounting, unmounting. 
*/ + +/* + * find filename portion of a path (/foo/bar/baz -> baz) + */ +const char *ceph_file_part(const char *s, int len) +{ + const char *e = s + len; + + while (e != s && *(e-1) != '/') + e--; + return e; +} + + /* * super ops */ static void ceph_put_super(struct super_block *s) { - struct ceph_fs_client *fsc = ceph_sb_to_client(s); + struct ceph_client *client = ceph_sb_to_client(s); dout("put_super\n"); - ceph_mdsc_close_sessions(fsc->mdsc); + ceph_mdsc_close_sessions(&client->mdsc); /* * ensure we release the bdi before put_anon_super releases * the device name. */ - if (s->s_bdi == &fsc->backing_dev_info) { - bdi_unregister(&fsc->backing_dev_info); + if (s->s_bdi == &client->backing_dev_info) { + bdi_unregister(&client->backing_dev_info); s->s_bdi = NULL; } @@ -53,14 +64,14 @@ static void ceph_put_super(struct super_block *s) static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) { - struct ceph_fs_client *fsc = ceph_inode_to_client(dentry->d_inode); - struct ceph_monmap *monmap = fsc->client->monc.monmap; + struct ceph_client *client = ceph_inode_to_client(dentry->d_inode); + struct ceph_monmap *monmap = client->monc.monmap; struct ceph_statfs st; u64 fsid; int err; dout("statfs\n"); - err = ceph_monc_do_statfs(&fsc->client->monc, &st); + err = ceph_monc_do_statfs(&client->monc, &st); if (err < 0) return err; @@ -93,28 +104,238 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) static int ceph_sync_fs(struct super_block *sb, int wait) { - struct ceph_fs_client *fsc = ceph_sb_to_client(sb); + struct ceph_client *client = ceph_sb_to_client(sb); if (!wait) { dout("sync_fs (non-blocking)\n"); - ceph_flush_dirty_caps(fsc->mdsc); + ceph_flush_dirty_caps(&client->mdsc); dout("sync_fs (non-blocking) done\n"); return 0; } dout("sync_fs (blocking)\n"); - ceph_osdc_sync(&fsc->client->osdc); - ceph_mdsc_sync(fsc->mdsc); + ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc); + ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc); dout("sync_fs 
(blocking) done\n"); return 0; } +static int default_congestion_kb(void) +{ + int congestion_kb; + + /* + * Copied from NFS + * + * congestion size, scale with available memory. + * + * 64MB: 8192k + * 128MB: 11585k + * 256MB: 16384k + * 512MB: 23170k + * 1GB: 32768k + * 2GB: 46340k + * 4GB: 65536k + * 8GB: 92681k + * 16GB: 131072k + * + * This allows larger machines to have larger/more transfers. + * Limit the default to 256M + */ + congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); + if (congestion_kb > 256*1024) + congestion_kb = 256*1024; + + return congestion_kb; +} + +/** + * ceph_show_options - Show mount options in /proc/mounts + * @m: seq_file to write to + * @mnt: mount descriptor + */ +static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) +{ + struct ceph_client *client = ceph_sb_to_client(mnt->mnt_sb); + struct ceph_mount_args *args = client->mount_args; + + if (args->flags & CEPH_OPT_FSID) + seq_printf(m, ",fsid=%pU", &args->fsid); + if (args->flags & CEPH_OPT_NOSHARE) + seq_puts(m, ",noshare"); + if (args->flags & CEPH_OPT_DIRSTAT) + seq_puts(m, ",dirstat"); + if ((args->flags & CEPH_OPT_RBYTES) == 0) + seq_puts(m, ",norbytes"); + if (args->flags & CEPH_OPT_NOCRC) + seq_puts(m, ",nocrc"); + if (args->flags & CEPH_OPT_NOASYNCREADDIR) + seq_puts(m, ",noasyncreaddir"); + + if (args->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) + seq_printf(m, ",mount_timeout=%d", args->mount_timeout); + if (args->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) + seq_printf(m, ",osd_idle_ttl=%d", args->osd_idle_ttl); + if (args->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT) + seq_printf(m, ",osdtimeout=%d", args->osd_timeout); + if (args->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) + seq_printf(m, ",osdkeepalivetimeout=%d", + args->osd_keepalive_timeout); + if (args->wsize) + seq_printf(m, ",wsize=%d", args->wsize); + if (args->rsize != CEPH_MOUNT_RSIZE_DEFAULT) + seq_printf(m, ",rsize=%d", args->rsize); + if (args->congestion_kb != 
default_congestion_kb()) + seq_printf(m, ",write_congestion_kb=%d", args->congestion_kb); + if (args->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) + seq_printf(m, ",caps_wanted_delay_min=%d", + args->caps_wanted_delay_min); + if (args->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) + seq_printf(m, ",caps_wanted_delay_max=%d", + args->caps_wanted_delay_max); + if (args->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT) + seq_printf(m, ",cap_release_safety=%d", + args->cap_release_safety); + if (args->max_readdir != CEPH_MAX_READDIR_DEFAULT) + seq_printf(m, ",readdir_max_entries=%d", args->max_readdir); + if (args->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) + seq_printf(m, ",readdir_max_bytes=%d", args->max_readdir_bytes); + if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) + seq_printf(m, ",snapdirname=%s", args->snapdir_name); + if (args->name) + seq_printf(m, ",name=%s", args->name); + if (args->secret) + seq_puts(m, ",secret="); + return 0; +} + +/* + * caches + */ +struct kmem_cache *ceph_inode_cachep; +struct kmem_cache *ceph_cap_cachep; +struct kmem_cache *ceph_dentry_cachep; +struct kmem_cache *ceph_file_cachep; + +static void ceph_inode_init_once(void *foo) +{ + struct ceph_inode_info *ci = foo; + inode_init_once(&ci->vfs_inode); +} + +static int __init init_caches(void) +{ + ceph_inode_cachep = kmem_cache_create("ceph_inode_info", + sizeof(struct ceph_inode_info), + __alignof__(struct ceph_inode_info), + (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), + ceph_inode_init_once); + if (ceph_inode_cachep == NULL) + return -ENOMEM; + + ceph_cap_cachep = KMEM_CACHE(ceph_cap, + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); + if (ceph_cap_cachep == NULL) + goto bad_cap; + + ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); + if (ceph_dentry_cachep == NULL) + goto bad_dentry; + + ceph_file_cachep = KMEM_CACHE(ceph_file_info, + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); + if (ceph_file_cachep 
== NULL) + goto bad_file; + + return 0; + +bad_file: + kmem_cache_destroy(ceph_dentry_cachep); +bad_dentry: + kmem_cache_destroy(ceph_cap_cachep); +bad_cap: + kmem_cache_destroy(ceph_inode_cachep); + return -ENOMEM; +} + +static void destroy_caches(void) +{ + kmem_cache_destroy(ceph_inode_cachep); + kmem_cache_destroy(ceph_cap_cachep); + kmem_cache_destroy(ceph_dentry_cachep); + kmem_cache_destroy(ceph_file_cachep); +} + + +/* + * ceph_umount_begin - initiate forced umount. Tear down down the + * mount, skipping steps that may hang while waiting for server(s). + */ +static void ceph_umount_begin(struct super_block *sb) +{ + struct ceph_client *client = ceph_sb_to_client(sb); + + dout("ceph_umount_begin - starting forced umount\n"); + if (!client) + return; + client->mount_state = CEPH_MOUNT_SHUTDOWN; + return; +} + +static const struct super_operations ceph_super_ops = { + .alloc_inode = ceph_alloc_inode, + .destroy_inode = ceph_destroy_inode, + .write_inode = ceph_write_inode, + .sync_fs = ceph_sync_fs, + .put_super = ceph_put_super, + .show_options = ceph_show_options, + .statfs = ceph_statfs, + .umount_begin = ceph_umount_begin, +}; + + +const char *ceph_msg_type_name(int type) +{ + switch (type) { + case CEPH_MSG_SHUTDOWN: return "shutdown"; + case CEPH_MSG_PING: return "ping"; + case CEPH_MSG_AUTH: return "auth"; + case CEPH_MSG_AUTH_REPLY: return "auth_reply"; + case CEPH_MSG_MON_MAP: return "mon_map"; + case CEPH_MSG_MON_GET_MAP: return "mon_get_map"; + case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe"; + case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack"; + case CEPH_MSG_STATFS: return "statfs"; + case CEPH_MSG_STATFS_REPLY: return "statfs_reply"; + case CEPH_MSG_MDS_MAP: return "mds_map"; + case CEPH_MSG_CLIENT_SESSION: return "client_session"; + case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect"; + case CEPH_MSG_CLIENT_REQUEST: return "client_request"; + case CEPH_MSG_CLIENT_REQUEST_FORWARD: return "client_request_forward"; + case 
CEPH_MSG_CLIENT_REPLY: return "client_reply"; + case CEPH_MSG_CLIENT_CAPS: return "client_caps"; + case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release"; + case CEPH_MSG_CLIENT_SNAP: return "client_snap"; + case CEPH_MSG_CLIENT_LEASE: return "client_lease"; + case CEPH_MSG_OSD_MAP: return "osd_map"; + case CEPH_MSG_OSD_OP: return "osd_op"; + case CEPH_MSG_OSD_OPREPLY: return "osd_opreply"; + default: return "unknown"; + } +} + + /* * mount options */ enum { Opt_wsize, Opt_rsize, + Opt_osdtimeout, + Opt_osdkeepalivetimeout, + Opt_mount_timeout, + Opt_osd_idle_ttl, Opt_caps_wanted_delay_min, Opt_caps_wanted_delay_max, Opt_cap_release_safety, @@ -123,19 +344,29 @@ enum { Opt_congestion_kb, Opt_last_int, /* int args above */ + Opt_fsid, Opt_snapdirname, + Opt_name, + Opt_secret, Opt_last_string, /* string args above */ + Opt_ip, + Opt_noshare, Opt_dirstat, Opt_nodirstat, Opt_rbytes, Opt_norbytes, + Opt_nocrc, Opt_noasyncreaddir, }; -static match_table_t fsopt_tokens = { +static match_table_t arg_tokens = { {Opt_wsize, "wsize=%d"}, {Opt_rsize, "rsize=%d"}, + {Opt_osdtimeout, "osdtimeout=%d"}, + {Opt_osdkeepalivetimeout, "osdkeepalive=%d"}, + {Opt_mount_timeout, "mount_timeout=%d"}, + {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, {Opt_cap_release_safety, "cap_release_safety=%d"}, @@ -143,459 +374,403 @@ static match_table_t fsopt_tokens = { {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, {Opt_congestion_kb, "write_congestion_kb=%d"}, /* int args above */ + {Opt_fsid, "fsid=%s"}, {Opt_snapdirname, "snapdirname=%s"}, + {Opt_name, "name=%s"}, + {Opt_secret, "secret=%s"}, /* string args above */ + {Opt_ip, "ip=%s"}, + {Opt_noshare, "noshare"}, {Opt_dirstat, "dirstat"}, {Opt_nodirstat, "nodirstat"}, {Opt_rbytes, "rbytes"}, {Opt_norbytes, "norbytes"}, + {Opt_nocrc, "nocrc"}, {Opt_noasyncreaddir, "noasyncreaddir"}, {-1, NULL} }; -static int parse_fsopt_token(char 
*c, void *private) +static int parse_fsid(const char *str, struct ceph_fsid *fsid) { - struct ceph_mount_options *fsopt = private; - substring_t argstr[MAX_OPT_ARGS]; - int token, intval, ret; - - token = match_token((char *)c, fsopt_tokens, argstr); - if (token < 0) - return -EINVAL; - - if (token < Opt_last_int) { - ret = match_int(&argstr[0], &intval); - if (ret < 0) { - pr_err("bad mount option arg (not int) " - "at '%s'\n", c); - return ret; + int i = 0; + char tmp[3]; + int err = -EINVAL; + int d; + + dout("parse_fsid '%s'\n", str); + tmp[2] = 0; + while (*str && i < 16) { + if (ispunct(*str)) { + str++; + continue; } - dout("got int token %d val %d\n", token, intval); - } else if (token > Opt_last_int && token < Opt_last_string) { - dout("got string token %d val %s\n", token, - argstr[0].from); - } else { - dout("got token %d\n", token); - } - - switch (token) { - case Opt_snapdirname: - kfree(fsopt->snapdir_name); - fsopt->snapdir_name = kstrndup(argstr[0].from, - argstr[0].to-argstr[0].from, - GFP_KERNEL); - if (!fsopt->snapdir_name) - return -ENOMEM; - break; - - /* misc */ - case Opt_wsize: - fsopt->wsize = intval; - break; - case Opt_rsize: - fsopt->rsize = intval; - break; - case Opt_caps_wanted_delay_min: - fsopt->caps_wanted_delay_min = intval; - break; - case Opt_caps_wanted_delay_max: - fsopt->caps_wanted_delay_max = intval; - break; - case Opt_readdir_max_entries: - fsopt->max_readdir = intval; - break; - case Opt_readdir_max_bytes: - fsopt->max_readdir_bytes = intval; - break; - case Opt_congestion_kb: - fsopt->congestion_kb = intval; - break; - case Opt_dirstat: - fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; - break; - case Opt_nodirstat: - fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; - break; - case Opt_rbytes: - fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; - break; - case Opt_norbytes: - fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; - break; - case Opt_noasyncreaddir: - fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; - break; - default: - BUG_ON(token); + if 
(!isxdigit(str[0]) || !isxdigit(str[1])) + break; + tmp[0] = str[0]; + tmp[1] = str[1]; + if (sscanf(tmp, "%x", &d) < 1) + break; + fsid->fsid[i] = d & 0xff; + i++; + str += 2; } - return 0; -} - -static void destroy_mount_options(struct ceph_mount_options *args) -{ - dout("destroy_mount_options %p\n", args); - kfree(args->snapdir_name); - kfree(args); -} - -static int strcmp_null(const char *s1, const char *s2) -{ - if (!s1 && !s2) - return 0; - if (s1 && !s2) - return -1; - if (!s1 && s2) - return 1; - return strcmp(s1, s2); -} - -static int compare_mount_options(struct ceph_mount_options *new_fsopt, - struct ceph_options *new_opt, - struct ceph_fs_client *fsc) -{ - struct ceph_mount_options *fsopt1 = new_fsopt; - struct ceph_mount_options *fsopt2 = fsc->mount_options; - int ofs = offsetof(struct ceph_mount_options, snapdir_name); - int ret; - - ret = memcmp(fsopt1, fsopt2, ofs); - if (ret) - return ret; - - ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); - if (ret) - return ret; - return ceph_compare_options(new_opt, fsc->client); + if (i == 16) + err = 0; + dout("parse_fsid ret %d got fsid %pU", err, fsid); + return err; } -static int parse_mount_options(struct ceph_mount_options **pfsopt, - struct ceph_options **popt, - int flags, char *options, - const char *dev_name, - const char **path) +static struct ceph_mount_args *parse_mount_args(int flags, char *options, + const char *dev_name, + const char **path) { - struct ceph_mount_options *fsopt; - const char *dev_name_end; + struct ceph_mount_args *args; + const char *c; int err = -ENOMEM; + substring_t argstr[MAX_OPT_ARGS]; - fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL); - if (!fsopt) - return -ENOMEM; + args = kzalloc(sizeof(*args), GFP_KERNEL); + if (!args) + return ERR_PTR(-ENOMEM); + args->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*args->mon_addr), + GFP_KERNEL); + if (!args->mon_addr) + goto out; + + dout("parse_mount_args %p, dev_name '%s'\n", args, dev_name); + + /* start with defaults */ + 
args->sb_flags = flags; + args->flags = CEPH_OPT_DEFAULT; + args->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT; + args->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; + args->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ + args->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ + args->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; + args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; + args->rsize = CEPH_MOUNT_RSIZE_DEFAULT; + args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); + args->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; + args->max_readdir = CEPH_MAX_READDIR_DEFAULT; + args->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; + args->congestion_kb = default_congestion_kb(); + + /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ + err = -EINVAL; + if (!dev_name) + goto out; + *path = strstr(dev_name, ":/"); + if (*path == NULL) { + pr_err("device name is missing path (no :/ in %s)\n", + dev_name); + goto out; + } - dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name); - - fsopt->sb_flags = flags; - fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; - - fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT; - fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); - fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; - fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; - fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; - fsopt->congestion_kb = default_congestion_kb(); - - /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ - err = -EINVAL; - if (!dev_name) - goto out; - *path = strstr(dev_name, ":/"); - if (*path == NULL) { - pr_err("device name is missing path (no :/ in %s)\n", - dev_name); - goto out; - } - dev_name_end = *path; - dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); + /* get mon ip(s) */ + err = ceph_parse_ips(dev_name, *path, args->mon_addr, + CEPH_MAX_MON, &args->num_mon); + if (err < 0) + goto out; /* path on server */ *path += 2; 
dout("server path '%s'\n", *path); - err = ceph_parse_options(popt, options, dev_name, dev_name_end, - parse_fsopt_token, (void *)fsopt); - if (err) - goto out; - - /* success */ - *pfsopt = fsopt; - return 0; + /* parse mount options */ + while ((c = strsep(&options, ",")) != NULL) { + int token, intval, ret; + if (!*c) + continue; + err = -EINVAL; + token = match_token((char *)c, arg_tokens, argstr); + if (token < 0) { + pr_err("bad mount option at '%s'\n", c); + goto out; + } + if (token < Opt_last_int) { + ret = match_int(&argstr[0], &intval); + if (ret < 0) { + pr_err("bad mount option arg (not int) " + "at '%s'\n", c); + continue; + } + dout("got int token %d val %d\n", token, intval); + } else if (token > Opt_last_int && token < Opt_last_string) { + dout("got string token %d val %s\n", token, + argstr[0].from); + } else { + dout("got token %d\n", token); + } + switch (token) { + case Opt_ip: + err = ceph_parse_ips(argstr[0].from, + argstr[0].to, + &args->my_addr, + 1, NULL); + if (err < 0) + goto out; + args->flags |= CEPH_OPT_MYIP; + break; + + case Opt_fsid: + err = parse_fsid(argstr[0].from, &args->fsid); + if (err == 0) + args->flags |= CEPH_OPT_FSID; + break; + case Opt_snapdirname: + kfree(args->snapdir_name); + args->snapdir_name = kstrndup(argstr[0].from, + argstr[0].to-argstr[0].from, + GFP_KERNEL); + break; + case Opt_name: + args->name = kstrndup(argstr[0].from, + argstr[0].to-argstr[0].from, + GFP_KERNEL); + break; + case Opt_secret: + args->secret = kstrndup(argstr[0].from, + argstr[0].to-argstr[0].from, + GFP_KERNEL); + break; + + /* misc */ + case Opt_wsize: + args->wsize = intval; + break; + case Opt_rsize: + args->rsize = intval; + break; + case Opt_osdtimeout: + args->osd_timeout = intval; + break; + case Opt_osdkeepalivetimeout: + args->osd_keepalive_timeout = intval; + break; + case Opt_osd_idle_ttl: + args->osd_idle_ttl = intval; + break; + case Opt_mount_timeout: + args->mount_timeout = intval; + break; + case Opt_caps_wanted_delay_min: 
+ args->caps_wanted_delay_min = intval; + break; + case Opt_caps_wanted_delay_max: + args->caps_wanted_delay_max = intval; + break; + case Opt_readdir_max_entries: + args->max_readdir = intval; + break; + case Opt_readdir_max_bytes: + args->max_readdir_bytes = intval; + break; + case Opt_congestion_kb: + args->congestion_kb = intval; + break; + + case Opt_noshare: + args->flags |= CEPH_OPT_NOSHARE; + break; + + case Opt_dirstat: + args->flags |= CEPH_OPT_DIRSTAT; + break; + case Opt_nodirstat: + args->flags &= ~CEPH_OPT_DIRSTAT; + break; + case Opt_rbytes: + args->flags |= CEPH_OPT_RBYTES; + break; + case Opt_norbytes: + args->flags &= ~CEPH_OPT_RBYTES; + break; + case Opt_nocrc: + args->flags |= CEPH_OPT_NOCRC; + break; + case Opt_noasyncreaddir: + args->flags |= CEPH_OPT_NOASYNCREADDIR; + break; + + default: + BUG_ON(token); + } + } + return args; out: - destroy_mount_options(fsopt); - return err; -} - -/** - * ceph_show_options - Show mount options in /proc/mounts - * @m: seq_file to write to - * @mnt: mount descriptor - */ -static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) -{ - struct ceph_fs_client *fsc = ceph_sb_to_client(mnt->mnt_sb); - struct ceph_mount_options *fsopt = fsc->mount_options; - struct ceph_options *opt = fsc->client->options; - - if (opt->flags & CEPH_OPT_FSID) - seq_printf(m, ",fsid=%pU", &opt->fsid); - if (opt->flags & CEPH_OPT_NOSHARE) - seq_puts(m, ",noshare"); - if (opt->flags & CEPH_OPT_NOCRC) - seq_puts(m, ",nocrc"); - - if (opt->name) - seq_printf(m, ",name=%s", opt->name); - if (opt->secret) - seq_puts(m, ",secret="); - - if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) - seq_printf(m, ",mount_timeout=%d", opt->mount_timeout); - if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) - seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl); - if (opt->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT) - seq_printf(m, ",osdtimeout=%d", opt->osd_timeout); - if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) - 
seq_printf(m, ",osdkeepalivetimeout=%d", - opt->osd_keepalive_timeout); - - if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) - seq_puts(m, ",dirstat"); - if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES) == 0) - seq_puts(m, ",norbytes"); - if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) - seq_puts(m, ",noasyncreaddir"); - - if (fsopt->wsize) - seq_printf(m, ",wsize=%d", fsopt->wsize); - if (fsopt->rsize != CEPH_MOUNT_RSIZE_DEFAULT) - seq_printf(m, ",rsize=%d", fsopt->rsize); - if (fsopt->congestion_kb != default_congestion_kb()) - seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb); - if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) - seq_printf(m, ",caps_wanted_delay_min=%d", - fsopt->caps_wanted_delay_min); - if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) - seq_printf(m, ",caps_wanted_delay_max=%d", - fsopt->caps_wanted_delay_max); - if (fsopt->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT) - seq_printf(m, ",cap_release_safety=%d", - fsopt->cap_release_safety); - if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) - seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir); - if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) - seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes); - if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) - seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name); - return 0; + kfree(args->mon_addr); + kfree(args); + return ERR_PTR(err); } -/* - * handle any mon messages the standard library doesn't understand. - * return error if we don't either. 
- */ -static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg) +static void destroy_mount_args(struct ceph_mount_args *args) { - struct ceph_fs_client *fsc = client->private; - int type = le16_to_cpu(msg->hdr.type); - - switch (type) { - case CEPH_MSG_MDS_MAP: - ceph_mdsc_handle_map(fsc->mdsc, msg); - return 0; - - default: - return -1; - } + dout("destroy_mount_args %p\n", args); + kfree(args->snapdir_name); + args->snapdir_name = NULL; + kfree(args->name); + args->name = NULL; + kfree(args->secret); + args->secret = NULL; + kfree(args); } /* - * create a new fs client + * create a fresh client instance */ -struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, - struct ceph_options *opt) +static struct ceph_client *ceph_create_client(struct ceph_mount_args *args) { - struct ceph_fs_client *fsc; + struct ceph_client *client; int err = -ENOMEM; - fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); - if (!fsc) + client = kzalloc(sizeof(*client), GFP_KERNEL); + if (client == NULL) return ERR_PTR(-ENOMEM); - fsc->client = ceph_create_client(opt, fsc); - if (IS_ERR(fsc->client)) { - err = PTR_ERR(fsc->client); - goto fail; - } - fsc->client->extra_mon_dispatch = extra_mon_dispatch; - fsc->client->supported_features |= CEPH_FEATURE_FLOCK; - fsc->client->monc.want_mdsmap = 1; + mutex_init(&client->mount_mutex); + + init_waitqueue_head(&client->auth_wq); - fsc->mount_options = fsopt; + client->sb = NULL; + client->mount_state = CEPH_MOUNT_MOUNTING; + client->mount_args = args; - fsc->sb = NULL; - fsc->mount_state = CEPH_MOUNT_MOUNTING; + client->msgr = NULL; - atomic_long_set(&fsc->writeback_count, 0); + client->auth_err = 0; + atomic_long_set(&client->writeback_count, 0); - err = bdi_init(&fsc->backing_dev_info); + err = bdi_init(&client->backing_dev_info); if (err < 0) - goto fail_client; + goto fail; err = -ENOMEM; - fsc->wb_wq = create_workqueue("ceph-writeback"); - if (fsc->wb_wq == NULL) + client->wb_wq = 
create_workqueue("ceph-writeback"); + if (client->wb_wq == NULL) goto fail_bdi; - fsc->pg_inv_wq = create_singlethread_workqueue("ceph-pg-invalid"); - if (fsc->pg_inv_wq == NULL) + client->pg_inv_wq = create_singlethread_workqueue("ceph-pg-invalid"); + if (client->pg_inv_wq == NULL) goto fail_wb_wq; - fsc->trunc_wq = create_singlethread_workqueue("ceph-trunc"); - if (fsc->trunc_wq == NULL) + client->trunc_wq = create_singlethread_workqueue("ceph-trunc"); + if (client->trunc_wq == NULL) goto fail_pg_inv_wq; /* set up mempools */ err = -ENOMEM; - fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, - fsc->mount_options->wsize >> PAGE_CACHE_SHIFT); - if (!fsc->wb_pagevec_pool) + client->wb_pagevec_pool = mempool_create_kmalloc_pool(10, + client->mount_args->wsize >> PAGE_CACHE_SHIFT); + if (!client->wb_pagevec_pool) goto fail_trunc_wq; /* caps */ - fsc->min_caps = fsopt->max_readdir; - - return fsc; + client->min_caps = args->max_readdir; + /* subsystems */ + err = ceph_monc_init(&client->monc, client); + if (err < 0) + goto fail_mempool; + err = ceph_osdc_init(&client->osdc, client); + if (err < 0) + goto fail_monc; + err = ceph_mdsc_init(&client->mdsc, client); + if (err < 0) + goto fail_osdc; + return client; + +fail_osdc: + ceph_osdc_stop(&client->osdc); +fail_monc: + ceph_monc_stop(&client->monc); +fail_mempool: + mempool_destroy(client->wb_pagevec_pool); fail_trunc_wq: - destroy_workqueue(fsc->trunc_wq); + destroy_workqueue(client->trunc_wq); fail_pg_inv_wq: - destroy_workqueue(fsc->pg_inv_wq); + destroy_workqueue(client->pg_inv_wq); fail_wb_wq: - destroy_workqueue(fsc->wb_wq); + destroy_workqueue(client->wb_wq); fail_bdi: - bdi_destroy(&fsc->backing_dev_info); -fail_client: - ceph_destroy_client(fsc->client); + bdi_destroy(&client->backing_dev_info); fail: - kfree(fsc); + kfree(client); return ERR_PTR(err); } -void destroy_fs_client(struct ceph_fs_client *fsc) +static void ceph_destroy_client(struct ceph_client *client) { - dout("destroy_fs_client %p\n", 
fsc); + dout("destroy_client %p\n", client); - destroy_workqueue(fsc->wb_wq); - destroy_workqueue(fsc->pg_inv_wq); - destroy_workqueue(fsc->trunc_wq); + /* unmount */ + ceph_mdsc_stop(&client->mdsc); + ceph_osdc_stop(&client->osdc); - bdi_destroy(&fsc->backing_dev_info); + /* + * make sure mds and osd connections close out before destroying + * the auth module, which is needed to free those connections' + * ceph_authorizers. + */ + ceph_msgr_flush(); + + ceph_monc_stop(&client->monc); - mempool_destroy(fsc->wb_pagevec_pool); + ceph_debugfs_client_cleanup(client); + destroy_workqueue(client->wb_wq); + destroy_workqueue(client->pg_inv_wq); + destroy_workqueue(client->trunc_wq); - destroy_mount_options(fsc->mount_options); + bdi_destroy(&client->backing_dev_info); - ceph_fs_debugfs_cleanup(fsc); + if (client->msgr) + ceph_messenger_destroy(client->msgr); + mempool_destroy(client->wb_pagevec_pool); - ceph_destroy_client(fsc->client); + destroy_mount_args(client->mount_args); - kfree(fsc); - dout("destroy_fs_client %p done\n", fsc); + kfree(client); + dout("destroy_client %p done\n", client); } /* - * caches + * Initially learn our fsid, or verify an fsid matches. 
*/ -struct kmem_cache *ceph_inode_cachep; -struct kmem_cache *ceph_cap_cachep; -struct kmem_cache *ceph_dentry_cachep; -struct kmem_cache *ceph_file_cachep; - -static void ceph_inode_init_once(void *foo) +int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) { - struct ceph_inode_info *ci = foo; - inode_init_once(&ci->vfs_inode); -} - -static int __init init_caches(void) -{ - ceph_inode_cachep = kmem_cache_create("ceph_inode_info", - sizeof(struct ceph_inode_info), - __alignof__(struct ceph_inode_info), - (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), - ceph_inode_init_once); - if (ceph_inode_cachep == NULL) - return -ENOMEM; - - ceph_cap_cachep = KMEM_CACHE(ceph_cap, - SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); - if (ceph_cap_cachep == NULL) - goto bad_cap; - - ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, - SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); - if (ceph_dentry_cachep == NULL) - goto bad_dentry; - - ceph_file_cachep = KMEM_CACHE(ceph_file_info, - SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); - if (ceph_file_cachep == NULL) - goto bad_file; - + if (client->have_fsid) { + if (ceph_fsid_compare(&client->fsid, fsid)) { + pr_err("bad fsid, had %pU got %pU", + &client->fsid, fsid); + return -1; + } + } else { + pr_info("client%lld fsid %pU\n", client->monc.auth->global_id, + fsid); + memcpy(&client->fsid, fsid, sizeof(*fsid)); + ceph_debugfs_client_init(client); + client->have_fsid = true; + } return 0; - -bad_file: - kmem_cache_destroy(ceph_dentry_cachep); -bad_dentry: - kmem_cache_destroy(ceph_cap_cachep); -bad_cap: - kmem_cache_destroy(ceph_inode_cachep); - return -ENOMEM; } -static void destroy_caches(void) -{ - kmem_cache_destroy(ceph_inode_cachep); - kmem_cache_destroy(ceph_cap_cachep); - kmem_cache_destroy(ceph_dentry_cachep); - kmem_cache_destroy(ceph_file_cachep); -} - - /* - * ceph_umount_begin - initiate forced umount. Tear down down the - * mount, skipping steps that may hang while waiting for server(s). 
+ * true if we have the mon map (and have thus joined the cluster) */ -static void ceph_umount_begin(struct super_block *sb) +static int have_mon_and_osd_map(struct ceph_client *client) { - struct ceph_fs_client *fsc = ceph_sb_to_client(sb); - - dout("ceph_umount_begin - starting forced umount\n"); - if (!fsc) - return; - fsc->mount_state = CEPH_MOUNT_SHUTDOWN; - return; + return client->monc.monmap && client->monc.monmap->epoch && + client->osdc.osdmap && client->osdc.osdmap->epoch; } -static const struct super_operations ceph_super_ops = { - .alloc_inode = ceph_alloc_inode, - .destroy_inode = ceph_destroy_inode, - .write_inode = ceph_write_inode, - .sync_fs = ceph_sync_fs, - .put_super = ceph_put_super, - .show_options = ceph_show_options, - .statfs = ceph_statfs, - .umount_begin = ceph_umount_begin, -}; - /* * Bootstrap mount by opening the root directory. Note the mount * @started time from caller, and time out if this takes too long. */ -static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, +static struct dentry *open_root_dentry(struct ceph_client *client, const char *path, unsigned long started) { - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_client *mdsc = &client->mdsc; struct ceph_mds_request *req = NULL; int err; struct dentry *root; @@ -609,14 +784,14 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, req->r_ino1.ino = CEPH_INO_ROOT; req->r_ino1.snap = CEPH_NOSNAP; req->r_started = started; - req->r_timeout = fsc->client->options->mount_timeout * HZ; + req->r_timeout = client->mount_args->mount_timeout * HZ; req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); req->r_num_caps = 2; err = ceph_mdsc_do_request(mdsc, NULL, req); if (err == 0) { dout("open_root_inode success\n"); if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT && - fsc->sb->s_root == NULL) + client->sb->s_root == NULL) root = d_alloc_root(req->r_target_inode); else root = d_obtain_alias(req->r_target_inode); @@ -629,86 +804,105 @@ 
static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, return root; } - - - /* * mount: join the ceph cluster, and open root directory. */ -static int ceph_mount(struct ceph_fs_client *fsc, struct vfsmount *mnt, +static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt, const char *path) { + struct ceph_entity_addr *myaddr = NULL; int err; + unsigned long timeout = client->mount_args->mount_timeout * HZ; unsigned long started = jiffies; /* note the start time */ struct dentry *root; - int first = 0; /* first vfsmount for this super_block */ dout("mount start\n"); - mutex_lock(&fsc->client->mount_mutex); + mutex_lock(&client->mount_mutex); + + /* initialize the messenger */ + if (client->msgr == NULL) { + if (ceph_test_opt(client, MYIP)) + myaddr = &client->mount_args->my_addr; + client->msgr = ceph_messenger_create(myaddr); + if (IS_ERR(client->msgr)) { + err = PTR_ERR(client->msgr); + client->msgr = NULL; + goto out; + } + client->msgr->nocrc = ceph_test_opt(client, NOCRC); + } - err = __ceph_open_session(fsc->client, started); + /* open session, and wait for mon, mds, and osd maps */ + err = ceph_monc_open_session(&client->monc); if (err < 0) goto out; + while (!have_mon_and_osd_map(client)) { + err = -EIO; + if (timeout && time_after_eq(jiffies, started + timeout)) + goto out; + + /* wait */ + dout("mount waiting for mon_map\n"); + err = wait_event_interruptible_timeout(client->auth_wq, + have_mon_and_osd_map(client) || (client->auth_err < 0), + timeout); + if (err == -EINTR || err == -ERESTARTSYS) + goto out; + if (client->auth_err < 0) { + err = client->auth_err; + goto out; + } + } + dout("mount opening root\n"); - root = open_root_dentry(fsc, "", started); + root = open_root_dentry(client, "", started); if (IS_ERR(root)) { err = PTR_ERR(root); goto out; } - if (fsc->sb->s_root) { + if (client->sb->s_root) dput(root); - } else { - fsc->sb->s_root = root; - first = 1; - - err = ceph_fs_debugfs_init(fsc); - if (err < 0) - goto fail; - 
} + else + client->sb->s_root = root; if (path[0] == 0) { dget(root); } else { dout("mount opening base mountpoint\n"); - root = open_root_dentry(fsc, path, started); + root = open_root_dentry(client, path, started); if (IS_ERR(root)) { err = PTR_ERR(root); - goto fail; + dput(client->sb->s_root); + client->sb->s_root = NULL; + goto out; } } mnt->mnt_root = root; - mnt->mnt_sb = fsc->sb; + mnt->mnt_sb = client->sb; - fsc->mount_state = CEPH_MOUNT_MOUNTED; + client->mount_state = CEPH_MOUNT_MOUNTED; dout("mount success\n"); err = 0; out: - mutex_unlock(&fsc->client->mount_mutex); + mutex_unlock(&client->mount_mutex); return err; - -fail: - if (first) { - dput(fsc->sb->s_root); - fsc->sb->s_root = NULL; - } - goto out; } static int ceph_set_super(struct super_block *s, void *data) { - struct ceph_fs_client *fsc = data; + struct ceph_client *client = data; int ret; dout("set_super %p data %p\n", s, data); - s->s_flags = fsc->mount_options->sb_flags; + s->s_flags = client->mount_args->sb_flags; s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */ - s->s_fs_info = fsc; - fsc->sb = s; + s->s_fs_info = client; + client->sb = s; s->s_op = &ceph_super_ops; s->s_export_op = &ceph_export_ops; @@ -723,7 +917,7 @@ static int ceph_set_super(struct super_block *s, void *data) fail: s->s_fs_info = NULL; - fsc->sb = NULL; + client->sb = NULL; return ret; } @@ -732,23 +926,30 @@ static int ceph_set_super(struct super_block *s, void *data) */ static int ceph_compare_super(struct super_block *sb, void *data) { - struct ceph_fs_client *new = data; - struct ceph_mount_options *fsopt = new->mount_options; - struct ceph_options *opt = new->client->options; - struct ceph_fs_client *other = ceph_sb_to_client(sb); + struct ceph_client *new = data; + struct ceph_mount_args *args = new->mount_args; + struct ceph_client *other = ceph_sb_to_client(sb); + int i; dout("ceph_compare_super %p\n", sb); - - if (compare_mount_options(fsopt, opt, other)) { - dout("monitor(s)/mount options 
don't match\n"); - return 0; - } - if ((opt->flags & CEPH_OPT_FSID) && - ceph_fsid_compare(&opt->fsid, &other->client->fsid)) { - dout("fsid doesn't match\n"); - return 0; + if (args->flags & CEPH_OPT_FSID) { + if (ceph_fsid_compare(&args->fsid, &other->fsid)) { + dout("fsid doesn't match\n"); + return 0; + } + } else { + /* do we share (a) monitor? */ + for (i = 0; i < new->monc.monmap->num_mon; i++) + if (ceph_monmap_contains(other->monc.monmap, + &new->monc.monmap->mon_inst[i].addr)) + break; + if (i == new->monc.monmap->num_mon) { + dout("mon ip not part of monmap\n"); + return 0; + } + dout("mon ip matches existing sb %p\n", sb); } - if (fsopt->sb_flags != other->mount_options->sb_flags) { + if (args->sb_flags != other->mount_args->sb_flags) { dout("flags differ\n"); return 0; } @@ -760,20 +961,19 @@ static int ceph_compare_super(struct super_block *sb, void *data) */ static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); -static int ceph_register_bdi(struct super_block *sb, - struct ceph_fs_client *fsc) +static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) { int err; /* set ra_pages based on rsize mount option? 
*/ - if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE) - fsc->backing_dev_info.ra_pages = - (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1) + if (client->mount_args->rsize >= PAGE_CACHE_SIZE) + client->backing_dev_info.ra_pages = + (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_SHIFT; - err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d", + err = bdi_register(&client->backing_dev_info, NULL, "ceph-%d", atomic_long_inc_return(&bdi_seq)); if (!err) - sb->s_bdi = &fsc->backing_dev_info; + sb->s_bdi = &client->backing_dev_info; return err; } @@ -782,52 +982,46 @@ static int ceph_get_sb(struct file_system_type *fs_type, struct vfsmount *mnt) { struct super_block *sb; - struct ceph_fs_client *fsc; + struct ceph_client *client; int err; int (*compare_super)(struct super_block *, void *) = ceph_compare_super; const char *path = NULL; - struct ceph_mount_options *fsopt = NULL; - struct ceph_options *opt = NULL; + struct ceph_mount_args *args; dout("ceph_get_sb\n"); - err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path); - if (err < 0) + args = parse_mount_args(flags, data, dev_name, &path); + if (IS_ERR(args)) { + err = PTR_ERR(args); goto out_final; + } /* create client (which we may/may not use) */ - fsc = create_fs_client(fsopt, opt); - if (IS_ERR(fsc)) { - err = PTR_ERR(fsc); - kfree(fsopt); - kfree(opt); + client = ceph_create_client(args); + if (IS_ERR(client)) { + err = PTR_ERR(client); goto out_final; } - err = ceph_mdsc_init(fsc); - if (err < 0) - goto out; - - if (ceph_test_opt(fsc->client, NOSHARE)) + if (client->mount_args->flags & CEPH_OPT_NOSHARE) compare_super = NULL; - sb = sget(fs_type, compare_super, ceph_set_super, fsc); + sb = sget(fs_type, compare_super, ceph_set_super, client); if (IS_ERR(sb)) { err = PTR_ERR(sb); goto out; } - if (ceph_sb_to_client(sb) != fsc) { - ceph_mdsc_destroy(fsc); - destroy_fs_client(fsc); - fsc = ceph_sb_to_client(sb); - dout("get_sb got existing client %p\n", fsc); + if 
(ceph_sb_to_client(sb) != client) { + ceph_destroy_client(client); + client = ceph_sb_to_client(sb); + dout("get_sb got existing client %p\n", client); } else { - dout("get_sb using new client %p\n", fsc); - err = ceph_register_bdi(sb, fsc); + dout("get_sb using new client %p\n", client); + err = ceph_register_bdi(sb, client); if (err < 0) goto out_splat; } - err = ceph_mount(fsc, mnt, path); + err = ceph_mount(client, mnt, path); if (err < 0) goto out_splat; dout("root %p inode %p ino %llx.%llx\n", mnt->mnt_root, @@ -835,13 +1029,12 @@ static int ceph_get_sb(struct file_system_type *fs_type, return 0; out_splat: - ceph_mdsc_close_sessions(fsc->mdsc); + ceph_mdsc_close_sessions(&client->mdsc); deactivate_locked_super(sb); goto out_final; out: - ceph_mdsc_destroy(fsc); - destroy_fs_client(fsc); + ceph_destroy_client(client); out_final: dout("ceph_get_sb fail %d\n", err); return err; @@ -849,12 +1042,11 @@ static int ceph_get_sb(struct file_system_type *fs_type, static void ceph_kill_sb(struct super_block *s) { - struct ceph_fs_client *fsc = ceph_sb_to_client(s); + struct ceph_client *client = ceph_sb_to_client(s); dout("kill_sb %p\n", s); - ceph_mdsc_pre_umount(fsc->mdsc); + ceph_mdsc_pre_umount(&client->mdsc); kill_anon_super(s); /* will call put_super after sb is r/o */ - ceph_mdsc_destroy(fsc); - destroy_fs_client(fsc); + ceph_destroy_client(client); } static struct file_system_type ceph_fs_type = { @@ -870,20 +1062,36 @@ static struct file_system_type ceph_fs_type = { static int __init init_ceph(void) { - int ret = init_caches(); - if (ret) + int ret = 0; + + ret = ceph_debugfs_init(); + if (ret < 0) goto out; + ret = ceph_msgr_init(); + if (ret < 0) + goto out_debugfs; + + ret = init_caches(); + if (ret) + goto out_msgr; + ret = register_filesystem(&ceph_fs_type); if (ret) goto out_icache; - pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); - + pr_info("loaded (mon/mds/osd proto %d/%d/%d, osdmap %d/%d %d/%d)\n", + CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, 
CEPH_OSDC_PROTOCOL, + CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT, + CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT); return 0; out_icache: destroy_caches(); +out_msgr: + ceph_msgr_exit(); +out_debugfs: + ceph_debugfs_cleanup(); out: return ret; } @@ -893,6 +1101,8 @@ static void __exit exit_ceph(void) dout("exit_ceph\n"); unregister_filesystem(&ceph_fs_type); destroy_caches(); + ceph_msgr_exit(); + ceph_debugfs_cleanup(); } module_init(init_ceph); diff --git a/trunk/fs/ceph/super.h b/trunk/fs/ceph/super.h index 1886294e12f7..b87638e84c4b 100644 --- a/trunk/fs/ceph/super.h +++ b/trunk/fs/ceph/super.h @@ -1,7 +1,7 @@ #ifndef _FS_CEPH_SUPER_H #define _FS_CEPH_SUPER_H -#include +#include "ceph_debug.h" #include #include @@ -14,7 +14,13 @@ #include #include -#include +#include "types.h" +#include "messenger.h" +#include "msgpool.h" +#include "mon_client.h" +#include "mds_client.h" +#include "osd_client.h" +#include "ceph_fs.h" /* f_type in struct statfs */ #define CEPH_SUPER_MAGIC 0x00c36400 @@ -24,25 +30,42 @@ #define CEPH_BLOCK_SHIFT 20 /* 1 MB */ #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) -#define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */ -#define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */ -#define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */ +/* + * Supported features + */ +#define CEPH_FEATURE_SUPPORTED CEPH_FEATURE_NOSRCADDR | CEPH_FEATURE_FLOCK +#define CEPH_FEATURE_REQUIRED CEPH_FEATURE_NOSRCADDR -#define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES) +/* + * mount options + */ +#define CEPH_OPT_FSID (1<<0) +#define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */ +#define CEPH_OPT_MYIP (1<<2) /* specified my ip */ +#define CEPH_OPT_DIRSTAT (1<<4) /* funky `cat dirname` for stats */ +#define CEPH_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */ +#define CEPH_OPT_NOCRC (1<<6) /* no data crc on writes */ +#define CEPH_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */ -#define 
ceph_set_mount_opt(fsc, opt) \ - (fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt; -#define ceph_test_mount_opt(fsc, opt) \ - (!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt)) +#define CEPH_OPT_DEFAULT (CEPH_OPT_RBYTES) -#define CEPH_MAX_READDIR_DEFAULT 1024 -#define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024) -#define CEPH_SNAPDIRNAME_DEFAULT ".snap" +#define ceph_set_opt(client, opt) \ + (client)->mount_args->flags |= CEPH_OPT_##opt; +#define ceph_test_opt(client, opt) \ + (!!((client)->mount_args->flags & CEPH_OPT_##opt)) -struct ceph_mount_options { - int flags; - int sb_flags; +struct ceph_mount_args { + int sb_flags; + int flags; + struct ceph_fsid fsid; + struct ceph_entity_addr my_addr; + int num_mon; + struct ceph_entity_addr *mon_addr; + int mount_timeout; + int osd_idle_ttl; + int osd_timeout; + int osd_keepalive_timeout; int wsize; int rsize; /* max readahead */ int congestion_kb; /* max writeback in flight */ @@ -50,25 +73,82 @@ struct ceph_mount_options { int cap_release_safety; int max_readdir; /* max readdir result (entires) */ int max_readdir_bytes; /* max readdir result (bytes) */ - - /* - * everything above this point can be memcmp'd; everything below - * is handled in compare_mount_options() - */ - char *snapdir_name; /* default ".snap" */ + char *name; + char *secret; }; -struct ceph_fs_client { - struct super_block *sb; +/* + * defaults + */ +#define CEPH_MOUNT_TIMEOUT_DEFAULT 60 +#define CEPH_OSD_TIMEOUT_DEFAULT 60 /* seconds */ +#define CEPH_OSD_KEEPALIVE_DEFAULT 5 +#define CEPH_OSD_IDLE_TTL_DEFAULT 60 +#define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ +#define CEPH_MAX_READDIR_DEFAULT 1024 +#define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024) + +#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) +#define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) + +#define CEPH_SNAPDIRNAME_DEFAULT ".snap" +#define CEPH_AUTH_NAME_DEFAULT "guest" +/* + * Delay telling the MDS we no longer want caps, in case we reopen + * the file. 
Delay a minimum amount of time, even if we send a cap + * message for some other reason. Otherwise, take the oppotunity to + * update the mds to avoid sending another message later. + */ +#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ +#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ + +#define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4) + +/* mount state */ +enum { + CEPH_MOUNT_MOUNTING, + CEPH_MOUNT_MOUNTED, + CEPH_MOUNT_UNMOUNTING, + CEPH_MOUNT_UNMOUNTED, + CEPH_MOUNT_SHUTDOWN, +}; - struct ceph_mount_options *mount_options; - struct ceph_client *client; +/* + * subtract jiffies + */ +static inline unsigned long time_sub(unsigned long a, unsigned long b) +{ + BUG_ON(time_after(b, a)); + return (long)a - (long)b; +} + +/* + * per-filesystem client state + * + * possibly shared by multiple mount points, if they are + * mounting the same ceph filesystem/cluster. + */ +struct ceph_client { + struct ceph_fsid fsid; + bool have_fsid; + + struct mutex mount_mutex; /* serialize mount attempts */ + struct ceph_mount_args *mount_args; + + struct super_block *sb; unsigned long mount_state; + wait_queue_head_t auth_wq; + + int auth_err; + int min_caps; /* min caps i added */ - struct ceph_mds_client *mdsc; + struct ceph_messenger *msgr; /* messenger instance */ + struct ceph_mon_client monc; + struct ceph_mds_client mdsc; + struct ceph_osd_client osdc; /* writeback */ mempool_t *wb_pagevec_pool; @@ -80,14 +160,14 @@ struct ceph_fs_client { struct backing_dev_info backing_dev_info; #ifdef CONFIG_DEBUG_FS - struct dentry *debugfs_dentry_lru, *debugfs_caps; + struct dentry *debugfs_monmap; + struct dentry *debugfs_mdsmap, *debugfs_osdmap; + struct dentry *debugfs_dir, *debugfs_dentry_lru, *debugfs_caps; struct dentry *debugfs_congestion_kb; struct dentry *debugfs_bdi; - struct dentry *debugfs_mdsc, *debugfs_mdsmap; #endif }; - /* * File i/o capability. 
This tracks shared state with the metadata * server that allows us to cache or writeback attributes or to read @@ -195,20 +275,6 @@ struct ceph_inode_xattr { int should_free_val; }; -/* - * Ceph dentry state - */ -struct ceph_dentry_info { - struct ceph_mds_session *lease_session; - u32 lease_gen, lease_shared_gen; - u32 lease_seq; - unsigned long lease_renew_after, lease_renew_from; - struct list_head lru; - struct dentry *dentry; - u64 time; - u64 offset; -}; - struct ceph_inode_xattrs_info { /* * (still encoded) xattr blob. we avoid the overhead of parsing @@ -230,6 +296,11 @@ struct ceph_inode_xattrs_info { /* * Ceph inode. */ +#define CEPH_I_COMPLETE 1 /* we have complete directory cached */ +#define CEPH_I_NODELAY 4 /* do not delay cap release */ +#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ +#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ + struct ceph_inode_info { struct ceph_vino i_vino; /* ceph ino + snap */ @@ -320,63 +391,6 @@ static inline struct ceph_inode_info *ceph_inode(struct inode *inode) return container_of(inode, struct ceph_inode_info, vfs_inode); } -static inline struct ceph_vino ceph_vino(struct inode *inode) -{ - return ceph_inode(inode)->i_vino; -} - -/* - * ino_t is <64 bits on many architectures, blech. - * - * don't include snap in ino hash, at least for now. 
- */ -static inline ino_t ceph_vino_to_ino(struct ceph_vino vino) -{ - ino_t ino = (ino_t)vino.ino; /* ^ (vino.snap << 20); */ -#if BITS_PER_LONG == 32 - ino ^= vino.ino >> (sizeof(u64)-sizeof(ino_t)) * 8; - if (!ino) - ino = 1; -#endif - return ino; -} - -/* for printf-style formatting */ -#define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap - -static inline u64 ceph_ino(struct inode *inode) -{ - return ceph_inode(inode)->i_vino.ino; -} -static inline u64 ceph_snap(struct inode *inode) -{ - return ceph_inode(inode)->i_vino.snap; -} - -static inline int ceph_ino_compare(struct inode *inode, void *data) -{ - struct ceph_vino *pvino = (struct ceph_vino *)data; - struct ceph_inode_info *ci = ceph_inode(inode); - return ci->i_vino.ino == pvino->ino && - ci->i_vino.snap == pvino->snap; -} - -static inline struct inode *ceph_find_inode(struct super_block *sb, - struct ceph_vino vino) -{ - ino_t t = ceph_vino_to_ino(vino); - return ilookup5(sb, t, ceph_ino_compare, &vino); -} - - -/* - * Ceph inode. 
- */ -#define CEPH_I_COMPLETE 1 /* we have complete directory cached */ -#define CEPH_I_NODELAY 4 /* do not delay cap release */ -#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ -#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ - static inline void ceph_i_clear(struct inode *inode, unsigned mask) { struct ceph_inode_info *ci = ceph_inode(inode); @@ -400,9 +414,8 @@ static inline bool ceph_i_test(struct inode *inode, unsigned mask) struct ceph_inode_info *ci = ceph_inode(inode); bool r; - spin_lock(&inode->i_lock); + smp_mb(); r = (ci->i_ceph_flags & mask) == mask; - spin_unlock(&inode->i_lock); return r; } @@ -419,6 +432,20 @@ extern u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, struct ceph_inode_frag *pfrag, int *found); +/* + * Ceph dentry state + */ +struct ceph_dentry_info { + struct ceph_mds_session *lease_session; + u32 lease_gen, lease_shared_gen; + u32 lease_seq; + unsigned long lease_renew_after, lease_renew_from; + struct list_head lru; + struct dentry *dentry; + u64 time; + u64 offset; +}; + static inline struct ceph_dentry_info *ceph_dentry(struct dentry *dentry) { return (struct ceph_dentry_info *)dentry->d_fsdata; @@ -429,6 +456,22 @@ static inline loff_t ceph_make_fpos(unsigned frag, unsigned off) return ((loff_t)frag << 32) | (loff_t)off; } +/* + * ino_t is <64 bits on many architectures, blech. + * + * don't include snap in ino hash, at least for now. 
+ */ +static inline ino_t ceph_vino_to_ino(struct ceph_vino vino) +{ + ino_t ino = (ino_t)vino.ino; /* ^ (vino.snap << 20); */ +#if BITS_PER_LONG == 32 + ino ^= vino.ino >> (sizeof(u64)-sizeof(ino_t)) * 8; + if (!ino) + ino = 1; +#endif + return ino; +} + static inline int ceph_set_ino_cb(struct inode *inode, void *data) { ceph_inode(inode)->i_vino = *(struct ceph_vino *)data; @@ -436,6 +479,39 @@ static inline int ceph_set_ino_cb(struct inode *inode, void *data) return 0; } +static inline struct ceph_vino ceph_vino(struct inode *inode) +{ + return ceph_inode(inode)->i_vino; +} + +/* for printf-style formatting */ +#define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap + +static inline u64 ceph_ino(struct inode *inode) +{ + return ceph_inode(inode)->i_vino.ino; +} +static inline u64 ceph_snap(struct inode *inode) +{ + return ceph_inode(inode)->i_vino.snap; +} + +static inline int ceph_ino_compare(struct inode *inode, void *data) +{ + struct ceph_vino *pvino = (struct ceph_vino *)data; + struct ceph_inode_info *ci = ceph_inode(inode); + return ci->i_vino.ino == pvino->ino && + ci->i_vino.snap == pvino->snap; +} + +static inline struct inode *ceph_find_inode(struct super_block *sb, + struct ceph_vino vino) +{ + ino_t t = ceph_vino_to_ino(vino); + return ilookup5(sb, t, ceph_ino_compare, &vino); +} + + /* * caps helpers */ @@ -500,18 +576,18 @@ extern int ceph_reserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx, int need); extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx); -extern void ceph_reservation_status(struct ceph_fs_client *client, +extern void ceph_reservation_status(struct ceph_client *client, int *total, int *avail, int *used, int *reserved, int *min); -static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode) +static inline struct ceph_client *ceph_inode_to_client(struct inode *inode) { - return (struct ceph_fs_client *)inode->i_sb->s_fs_info; + 
return (struct ceph_client *)inode->i_sb->s_fs_info; } -static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb) +static inline struct ceph_client *ceph_sb_to_client(struct super_block *sb) { - return (struct ceph_fs_client *)sb->s_fs_info; + return (struct ceph_client *)sb->s_fs_info; } @@ -540,6 +616,51 @@ struct ceph_file_info { +/* + * snapshots + */ + +/* + * A "snap context" is the set of existing snapshots when we + * write data. It is used by the OSD to guide its COW behavior. + * + * The ceph_snap_context is refcounted, and attached to each dirty + * page, indicating which context the dirty data belonged when it was + * dirtied. + */ +struct ceph_snap_context { + atomic_t nref; + u64 seq; + int num_snaps; + u64 snaps[]; +}; + +static inline struct ceph_snap_context * +ceph_get_snap_context(struct ceph_snap_context *sc) +{ + /* + printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), + atomic_read(&sc->nref)+1); + */ + if (sc) + atomic_inc(&sc->nref); + return sc; +} + +static inline void ceph_put_snap_context(struct ceph_snap_context *sc) +{ + if (!sc) + return; + /* + printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), + atomic_read(&sc->nref)-1); + */ + if (atomic_dec_and_test(&sc->nref)) { + /*printk(" deleting snap_context %p\n", sc);*/ + kfree(sc); + } +} + /* * A "snap realm" describes a subset of the file hierarchy sharing * the same set of snapshots that apply to it. The realms themselves @@ -578,33 +699,16 @@ struct ceph_snap_realm { spinlock_t inodes_with_caps_lock; }; -static inline int default_congestion_kb(void) -{ - int congestion_kb; - /* - * Copied from NFS - * - * congestion size, scale with available memory. - * - * 64MB: 8192k - * 128MB: 11585k - * 256MB: 16384k - * 512MB: 23170k - * 1GB: 32768k - * 2GB: 46340k - * 4GB: 65536k - * 8GB: 92681k - * 16GB: 131072k - * - * This allows larger machines to have larger/more transfers. 
- * Limit the default to 256M - */ - congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); - if (congestion_kb > 256*1024) - congestion_kb = 256*1024; - - return congestion_kb; + +/* + * calculate the number of pages a given length and offset map onto, + * if we align the data. + */ +static inline int calc_pages_for(u64 off, u64 len) +{ + return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) - + (off >> PAGE_CACHE_SHIFT); } @@ -637,6 +741,16 @@ static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci) ci_item)->writing; } + +/* super.c */ +extern struct kmem_cache *ceph_inode_cachep; +extern struct kmem_cache *ceph_cap_cachep; +extern struct kmem_cache *ceph_dentry_cachep; +extern struct kmem_cache *ceph_file_cachep; + +extern const char *ceph_msg_type_name(int type); +extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); + /* inode.c */ extern const struct inode_operations ceph_file_iops; @@ -743,18 +857,12 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma); /* file.c */ extern const struct file_operations ceph_file_fops; extern const struct address_space_operations ceph_aops; -extern int ceph_copy_to_page_vector(struct page **pages, - const char *data, - loff_t off, size_t len); -extern int ceph_copy_from_page_vector(struct page **pages, - char *data, - loff_t off, size_t len); -extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); extern int ceph_open(struct inode *inode, struct file *file); extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd, int mode, int locked_dir); extern int ceph_release(struct inode *inode, struct file *filp); +extern void ceph_release_page_vector(struct page **pages, int num_pages); /* dir.c */ extern const struct file_operations ceph_dir_fops; @@ -784,6 +892,12 @@ extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg); /* export.c */ extern const struct 
export_operations ceph_export_ops; +/* debugfs.c */ +extern int ceph_debugfs_init(void); +extern void ceph_debugfs_cleanup(void); +extern int ceph_debugfs_client_init(struct ceph_client *client); +extern void ceph_debugfs_client_cleanup(struct ceph_client *client); + /* locks.c */ extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); @@ -800,8 +914,4 @@ static inline struct inode *get_dentry_parent_inode(struct dentry *dentry) return NULL; } -/* debugfs.c */ -extern int ceph_fs_debugfs_init(struct ceph_fs_client *client); -extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client); - #endif /* _FS_CEPH_SUPER_H */ diff --git a/trunk/include/linux/ceph/types.h b/trunk/fs/ceph/types.h similarity index 100% rename from trunk/include/linux/ceph/types.h rename to trunk/fs/ceph/types.h diff --git a/trunk/fs/ceph/xattr.c b/trunk/fs/ceph/xattr.c index 6e12a6ba5f79..9578af610b73 100644 --- a/trunk/fs/ceph/xattr.c +++ b/trunk/fs/ceph/xattr.c @@ -1,9 +1,6 @@ -#include - +#include "ceph_debug.h" #include "super.h" -#include "mds_client.h" - -#include +#include "decode.h" #include #include @@ -623,12 +620,12 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size) static int ceph_sync_setxattr(struct dentry *dentry, const char *name, const char *value, size_t size, int flags) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); + struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); struct inode *inode = dentry->d_inode; struct ceph_inode_info *ci = ceph_inode(inode); struct inode *parent_inode = dentry->d_parent->d_inode; struct ceph_mds_request *req; - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_client *mdsc = &client->mdsc; int err; int i, nr_pages; struct page **pages = NULL; @@ -716,9 +713,10 @@ int ceph_setxattr(struct dentry *dentry, const char *name, /* preallocate memory for xattr name, value, index node */ err = 
-ENOMEM; - newname = kmemdup(name, name_len + 1, GFP_NOFS); + newname = kmalloc(name_len + 1, GFP_NOFS); if (!newname) goto out; + memcpy(newname, name, name_len + 1); if (val_len) { newval = kmalloc(val_len + 1, GFP_NOFS); @@ -779,8 +777,8 @@ int ceph_setxattr(struct dentry *dentry, const char *name, static int ceph_send_removexattr(struct dentry *dentry, const char *name) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); - struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); + struct ceph_mds_client *mdsc = &client->mdsc; struct inode *inode = dentry->d_inode; struct inode *parent_inode = dentry->d_parent->d_inode; struct ceph_mds_request *req; diff --git a/trunk/fs/gfs2/Kconfig b/trunk/fs/gfs2/Kconfig index c465ae066c62..cc9665522148 100644 --- a/trunk/fs/gfs2/Kconfig +++ b/trunk/fs/gfs2/Kconfig @@ -1,6 +1,6 @@ config GFS2_FS tristate "GFS2 file system support" - depends on (64BIT || LBDAF) + depends on EXPERIMENTAL && (64BIT || LBDAF) select DLM if GFS2_FS_LOCKING_DLM select CONFIGFS_FS if GFS2_FS_LOCKING_DLM select SYSFS if GFS2_FS_LOCKING_DLM diff --git a/trunk/fs/gfs2/aops.c b/trunk/fs/gfs2/aops.c index 6b24afb96aae..194fe16d8418 100644 --- a/trunk/fs/gfs2/aops.c +++ b/trunk/fs/gfs2/aops.c @@ -36,8 +36,8 @@ #include "glops.h" -void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, - unsigned int from, unsigned int to) +static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, + unsigned int from, unsigned int to) { struct buffer_head *head = page_buffers(page); unsigned int bsize = head->b_size; @@ -615,7 +615,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, unsigned int data_blocks = 0, ind_blocks = 0, rblocks; int alloc_required; int error = 0; - struct gfs2_alloc *al = NULL; + struct gfs2_alloc *al; pgoff_t index = pos >> PAGE_CACHE_SHIFT; unsigned from = pos & (PAGE_CACHE_SIZE - 1); unsigned to = from + len; @@ -663,8 
+663,6 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, rblocks += RES_STATFS + RES_QUOTA; if (&ip->i_inode == sdp->sd_rindex) rblocks += 2 * RES_STATFS; - if (alloc_required) - rblocks += gfs2_rg_blocks(al); error = gfs2_trans_begin(sdp, rblocks, PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); @@ -698,11 +696,13 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, page_cache_release(page); - gfs2_trans_end(sdp); + /* + * XXX(truncate): the call below should probably be replaced with + * a call to the gfs2-specific truncate blocks helper to actually + * release disk blocks.. + */ if (pos + len > ip->i_inode.i_size) - gfs2_trim_blocks(&ip->i_inode); - goto out_trans_fail; - + truncate_setsize(&ip->i_inode, ip->i_inode.i_size); out_endtrans: gfs2_trans_end(sdp); out_trans_fail: @@ -802,8 +802,10 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, page_cache_release(page); if (copied) { - if (inode->i_size < to) + if (inode->i_size < to) { i_size_write(inode, to); + ip->i_disksize = inode->i_size; + } gfs2_dinode_out(ip, di); mark_inode_dirty(inode); } @@ -874,6 +876,8 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); if (ret > 0) { + if (inode->i_size > ip->i_disksize) + ip->i_disksize = inode->i_size; gfs2_dinode_out(ip, dibh->b_data); mark_inode_dirty(inode); } diff --git a/trunk/fs/gfs2/bmap.c b/trunk/fs/gfs2/bmap.c index 5476c066d4ee..6f482809d1a3 100644 --- a/trunk/fs/gfs2/bmap.c +++ b/trunk/fs/gfs2/bmap.c @@ -50,7 +50,7 @@ struct strip_mine { * @ip: the inode * @dibh: the dinode buffer * @block: the block number that was allocated - * @page: The (optional) page. 
This is looked up if @page is NULL + * @private: any locked page held by the caller process * * Returns: errno */ @@ -109,7 +109,8 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, /** * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big * @ip: The GFS2 inode to unstuff - * @page: The (optional) page. This is looked up if the @page is NULL + * @unstuffer: the routine that handles unstuffing a non-zero length file + * @private: private data for the unstuffer * * This routine unstuffs a dinode and returns it to a "normal" state such * that the height can be grown in the traditional way. @@ -131,7 +132,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) if (error) goto out; - if (i_size_read(&ip->i_inode)) { + if (ip->i_disksize) { /* Get a free block, fill it with the stuffed data, and write it out to disk */ @@ -160,7 +161,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) di = (struct gfs2_dinode *)dibh->b_data; gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); - if (i_size_read(&ip->i_inode)) { + if (ip->i_disksize) { *(__be64 *)(di + 1) = cpu_to_be64(block); gfs2_add_inode_blocks(&ip->i_inode, 1); di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); @@ -883,15 +884,84 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, return error; } +/** + * do_grow - Make a file look bigger than it is + * @ip: the inode + * @size: the size to set the file to + * + * Called with an exclusive lock on @ip. 
+ * + * Returns: errno + */ + +static int do_grow(struct gfs2_inode *ip, u64 size) +{ + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct gfs2_alloc *al; + struct buffer_head *dibh; + int error; + + al = gfs2_alloc_get(ip); + if (!al) + return -ENOMEM; + + error = gfs2_quota_lock_check(ip); + if (error) + goto out; + + al->al_requested = sdp->sd_max_height + RES_DATA; + + error = gfs2_inplace_reserve(ip); + if (error) + goto out_gunlock_q; + + error = gfs2_trans_begin(sdp, + sdp->sd_max_height + al->al_rgd->rd_length + + RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0); + if (error) + goto out_ipres; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto out_end_trans; + + if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { + if (gfs2_is_stuffed(ip)) { + error = gfs2_unstuff_dinode(ip, NULL); + if (error) + goto out_brelse; + } + } + + ip->i_disksize = size; + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_dinode_out(ip, dibh->b_data); + +out_brelse: + brelse(dibh); +out_end_trans: + gfs2_trans_end(sdp); +out_ipres: + gfs2_inplace_release(ip); +out_gunlock_q: + gfs2_quota_unlock(ip); +out: + gfs2_alloc_put(ip); + return error; +} + + /** * gfs2_block_truncate_page - Deal with zeroing out data for truncate * * This is partly borrowed from ext3. 
*/ -static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from) +static int gfs2_block_truncate_page(struct address_space *mapping) { struct inode *inode = mapping->host; struct gfs2_inode *ip = GFS2_I(inode); + loff_t from = inode->i_size; unsigned long index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); unsigned blocksize, iblock, length, pos; @@ -953,11 +1023,9 @@ static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from) return err; } -static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) +static int trunc_start(struct gfs2_inode *ip, u64 size) { - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); - struct address_space *mapping = inode->i_mapping; + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; int journaled = gfs2_is_jdata(ip); int error; @@ -971,26 +1039,31 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) if (error) goto out; - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - if (gfs2_is_stuffed(ip)) { - gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize); + u64 dsize = size + sizeof(struct gfs2_dinode); + ip->i_disksize = size; + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_dinode_out(ip, dibh->b_data); + if (dsize > dibh->b_size) + dsize = dibh->b_size; + gfs2_buffer_clear_tail(dibh, dsize); + error = 1; } else { - if (newsize & (u64)(sdp->sd_sb.sb_bsize - 1)) { - error = gfs2_block_truncate_page(mapping, newsize); - if (error) - goto out_brelse; + if (size & (u64)(sdp->sd_sb.sb_bsize - 1)) + error = gfs2_block_truncate_page(ip->i_inode.i_mapping); + + if (!error) { + ip->i_disksize = size; + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; + ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG; + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_dinode_out(ip, dibh->b_data); } - ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG; } - 
i_size_write(inode, newsize); - ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; - gfs2_dinode_out(ip, dibh->b_data); - - truncate_pagecache(inode, oldsize, newsize); -out_brelse: brelse(dibh); + out: gfs2_trans_end(sdp); return error; @@ -1050,7 +1123,7 @@ static int trunc_end(struct gfs2_inode *ip) if (error) goto out; - if (!i_size_read(&ip->i_inode)) { + if (!ip->i_disksize) { ip->i_height = 0; ip->i_goal = ip->i_no_addr; gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); @@ -1070,154 +1143,92 @@ static int trunc_end(struct gfs2_inode *ip) /** * do_shrink - make a file smaller - * @inode: the inode - * @oldsize: the current inode size - * @newsize: the size to make the file + * @ip: the inode + * @size: the size to make the file + * @truncator: function to truncate the last partial block * - * Called with an exclusive lock on @inode. The @size must - * be equal to or smaller than the current inode size. + * Called with an exclusive lock on @ip. * * Returns: errno */ -static int do_shrink(struct inode *inode, u64 oldsize, u64 newsize) +static int do_shrink(struct gfs2_inode *ip, u64 size) { - struct gfs2_inode *ip = GFS2_I(inode); int error; - error = trunc_start(inode, oldsize, newsize); + error = trunc_start(ip, size); if (error < 0) return error; - if (gfs2_is_stuffed(ip)) + if (error > 0) return 0; - error = trunc_dealloc(ip, newsize); - if (error == 0) + error = trunc_dealloc(ip, size); + if (!error) error = trunc_end(ip); return error; } -void gfs2_trim_blocks(struct inode *inode) -{ - u64 size = inode->i_size; - int ret; - - ret = do_shrink(inode, size, size); - WARN_ON(ret != 0); -} - -/** - * do_grow - Touch and update inode size - * @inode: The inode - * @size: The new size - * - * This function updates the timestamps on the inode and - * may also increase the size of the inode. This function - * must not be called with @size any smaller than the current - * inode size. 
- * - * Although it is not strictly required to unstuff files here, - * earlier versions of GFS2 have a bug in the stuffed file reading - * code which will result in a buffer overrun if the size is larger - * than the max stuffed file size. In order to prevent this from - * occuring, such files are unstuffed, but in other cases we can - * just update the inode size directly. - * - * Returns: 0 on success, or -ve on error - */ - -static int do_grow(struct inode *inode, u64 size) +static int do_touch(struct gfs2_inode *ip, u64 size) { - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; - struct gfs2_alloc *al = NULL; int error; - if (gfs2_is_stuffed(ip) && - (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) { - al = gfs2_alloc_get(ip); - if (al == NULL) - return -ENOMEM; - - error = gfs2_quota_lock_check(ip); - if (error) - goto do_grow_alloc_put; - - al->al_requested = 1; - error = gfs2_inplace_reserve(ip); - if (error) - goto do_grow_qunlock; - } - - error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT, 0); + error = gfs2_trans_begin(sdp, RES_DINODE, 0); if (error) - goto do_grow_release; + return error; - if (al) { - error = gfs2_unstuff_dinode(ip, NULL); - if (error) - goto do_end_trans; - } + down_write(&ip->i_rw_mutex); error = gfs2_meta_inode_buffer(ip, &dibh); if (error) - goto do_end_trans; + goto do_touch_out; - i_size_write(inode, size); ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); -do_end_trans: +do_touch_out: + up_write(&ip->i_rw_mutex); gfs2_trans_end(sdp); -do_grow_release: - if (al) { - gfs2_inplace_release(ip); -do_grow_qunlock: - gfs2_quota_unlock(ip); -do_grow_alloc_put: - gfs2_alloc_put(ip); - } return error; } /** - * gfs2_setattr_size - make a file a given size - * @inode: the inode - * @newsize: the size to make the 
file + * gfs2_truncatei - make a file a given size + * @ip: the inode + * @size: the size to make the file + * @truncator: function to truncate the last partial block * - * The file size can grow, shrink, or stay the same size. This - * is called holding i_mutex and an exclusive glock on the inode - * in question. + * The file size can grow, shrink, or stay the same size. * * Returns: errno */ -int gfs2_setattr_size(struct inode *inode, u64 newsize) +int gfs2_truncatei(struct gfs2_inode *ip, u64 size) { - int ret; - u64 oldsize; - - BUG_ON(!S_ISREG(inode->i_mode)); + int error; - ret = inode_newsize_ok(inode, newsize); - if (ret) - return ret; + if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), S_ISREG(ip->i_inode.i_mode))) + return -EINVAL; - oldsize = inode->i_size; - if (newsize >= oldsize) - return do_grow(inode, newsize); + if (size > ip->i_disksize) + error = do_grow(ip, size); + else if (size < ip->i_disksize) + error = do_shrink(ip, size); + else + /* update time stamps */ + error = do_touch(ip, size); - return do_shrink(inode, oldsize, newsize); + return error; } int gfs2_truncatei_resume(struct gfs2_inode *ip) { int error; - error = trunc_dealloc(ip, i_size_read(&ip->i_inode)); + error = trunc_dealloc(ip, ip->i_disksize); if (!error) error = trunc_end(ip); return error; @@ -1258,7 +1269,7 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, shift = sdp->sd_sb.sb_bsize_shift; BUG_ON(gfs2_is_dir(ip)); - end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift; + end_of_file = (ip->i_disksize + sdp->sd_sb.sb_bsize - 1) >> shift; lblock = offset >> shift; lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; if (lblock_stop > end_of_file) diff --git a/trunk/fs/gfs2/bmap.h b/trunk/fs/gfs2/bmap.h index 42fea03e2bd9..a20a5213135a 100644 --- a/trunk/fs/gfs2/bmap.h +++ b/trunk/fs/gfs2/bmap.h @@ -44,16 +44,14 @@ static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip, } } -extern int gfs2_unstuff_dinode(struct 
gfs2_inode *ip, struct page *page); -extern int gfs2_block_map(struct inode *inode, sector_t lblock, - struct buffer_head *bh, int create); -extern int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, - u64 *dblock, unsigned *extlen); -extern int gfs2_setattr_size(struct inode *inode, u64 size); -extern void gfs2_trim_blocks(struct inode *inode); -extern int gfs2_truncatei_resume(struct gfs2_inode *ip); -extern int gfs2_file_dealloc(struct gfs2_inode *ip); -extern int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, - unsigned int len); +int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page); +int gfs2_block_map(struct inode *inode, sector_t lblock, struct buffer_head *bh, int create); +int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen); + +int gfs2_truncatei(struct gfs2_inode *ip, u64 size); +int gfs2_truncatei_resume(struct gfs2_inode *ip); +int gfs2_file_dealloc(struct gfs2_inode *ip); +int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, + unsigned int len); #endif /* __BMAP_DOT_H__ */ diff --git a/trunk/fs/gfs2/dentry.c b/trunk/fs/gfs2/dentry.c index 6798755b3858..bb7907bde3d8 100644 --- a/trunk/fs/gfs2/dentry.c +++ b/trunk/fs/gfs2/dentry.c @@ -49,7 +49,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) ip = GFS2_I(inode); } - if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) + if (sdp->sd_args.ar_localcaching) goto valid; had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL); diff --git a/trunk/fs/gfs2/dir.c b/trunk/fs/gfs2/dir.c index 5c356d09c321..b9dd88a78dd4 100644 --- a/trunk/fs/gfs2/dir.c +++ b/trunk/fs/gfs2/dir.c @@ -79,9 +79,6 @@ #define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1) #define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1)) -struct qstr gfs2_qdot __read_mostly; -struct qstr gfs2_qdotdot __read_mostly; - typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len, u64 leaf_no, void *data); typedef int 
(*gfs2_dscan_t)(const struct gfs2_dirent *dent, @@ -130,8 +127,8 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf, gfs2_trans_add_bh(ip->i_gl, dibh, 1); memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); - if (ip->i_inode.i_size < offset + size) - i_size_write(&ip->i_inode, offset + size); + if (ip->i_disksize < offset + size) + ip->i_disksize = offset + size; ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; gfs2_dinode_out(ip, dibh->b_data); @@ -228,8 +225,8 @@ static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf, if (error) return error; - if (ip->i_inode.i_size < offset + copied) - i_size_write(&ip->i_inode, offset + copied); + if (ip->i_disksize < offset + copied) + ip->i_disksize = offset + copied; ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; gfs2_trans_add_bh(ip->i_gl, dibh, 1); @@ -278,13 +275,12 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset, unsigned int o; int copied = 0; int error = 0; - u64 disksize = i_size_read(&ip->i_inode); - if (offset >= disksize) + if (offset >= ip->i_disksize) return 0; - if (offset + size > disksize) - size = disksize - offset; + if (offset + size > ip->i_disksize) + size = ip->i_disksize - offset; if (!size) return 0; @@ -731,7 +727,7 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode, unsigned hsize = 1 << ip->i_depth; unsigned index; u64 ln; - if (hsize * sizeof(u64) != i_size_read(inode)) { + if (hsize * sizeof(u64) != ip->i_disksize) { gfs2_consist_inode(ip); return ERR_PTR(-EIO); } @@ -883,7 +879,7 @@ static int dir_make_exhash(struct inode *inode) for (x = sdp->sd_hash_ptrs; x--; lp++) *lp = cpu_to_be64(bn); - i_size_write(inode, sdp->sd_sb.sb_bsize / 2); + dip->i_disksize = sdp->sd_sb.sb_bsize / 2; gfs2_add_inode_blocks(&dip->i_inode, 1); dip->i_diskflags |= GFS2_DIF_EXHASH; @@ -1061,12 +1057,11 @@ static int dir_double_exhash(struct gfs2_inode *dip) u64 *buf; u64 *from, *to; u64 block; 
- u64 disksize = i_size_read(&dip->i_inode); int x; int error = 0; hsize = 1 << dip->i_depth; - if (hsize * sizeof(u64) != disksize) { + if (hsize * sizeof(u64) != dip->i_disksize) { gfs2_consist_inode(dip); return -EIO; } @@ -1077,7 +1072,7 @@ static int dir_double_exhash(struct gfs2_inode *dip) if (!buf) return -ENOMEM; - for (block = disksize >> sdp->sd_hash_bsize_shift; block--;) { + for (block = dip->i_disksize >> sdp->sd_hash_bsize_shift; block--;) { error = gfs2_dir_read_data(dip, (char *)buf, block * sdp->sd_hash_bsize, sdp->sd_hash_bsize, 1); @@ -1375,7 +1370,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, unsigned depth = 0; hsize = 1 << dip->i_depth; - if (hsize * sizeof(u64) != i_size_read(inode)) { + if (hsize * sizeof(u64) != dip->i_disksize) { gfs2_consist_inode(dip); return -EIO; } @@ -1789,7 +1784,7 @@ static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data) int error = 0; hsize = 1 << dip->i_depth; - if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) { + if (hsize * sizeof(u64) != dip->i_disksize) { gfs2_consist_inode(dip); return -EIO; } diff --git a/trunk/fs/gfs2/dir.h b/trunk/fs/gfs2/dir.h index a98f644bd3df..4f919440c3be 100644 --- a/trunk/fs/gfs2/dir.h +++ b/trunk/fs/gfs2/dir.h @@ -17,24 +17,23 @@ struct inode; struct gfs2_inode; struct gfs2_inum; -extern struct inode *gfs2_dir_search(struct inode *dir, - const struct qstr *filename); -extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, - const struct gfs2_inode *ip); -extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, - const struct gfs2_inode *ip, unsigned int type); -extern int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); -extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, - filldir_t filldir); -extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, - const struct gfs2_inode *nip, unsigned int new_type); +struct inode 
*gfs2_dir_search(struct inode *dir, const struct qstr *filename); +int gfs2_dir_check(struct inode *dir, const struct qstr *filename, + const struct gfs2_inode *ip); +int gfs2_dir_add(struct inode *inode, const struct qstr *filename, + const struct gfs2_inode *ip, unsigned int type); +int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); +int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, + filldir_t filldir); +int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, + const struct gfs2_inode *nip, unsigned int new_type); -extern int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); +int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); -extern int gfs2_diradd_alloc_required(struct inode *dir, - const struct qstr *filename); -extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, - struct buffer_head **bhp); +int gfs2_diradd_alloc_required(struct inode *dir, + const struct qstr *filename); +int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, + struct buffer_head **bhp); static inline u32 gfs2_disk_hash(const char *data, int len) { @@ -62,7 +61,4 @@ static inline void gfs2_qstr2dirent(const struct qstr *name, u16 reclen, struct memcpy(dent + 1, name->name, name->len); } -extern struct qstr gfs2_qdot; -extern struct qstr gfs2_qdotdot; - #endif /* __DIR_DOT_H__ */ diff --git a/trunk/fs/gfs2/export.c b/trunk/fs/gfs2/export.c index 06d582732d34..dfe237a3f8ad 100644 --- a/trunk/fs/gfs2/export.c +++ b/trunk/fs/gfs2/export.c @@ -126,9 +126,16 @@ static int gfs2_get_name(struct dentry *parent, char *name, static struct dentry *gfs2_get_parent(struct dentry *child) { + struct qstr dotdot; struct dentry *dentry; - dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &gfs2_qdotdot, 1)); + /* + * XXX(hch): it would be a good idea to keep this around as a + * static variable. 
+ */ + gfs2_str2qstr(&dotdot, ".."); + + dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &dotdot, 1)); if (!IS_ERR(dentry)) dentry->d_op = &gfs2_dops; return dentry; diff --git a/trunk/fs/gfs2/file.c b/trunk/fs/gfs2/file.c index 237ee6a940df..4edd662c8232 100644 --- a/trunk/fs/gfs2/file.c +++ b/trunk/fs/gfs2/file.c @@ -382,10 +382,8 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) rblocks = RES_DINODE + ind_blocks; if (gfs2_is_jdata(ip)) rblocks += data_blocks ? data_blocks : 1; - if (ind_blocks || data_blocks) { + if (ind_blocks || data_blocks) rblocks += RES_STATFS + RES_QUOTA; - rblocks += gfs2_rg_blocks(al); - } ret = gfs2_trans_begin(sdp, rblocks, 0); if (ret) goto out_trans_fail; @@ -493,7 +491,7 @@ static int gfs2_open(struct inode *inode, struct file *file) goto fail; if (!(file->f_flags & O_LARGEFILE) && - i_size_read(inode) > MAX_NON_LFS) { + ip->i_disksize > MAX_NON_LFS) { error = -EOVERFLOW; goto fail_gunlock; } diff --git a/trunk/fs/gfs2/glock.c b/trunk/fs/gfs2/glock.c index 87778857f099..9adf8f924e08 100644 --- a/trunk/fs/gfs2/glock.c +++ b/trunk/fs/gfs2/glock.c @@ -441,8 +441,6 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state) else gfs2_glock_put_nolock(gl); } - if (held1 && held2 && list_empty(&gl->gl_holders)) - clear_bit(GLF_QUEUED, &gl->gl_flags); gl->gl_state = new_state; gl->gl_tchange = jiffies; @@ -1014,7 +1012,6 @@ __acquires(&gl->gl_spin) if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt)) insert_pt = &gh2->gh_list; } - set_bit(GLF_QUEUED, &gl->gl_flags); if (likely(insert_pt == NULL)) { list_add_tail(&gh->gh_list, &gl->gl_holders); if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) @@ -1313,12 +1310,10 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) gfs2_glock_hold(gl); holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; - if (test_bit(GLF_QUEUED, &gl->gl_flags)) { - if (time_before(now, holdtime)) - delay = holdtime - now; - if 
(test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) - delay = gl->gl_ops->go_min_hold_time; - } + if (time_before(now, holdtime)) + delay = holdtime - now; + if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) + delay = gl->gl_ops->go_min_hold_time; spin_lock(&gl->gl_spin); handle_callback(gl, state, delay); @@ -1517,7 +1512,7 @@ static void clear_glock(struct gfs2_glock *gl) spin_unlock(&lru_lock); spin_lock(&gl->gl_spin); - if (gl->gl_state != LM_ST_UNLOCKED) + if (find_first_holder(gl) == NULL && gl->gl_state != LM_ST_UNLOCKED) handle_callback(gl, LM_ST_UNLOCKED, 0); spin_unlock(&gl->gl_spin); gfs2_glock_hold(gl); @@ -1665,8 +1660,6 @@ static const char *gflags2str(char *buf, const unsigned long *gflags) *p++ = 'I'; if (test_bit(GLF_FROZEN, gflags)) *p++ = 'F'; - if (test_bit(GLF_QUEUED, gflags)) - *p++ = 'q'; *p = 0; return buf; } @@ -1783,12 +1776,10 @@ int __init gfs2_glock_init(void) } #endif - glock_workqueue = alloc_workqueue("glock_workqueue", WQ_RESCUER | - WQ_HIGHPRI | WQ_FREEZEABLE, 0); + glock_workqueue = create_workqueue("glock_workqueue"); if (IS_ERR(glock_workqueue)) return PTR_ERR(glock_workqueue); - gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", WQ_RESCUER | - WQ_FREEZEABLE, 0); + gfs2_delete_workqueue = create_workqueue("delete_workqueue"); if (IS_ERR(gfs2_delete_workqueue)) { destroy_workqueue(glock_workqueue); return PTR_ERR(gfs2_delete_workqueue); diff --git a/trunk/fs/gfs2/glock.h b/trunk/fs/gfs2/glock.h index db1c26d6d220..2bda1911b156 100644 --- a/trunk/fs/gfs2/glock.h +++ b/trunk/fs/gfs2/glock.h @@ -215,7 +215,7 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); /** - * gfs2_glock_nq_init - initialize a holder and enqueue it on a glock + * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock * @gl: the glock * @state: the state we're requesting * @flags: the modifier flags diff --git a/trunk/fs/gfs2/glops.c b/trunk/fs/gfs2/glops.c 
index 0d149dcc04e5..49f97d3bb690 100644 --- a/trunk/fs/gfs2/glops.c +++ b/trunk/fs/gfs2/glops.c @@ -262,12 +262,13 @@ static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) const struct gfs2_inode *ip = gl->gl_object; if (ip == NULL) return 0; - gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu\n", + gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu/%llu\n", (unsigned long long)ip->i_no_formal_ino, (unsigned long long)ip->i_no_addr, IF2DT(ip->i_inode.i_mode), ip->i_flags, (unsigned int)ip->i_diskflags, - (unsigned long long)i_size_read(&ip->i_inode)); + (unsigned long long)ip->i_inode.i_size, + (unsigned long long)ip->i_disksize); return 0; } @@ -452,6 +453,7 @@ const struct gfs2_glock_operations *gfs2_glops_list[] = { [LM_TYPE_META] = &gfs2_meta_glops, [LM_TYPE_INODE] = &gfs2_inode_glops, [LM_TYPE_RGRP] = &gfs2_rgrp_glops, + [LM_TYPE_NONDISK] = &gfs2_trans_glops, [LM_TYPE_IOPEN] = &gfs2_iopen_glops, [LM_TYPE_FLOCK] = &gfs2_flock_glops, [LM_TYPE_NONDISK] = &gfs2_nondisk_glops, diff --git a/trunk/fs/gfs2/incore.h b/trunk/fs/gfs2/incore.h index 764fbb49efc8..fdbf4b366fa5 100644 --- a/trunk/fs/gfs2/incore.h +++ b/trunk/fs/gfs2/incore.h @@ -196,7 +196,6 @@ enum { GLF_REPLY_PENDING = 9, GLF_INITIAL = 10, GLF_FROZEN = 11, - GLF_QUEUED = 12, }; struct gfs2_glock { @@ -268,6 +267,7 @@ struct gfs2_inode { u64 i_no_formal_ino; u64 i_generation; u64 i_eattr; + loff_t i_disksize; unsigned long i_flags; /* GIF_... */ struct gfs2_glock *i_gl; /* Move into i_gh? 
*/ struct gfs2_holder i_iopen_gh; @@ -416,8 +416,11 @@ struct gfs2_args { char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */ unsigned int ar_spectator:1; /* Don't get a journal */ + unsigned int ar_ignore_local_fs:1; /* Ignore optimisations */ unsigned int ar_localflocks:1; /* Let the VFS do flock|fcntl */ + unsigned int ar_localcaching:1; /* Local caching */ unsigned int ar_debug:1; /* Oops on errors */ + unsigned int ar_upgrade:1; /* Upgrade ondisk format */ unsigned int ar_posix_acl:1; /* Enable posix acls */ unsigned int ar_quota:2; /* off/account/on */ unsigned int ar_suiddir:1; /* suiddir support */ @@ -494,7 +497,7 @@ struct gfs2_sb_host { */ struct lm_lockstruct { - int ls_jid; + unsigned int ls_jid; unsigned int ls_first; unsigned int ls_first_done; unsigned int ls_nodir; @@ -569,7 +572,6 @@ struct gfs2_sbd { struct list_head sd_rindex_mru_list; struct gfs2_rgrpd *sd_rindex_forward; unsigned int sd_rgrps; - unsigned int sd_max_rg_data; /* Journal index stuff */ diff --git a/trunk/fs/gfs2/inode.c b/trunk/fs/gfs2/inode.c index 06370f8bd8cf..08140f185a37 100644 --- a/trunk/fs/gfs2/inode.c +++ b/trunk/fs/gfs2/inode.c @@ -359,7 +359,8 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) * to do that. 
*/ ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink); - i_size_write(&ip->i_inode, be64_to_cpu(str->di_size)); + ip->i_disksize = be64_to_cpu(str->di_size); + i_size_write(&ip->i_inode, ip->i_disksize); gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); atime.tv_sec = be64_to_cpu(str->di_atime); atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); @@ -1054,7 +1055,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) str->di_uid = cpu_to_be32(ip->i_inode.i_uid); str->di_gid = cpu_to_be32(ip->i_inode.i_gid); str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); - str->di_size = cpu_to_be64(i_size_read(&ip->i_inode)); + str->di_size = cpu_to_be64(ip->i_disksize); str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); @@ -1084,8 +1085,8 @@ void gfs2_dinode_print(const struct gfs2_inode *ip) (unsigned long long)ip->i_no_formal_ino); printk(KERN_INFO " no_addr = %llu\n", (unsigned long long)ip->i_no_addr); - printk(KERN_INFO " i_size = %llu\n", - (unsigned long long)i_size_read(&ip->i_inode)); + printk(KERN_INFO " i_disksize = %llu\n", + (unsigned long long)ip->i_disksize); printk(KERN_INFO " blocks = %llu\n", (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode)); printk(KERN_INFO " i_goal = %llu\n", diff --git a/trunk/fs/gfs2/inode.h b/trunk/fs/gfs2/inode.h index 6720d7d5fbc6..300ada3f21de 100644 --- a/trunk/fs/gfs2/inode.h +++ b/trunk/fs/gfs2/inode.h @@ -19,8 +19,6 @@ extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask); extern int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state, char *buf, loff_t *pos, unsigned size); -extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, - unsigned int from, unsigned int to); extern void gfs2_set_aops(struct inode *inode); static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) @@ -82,19 +80,6 @@ static inline void 
gfs2_inum_out(const struct gfs2_inode *ip, dent->de_inum.no_addr = cpu_to_be64(ip->i_no_addr); } -static inline int gfs2_check_internal_file_size(struct inode *inode, - u64 minsize, u64 maxsize) -{ - u64 size = i_size_read(inode); - if (size < minsize || size > maxsize) - goto err; - if (size & ((1 << inode->i_blkbits) - 1)) - goto err; - return 0; -err: - gfs2_consist_inode(GFS2_I(inode)); - return -EIO; -} extern void gfs2_set_iop(struct inode *inode); extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, diff --git a/trunk/fs/gfs2/lock_dlm.c b/trunk/fs/gfs2/lock_dlm.c index 1c09425b45fd..0e0470ed34c2 100644 --- a/trunk/fs/gfs2/lock_dlm.c +++ b/trunk/fs/gfs2/lock_dlm.c @@ -42,9 +42,9 @@ static void gdlm_ast(void *arg) ret |= LM_OUT_CANCELED; goto out; case -EAGAIN: /* Try lock fails */ - case -EDEADLK: /* Deadlock detected */ goto out; - case -ETIMEDOUT: /* Canceled due to timeout */ + case -EINVAL: /* Invalid */ + case -ENOMEM: /* Out of memory */ ret |= LM_OUT_ERROR; goto out; case 0: /* Success */ diff --git a/trunk/fs/gfs2/main.c b/trunk/fs/gfs2/main.c index d7eb1e209aa8..b1e9630eb46a 100644 --- a/trunk/fs/gfs2/main.c +++ b/trunk/fs/gfs2/main.c @@ -24,7 +24,6 @@ #include "glock.h" #include "quota.h" #include "recovery.h" -#include "dir.h" static struct shrinker qd_shrinker = { .shrink = gfs2_shrink_qd_memory, @@ -79,9 +78,6 @@ static int __init init_gfs2_fs(void) { int error; - gfs2_str2qstr(&gfs2_qdot, "."); - gfs2_str2qstr(&gfs2_qdotdot, ".."); - error = gfs2_sys_init(); if (error) return error; @@ -144,7 +140,7 @@ static int __init init_gfs2_fs(void) error = -ENOMEM; gfs_recovery_wq = alloc_workqueue("gfs_recovery", - WQ_RESCUER | WQ_FREEZEABLE, 0); + WQ_NON_REENTRANT | WQ_RESCUER, 0); if (!gfs_recovery_wq) goto fail_wq; diff --git a/trunk/fs/gfs2/ops_fstype.c b/trunk/fs/gfs2/ops_fstype.c index aeafc233dc89..4d4b1e8ac64c 100644 --- a/trunk/fs/gfs2/ops_fstype.c +++ b/trunk/fs/gfs2/ops_fstype.c @@ -38,6 +38,14 @@ #define DO 0 #define 
UNDO 1 +static const u32 gfs2_old_fs_formats[] = { + 0 +}; + +static const u32 gfs2_old_multihost_formats[] = { + 0 +}; + /** * gfs2_tune_init - Fill a gfs2_tune structure with default values * @gt: tune @@ -127,6 +135,8 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) { + unsigned int x; + if (sb->sb_magic != GFS2_MAGIC || sb->sb_type != GFS2_METATYPE_SB) { if (!silent) @@ -140,9 +150,55 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int sile sb->sb_multihost_format == GFS2_FORMAT_MULTI) return 0; - fs_warn(sdp, "Unknown on-disk format, unable to mount\n"); + if (sb->sb_fs_format != GFS2_FORMAT_FS) { + for (x = 0; gfs2_old_fs_formats[x]; x++) + if (gfs2_old_fs_formats[x] == sb->sb_fs_format) + break; - return -EINVAL; + if (!gfs2_old_fs_formats[x]) { + printk(KERN_WARNING + "GFS2: code version (%u, %u) is incompatible " + "with ondisk format (%u, %u)\n", + GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, + sb->sb_fs_format, sb->sb_multihost_format); + printk(KERN_WARNING + "GFS2: I don't know how to upgrade this FS\n"); + return -EINVAL; + } + } + + if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) { + for (x = 0; gfs2_old_multihost_formats[x]; x++) + if (gfs2_old_multihost_formats[x] == + sb->sb_multihost_format) + break; + + if (!gfs2_old_multihost_formats[x]) { + printk(KERN_WARNING + "GFS2: code version (%u, %u) is incompatible " + "with ondisk format (%u, %u)\n", + GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, + sb->sb_fs_format, sb->sb_multihost_format); + printk(KERN_WARNING + "GFS2: I don't know how to upgrade this FS\n"); + return -EINVAL; + } + } + + if (!sdp->sd_args.ar_upgrade) { + printk(KERN_WARNING + "GFS2: code version (%u, %u) is incompatible " + "with ondisk format (%u, %u)\n", + GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, + sb->sb_fs_format, sb->sb_multihost_format); + printk(KERN_INFO + "GFS2: Use the \"upgrade\" mount option to upgrade " + "the FS\n"); + 
printk(KERN_INFO "GFS2: See the manual for more details\n"); + return -EINVAL; + } + + return 0; } static void end_bio_io_page(struct bio *bio, int error) @@ -530,7 +586,7 @@ static int map_journal_extents(struct gfs2_sbd *sdp) prev_db = 0; - for (lb = 0; lb < i_size_read(jd->jd_inode) >> sdp->sd_sb.sb_bsize_shift; lb++) { + for (lb = 0; lb < ip->i_disksize >> sdp->sd_sb.sb_bsize_shift; lb++) { bh.b_state = 0; bh.b_blocknr = 0; bh.b_size = 1 << ip->i_inode.i_blkbits; @@ -966,6 +1022,7 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) if (!strcmp("lock_nolock", proto)) { lm = &nolock_ops; sdp->sd_args.ar_localflocks = 1; + sdp->sd_args.ar_localcaching = 1; #ifdef CONFIG_GFS2_FS_LOCKING_DLM } else if (!strcmp("lock_dlm", proto)) { lm = &gfs2_dlm_ops; @@ -1056,6 +1113,8 @@ static int gfs2_journalid_wait(void *word) static int wait_on_journal(struct gfs2_sbd *sdp) { + if (sdp->sd_args.ar_spectator) + return 0; if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) return 0; @@ -1158,20 +1217,6 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent if (error) goto fail_sb; - /* - * If user space has failed to join the cluster or some similar - * failure has occurred, then the journal id will contain a - * negative (error) number. This will then be returned to the - * caller (of the mount syscall). 
We do this even for spectator - * mounts (which just write a jid of 0 to indicate "ok" even though - * the jid is unused in the spectator case) - */ - if (sdp->sd_lockstruct.ls_jid < 0) { - error = sdp->sd_lockstruct.ls_jid; - sdp->sd_lockstruct.ls_jid = 0; - goto fail_sb; - } - error = init_inodes(sdp, DO); if (error) goto fail_sb; diff --git a/trunk/fs/gfs2/ops_inode.c b/trunk/fs/gfs2/ops_inode.c index 0534510200d5..1009be2c9737 100644 --- a/trunk/fs/gfs2/ops_inode.c +++ b/trunk/fs/gfs2/ops_inode.c @@ -18,8 +18,6 @@ #include #include #include -#include -#include #include #include "gfs2.h" @@ -219,7 +217,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, goto out_gunlock_q; error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + - gfs2_rg_blocks(al) + + al->al_rgd->rd_length + 2 * RES_DINODE + RES_STATFS + RES_QUOTA, 0); if (error) @@ -408,6 +406,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, ip = ghs[1].gh_gl->gl_object; + ip->i_disksize = size; i_size_write(inode, size); error = gfs2_meta_inode_buffer(ip, &dibh); @@ -462,7 +461,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) ip = ghs[1].gh_gl->gl_object; ip->i_inode.i_nlink = 2; - i_size_write(inode, sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)); + ip->i_disksize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode); ip->i_diskflags |= GFS2_DIF_JDATA; ip->i_entries = 2; @@ -471,15 +470,18 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (!gfs2_assert_withdraw(sdp, !error)) { struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1); + struct qstr str; + gfs2_str2qstr(&str, "."); gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent); + gfs2_qstr2dirent(&str, GFS2_DIRENT_SIZE(str.len), dent); dent->de_inum = di->di_num; /* already GFS2 endian */ dent->de_type = cpu_to_be16(DT_DIR); di->di_entries = 
cpu_to_be32(1); + gfs2_str2qstr(&str, ".."); dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1)); - gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent); + gfs2_qstr2dirent(&str, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent); gfs2_inum_out(dip, dent); dent->de_type = cpu_to_be16(DT_DIR); @@ -520,6 +522,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, struct gfs2_inode *ip) { + struct qstr dotname; int error; if (ip->i_entries != 2) { @@ -536,11 +539,13 @@ static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, if (error) return error; - error = gfs2_dir_del(ip, &gfs2_qdot); + gfs2_str2qstr(&dotname, "."); + error = gfs2_dir_del(ip, &dotname); if (error) return error; - error = gfs2_dir_del(ip, &gfs2_qdotdot); + gfs2_str2qstr(&dotname, ".."); + error = gfs2_dir_del(ip, &dotname); if (error) return error; @@ -689,8 +694,11 @@ static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) struct inode *dir = &to->i_inode; struct super_block *sb = dir->i_sb; struct inode *tmp; + struct qstr dotdot; int error = 0; + gfs2_str2qstr(&dotdot, ".."); + igrab(dir); for (;;) { @@ -703,7 +711,7 @@ static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) break; } - tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1); + tmp = gfs2_lookupi(dir, &dotdot, 1); if (IS_ERR(tmp)) { error = PTR_ERR(tmp); break; @@ -736,7 +744,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, struct gfs2_inode *ip = GFS2_I(odentry->d_inode); struct gfs2_inode *nip = NULL; struct gfs2_sbd *sdp = GFS2_SB(odir); - struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh; + struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }; struct gfs2_rgrpd *nrgd; unsigned int num_gh; int dir_rename = 0; @@ -750,9 +758,6 @@ static int gfs2_rename(struct inode *odir, struct 
dentry *odentry, return 0; } - error = gfs2_rindex_hold(sdp, &ri_gh); - if (error) - return error; if (odip != ndip) { error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, @@ -882,12 +887,12 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, al->al_requested = sdp->sd_max_dirres; - error = gfs2_inplace_reserve_ri(ndip); + error = gfs2_inplace_reserve(ndip); if (error) goto out_gunlock_q; error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + - gfs2_rg_blocks(al) + + al->al_rgd->rd_length + 4 * RES_DINODE + 4 * RES_LEAF + RES_STATFS + RES_QUOTA + 4, 0); if (error) @@ -915,6 +920,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, } if (dir_rename) { + struct qstr name; + gfs2_str2qstr(&name, ".."); + error = gfs2_change_nlink(ndip, +1); if (error) goto out_end_trans; @@ -922,7 +930,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (error) goto out_end_trans; - error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR); + error = gfs2_dir_mvino(ip, &name, ndip, DT_DIR); if (error) goto out_end_trans; } else { @@ -964,7 +972,6 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (r_gh.gh_gl) gfs2_glock_dq_uninit(&r_gh); out: - gfs2_glock_dq_uninit(&ri_gh); return error; } @@ -983,7 +990,7 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) struct gfs2_inode *ip = GFS2_I(dentry->d_inode); struct gfs2_holder i_gh; struct buffer_head *dibh; - unsigned int x, size; + unsigned int x; char *buf; int error; @@ -995,8 +1002,7 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) return NULL; } - size = (unsigned int)i_size_read(&ip->i_inode); - if (size == 0) { + if (!ip->i_disksize) { gfs2_consist_inode(ip); buf = ERR_PTR(-EIO); goto out; @@ -1008,7 +1014,7 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) goto out; } - x = size + 1; + x = ip->i_disksize + 1; buf = kmalloc(x, GFP_NOFS); if (!buf) buf = 
ERR_PTR(-ENOMEM); @@ -1065,6 +1071,30 @@ int gfs2_permission(struct inode *inode, int mask) return error; } +/* + * XXX(truncate): the truncate_setsize calls should be moved to the end. + */ +static int setattr_size(struct inode *inode, struct iattr *attr) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + int error; + + if (attr->ia_size != ip->i_disksize) { + error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); + if (error) + return error; + truncate_setsize(inode, attr->ia_size); + gfs2_trans_end(sdp); + } + + error = gfs2_truncatei(ip, attr->ia_size); + if (error && (inode->i_size != ip->i_disksize)) + i_size_write(inode, ip->i_disksize); + + return error; +} + static int setattr_chown(struct inode *inode, struct iattr *attr) { struct gfs2_inode *ip = GFS2_I(inode); @@ -1165,7 +1195,7 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) goto out; if (attr->ia_valid & ATTR_SIZE) - error = gfs2_setattr_size(inode, attr->ia_size); + error = setattr_size(inode, attr); else if (attr->ia_valid & (ATTR_UID | ATTR_GID)) error = setattr_chown(inode, attr); else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode)) @@ -1271,257 +1301,6 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name) return ret; } -static void empty_write_end(struct page *page, unsigned from, - unsigned to) -{ - struct gfs2_inode *ip = GFS2_I(page->mapping->host); - - page_zero_new_buffers(page, from, to); - flush_dcache_page(page); - mark_page_accessed(page); - - if (!gfs2_is_writeback(ip)) - gfs2_page_add_databufs(ip, page, from, to); - - block_commit_write(page, from, to); -} - - -static int write_empty_blocks(struct page *page, unsigned from, unsigned to) -{ - unsigned start, end, next; - struct buffer_head *bh, *head; - int error; - - if (!page_has_buffers(page)) { - error = block_prepare_write(page, from, to, gfs2_block_map); - if (unlikely(error)) - return error; - - empty_write_end(page, from, to); - return 0; - 
} - - bh = head = page_buffers(page); - next = end = 0; - while (next < from) { - next += bh->b_size; - bh = bh->b_this_page; - } - start = next; - do { - next += bh->b_size; - if (buffer_mapped(bh)) { - if (end) { - error = block_prepare_write(page, start, end, - gfs2_block_map); - if (unlikely(error)) - return error; - empty_write_end(page, start, end); - end = 0; - } - start = next; - } - else - end = next; - bh = bh->b_this_page; - } while (next < to); - - if (end) { - error = block_prepare_write(page, start, end, gfs2_block_map); - if (unlikely(error)) - return error; - empty_write_end(page, start, end); - } - - return 0; -} - -static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, - int mode) -{ - struct gfs2_inode *ip = GFS2_I(inode); - struct buffer_head *dibh; - int error; - u64 start = offset >> PAGE_CACHE_SHIFT; - unsigned int start_offset = offset & ~PAGE_CACHE_MASK; - u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT; - pgoff_t curr; - struct page *page; - unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK; - unsigned int from, to; - - if (!end_offset) - end_offset = PAGE_CACHE_SIZE; - - error = gfs2_meta_inode_buffer(ip, &dibh); - if (unlikely(error)) - goto out; - - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - - if (gfs2_is_stuffed(ip)) { - error = gfs2_unstuff_dinode(ip, NULL); - if (unlikely(error)) - goto out; - } - - curr = start; - offset = start << PAGE_CACHE_SHIFT; - from = start_offset; - to = PAGE_CACHE_SIZE; - while (curr <= end) { - page = grab_cache_page_write_begin(inode->i_mapping, curr, - AOP_FLAG_NOFS); - if (unlikely(!page)) { - error = -ENOMEM; - goto out; - } - - if (curr == end) - to = end_offset; - error = write_empty_blocks(page, from, to); - if (!error && offset + to > inode->i_size && - !(mode & FALLOC_FL_KEEP_SIZE)) { - i_size_write(inode, offset + to); - } - unlock_page(page); - page_cache_release(page); - if (error) - goto out; - curr++; - offset += PAGE_CACHE_SIZE; - from = 0; - } - - 
gfs2_dinode_out(ip, dibh->b_data); - mark_inode_dirty(inode); - - brelse(dibh); - -out: - return error; -} - -static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, - unsigned int *data_blocks, unsigned int *ind_blocks) -{ - const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone; - unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); - - for (tmp = max_data; tmp > sdp->sd_diptrs;) { - tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); - max_data -= tmp; - } - /* This calculation isn't the exact reverse of gfs2_write_calc_reserve, - so it might end up with fewer data blocks */ - if (max_data <= *data_blocks) - return; - *data_blocks = max_data; - *ind_blocks = max_blocks - max_data; - *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; - if (*len > max) { - *len = max; - gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks); - } -} - -static long gfs2_fallocate(struct inode *inode, int mode, loff_t offset, - loff_t len) -{ - struct gfs2_sbd *sdp = GFS2_SB(inode); - struct gfs2_inode *ip = GFS2_I(inode); - unsigned int data_blocks = 0, ind_blocks = 0, rblocks; - loff_t bytes, max_bytes; - struct gfs2_alloc *al; - int error; - loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; - next = (next + 1) << sdp->sd_sb.sb_bsize_shift; - - offset = (offset >> sdp->sd_sb.sb_bsize_shift) << - sdp->sd_sb.sb_bsize_shift; - - len = next - offset; - bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2; - if (!bytes) - bytes = UINT_MAX; - - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); - error = gfs2_glock_nq(&ip->i_gh); - if (unlikely(error)) - goto out_uninit; - - if (!gfs2_write_alloc_required(ip, offset, len)) - goto out_unlock; - - while (len > 0) { - if (len < bytes) - bytes = len; - al = gfs2_alloc_get(ip); - if (!al) { - error = -ENOMEM; - goto out_unlock; - } - - error = gfs2_quota_lock_check(ip); - if (error) - goto out_alloc_put; - -retry: - 
gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); - - al->al_requested = data_blocks + ind_blocks; - error = gfs2_inplace_reserve(ip); - if (error) { - if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { - bytes >>= 1; - goto retry; - } - goto out_qunlock; - } - max_bytes = bytes; - calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks); - al->al_requested = data_blocks + ind_blocks; - - rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + - RES_RG_HDR + gfs2_rg_blocks(al); - if (gfs2_is_jdata(ip)) - rblocks += data_blocks ? data_blocks : 1; - - error = gfs2_trans_begin(sdp, rblocks, - PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); - if (error) - goto out_trans_fail; - - error = fallocate_chunk(inode, offset, max_bytes, mode); - gfs2_trans_end(sdp); - - if (error) - goto out_trans_fail; - - len -= max_bytes; - offset += max_bytes; - gfs2_inplace_release(ip); - gfs2_quota_unlock(ip); - gfs2_alloc_put(ip); - } - goto out_unlock; - -out_trans_fail: - gfs2_inplace_release(ip); -out_qunlock: - gfs2_quota_unlock(ip); -out_alloc_put: - gfs2_alloc_put(ip); -out_unlock: - gfs2_glock_dq(&ip->i_gh); -out_uninit: - gfs2_holder_uninit(&ip->i_gh); - return error; -} - - static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { @@ -1572,7 +1351,6 @@ const struct inode_operations gfs2_file_iops = { .getxattr = gfs2_getxattr, .listxattr = gfs2_listxattr, .removexattr = gfs2_removexattr, - .fallocate = gfs2_fallocate, .fiemap = gfs2_fiemap, }; diff --git a/trunk/fs/gfs2/quota.c b/trunk/fs/gfs2/quota.c index 58a9b9998b42..1bc6b5695e6d 100644 --- a/trunk/fs/gfs2/quota.c +++ b/trunk/fs/gfs2/quota.c @@ -735,8 +735,10 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, goto out; size = loc + sizeof(struct gfs2_quota); - if (size > inode->i_size) + if (size > inode->i_size) { + ip->i_disksize = size; i_size_write(inode, size); + } inode->i_mtime = inode->i_atime = CURRENT_TIME; gfs2_trans_add_bh(ip->i_gl, 
dibh, 1); gfs2_dinode_out(ip, dibh->b_data); @@ -815,7 +817,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) goto out_alloc; if (nalloc) - blocks += gfs2_rg_blocks(al) + nalloc * ind_blocks + RES_STATFS; + blocks += al->al_rgd->rd_length + nalloc * ind_blocks + RES_STATFS; error = gfs2_trans_begin(sdp, blocks, 0); if (error) @@ -1188,17 +1190,18 @@ static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void * int gfs2_quota_init(struct gfs2_sbd *sdp) { struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); - u64 size = i_size_read(sdp->sd_qc_inode); - unsigned int blocks = size >> sdp->sd_sb.sb_bsize_shift; + unsigned int blocks = ip->i_disksize >> sdp->sd_sb.sb_bsize_shift; unsigned int x, slot = 0; unsigned int found = 0; u64 dblock; u32 extlen = 0; int error; - if (gfs2_check_internal_file_size(sdp->sd_qc_inode, 1, 64 << 20)) + if (!ip->i_disksize || ip->i_disksize > (64 << 20) || + ip->i_disksize & (sdp->sd_sb.sb_bsize - 1)) { + gfs2_consist_inode(ip); return -EIO; - + } sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block; sdp->sd_quota_chunks = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * PAGE_SIZE); @@ -1586,7 +1589,6 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, error = gfs2_inplace_reserve(ip); if (error) goto out_alloc; - blocks += gfs2_rg_blocks(al); } error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 1, 0); diff --git a/trunk/fs/gfs2/recovery.c b/trunk/fs/gfs2/recovery.c index f2a02edcac8f..f7f89a94a5a4 100644 --- a/trunk/fs/gfs2/recovery.c +++ b/trunk/fs/gfs2/recovery.c @@ -455,13 +455,11 @@ void gfs2_recover_func(struct work_struct *work) int ro = 0; unsigned int pass; int error; - int jlocked = 0; - if (sdp->sd_args.ar_spectator || - (jd->jd_jid != sdp->sd_lockstruct.ls_jid)) { + if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) { fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n", jd->jd_jid); - jlocked = 1; + /* Acquire the journal lock so we can do recovery */ error = 
gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops, @@ -556,12 +554,13 @@ void gfs2_recover_func(struct work_struct *work) jd->jd_jid, t); } + if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) + gfs2_glock_dq_uninit(&ji_gh); + gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS); - if (jlocked) { - gfs2_glock_dq_uninit(&ji_gh); + if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) gfs2_glock_dq_uninit(&j_gh); - } fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); goto done; @@ -569,7 +568,7 @@ void gfs2_recover_func(struct work_struct *work) fail_gunlock_tr: gfs2_glock_dq_uninit(&t_gh); fail_gunlock_ji: - if (jlocked) { + if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) { gfs2_glock_dq_uninit(&ji_gh); fail_gunlock_j: gfs2_glock_dq_uninit(&j_gh); diff --git a/trunk/fs/gfs2/rgrp.c b/trunk/fs/gfs2/rgrp.c index fb67f593f408..171a744f8e45 100644 --- a/trunk/fs/gfs2/rgrp.c +++ b/trunk/fs/gfs2/rgrp.c @@ -500,7 +500,7 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp) for (rgrps = 0;; rgrps++) { loff_t pos = rgrps * sizeof(struct gfs2_rindex); - if (pos + sizeof(struct gfs2_rindex) >= i_size_read(inode)) + if (pos + sizeof(struct gfs2_rindex) >= ip->i_disksize) break; error = gfs2_internal_read(ip, &ra_state, buf, &pos, sizeof(struct gfs2_rindex)); @@ -588,9 +588,7 @@ static int gfs2_ri_update(struct gfs2_inode *ip) struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct inode *inode = &ip->i_inode; struct file_ra_state ra_state; - u64 rgrp_count = i_size_read(inode); - struct gfs2_rgrpd *rgd; - unsigned int max_data = 0; + u64 rgrp_count = ip->i_disksize; int error; do_div(rgrp_count, sizeof(struct gfs2_rindex)); @@ -605,10 +603,6 @@ static int gfs2_ri_update(struct gfs2_inode *ip) } } - list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list) - if (rgd->rd_data > max_data) - max_data = rgd->rd_data; - sdp->sd_max_rg_data = max_data; sdp->sd_rindex_uptodate = 1; return 0; } @@ -628,15 +622,13 @@ static int gfs2_ri_update_special(struct gfs2_inode *ip) struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct 
inode *inode = &ip->i_inode; struct file_ra_state ra_state; - struct gfs2_rgrpd *rgd; - unsigned int max_data = 0; int error; file_ra_state_init(&ra_state, inode->i_mapping); for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) { /* Ignore partials */ if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) > - i_size_read(inode)) + ip->i_disksize) break; error = read_rindex_entry(ip, &ra_state); if (error) { @@ -644,10 +636,6 @@ static int gfs2_ri_update_special(struct gfs2_inode *ip) return error; } } - list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list) - if (rgd->rd_data > max_data) - max_data = rgd->rd_data; - sdp->sd_max_rg_data = max_data; sdp->sd_rindex_uptodate = 1; return 0; @@ -1200,8 +1188,7 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked, * Returns: errno */ -int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex, - char *file, unsigned int line) +int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_alloc *al = ip->i_alloc; @@ -1212,15 +1199,12 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex, return -EINVAL; try_again: - if (hold_rindex) { - /* We need to hold the rindex unless the inode we're using is - the rindex itself, in which case it's already held. */ - if (ip != GFS2_I(sdp->sd_rindex)) - error = gfs2_rindex_hold(sdp, &al->al_ri_gh); - else if (!sdp->sd_rgrps) /* We may not have the rindex read - in, so: */ - error = gfs2_ri_update_special(ip); - } + /* We need to hold the rindex unless the inode we're using is + the rindex itself, in which case it's already held. */ + if (ip != GFS2_I(sdp->sd_rindex)) + error = gfs2_rindex_hold(sdp, &al->al_ri_gh); + else if (!sdp->sd_rgrps) /* We may not have the rindex read in, so: */ + error = gfs2_ri_update_special(ip); if (error) return error; @@ -1231,7 +1215,7 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex, try to free it, and try the allocation again. 
*/ error = get_local_rgrp(ip, &unlinked, &last_unlinked); if (error) { - if (hold_rindex && ip != GFS2_I(sdp->sd_rindex)) + if (ip != GFS2_I(sdp->sd_rindex)) gfs2_glock_dq_uninit(&al->al_ri_gh); if (error != -EAGAIN) return error; @@ -1273,7 +1257,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip) al->al_rgd = NULL; if (al->al_rgd_gh.gh_gl) gfs2_glock_dq_uninit(&al->al_rgd_gh); - if (ip != GFS2_I(sdp->sd_rindex) && al->al_ri_gh.gh_gl) + if (ip != GFS2_I(sdp->sd_rindex)) gfs2_glock_dq_uninit(&al->al_ri_gh); } @@ -1512,19 +1496,11 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n) struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; struct gfs2_alloc *al = ip->i_alloc; - struct gfs2_rgrpd *rgd; + struct gfs2_rgrpd *rgd = al->al_rgd; u32 goal, blk; u64 block; int error; - /* Only happens if there is a bug in gfs2, return something distinctive - * to ensure that it is noticed. - */ - if (al == NULL) - return -ECANCELED; - - rgd = al->al_rgd; - if (rgrp_contains_block(rgd, ip->i_goal)) goal = ip->i_goal - rgd->rd_data0; else diff --git a/trunk/fs/gfs2/rgrp.h b/trunk/fs/gfs2/rgrp.h index 0e35c0466f9a..f07119d89557 100644 --- a/trunk/fs/gfs2/rgrp.h +++ b/trunk/fs/gfs2/rgrp.h @@ -39,12 +39,10 @@ static inline void gfs2_alloc_put(struct gfs2_inode *ip) ip->i_alloc = NULL; } -extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex, - char *file, unsigned int line); +extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, + unsigned int line); #define gfs2_inplace_reserve(ip) \ - gfs2_inplace_reserve_i((ip), 1, __FILE__, __LINE__) -#define gfs2_inplace_reserve_ri(ip) \ - gfs2_inplace_reserve_i((ip), 0, __FILE__, __LINE__) +gfs2_inplace_reserve_i((ip), __FILE__, __LINE__) extern void gfs2_inplace_release(struct gfs2_inode *ip); diff --git a/trunk/fs/gfs2/super.c b/trunk/fs/gfs2/super.c index 047d1176096c..77cb9f830ee4 100644 --- a/trunk/fs/gfs2/super.c +++ b/trunk/fs/gfs2/super.c @@ -85,7 +85,6 @@ static 
const match_table_t tokens = { {Opt_locktable, "locktable=%s"}, {Opt_hostdata, "hostdata=%s"}, {Opt_spectator, "spectator"}, - {Opt_spectator, "norecovery"}, {Opt_ignore_local_fs, "ignore_local_fs"}, {Opt_localflocks, "localflocks"}, {Opt_localcaching, "localcaching"}, @@ -160,13 +159,13 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) args->ar_spectator = 1; break; case Opt_ignore_local_fs: - /* Retained for backwards compat only */ + args->ar_ignore_local_fs = 1; break; case Opt_localflocks: args->ar_localflocks = 1; break; case Opt_localcaching: - /* Retained for backwards compat only */ + args->ar_localcaching = 1; break; case Opt_debug: if (args->ar_errors == GFS2_ERRORS_PANIC) { @@ -180,7 +179,7 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) args->ar_debug = 0; break; case Opt_upgrade: - /* Retained for backwards compat only */ + args->ar_upgrade = 1; break; case Opt_acl: args->ar_posix_acl = 1; @@ -343,14 +342,15 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd) { struct gfs2_inode *ip = GFS2_I(jd->jd_inode); struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); - u64 size = i_size_read(jd->jd_inode); - if (gfs2_check_internal_file_size(jd->jd_inode, 8 << 20, 1 << 30)) + if (ip->i_disksize < (8 << 20) || ip->i_disksize > (1 << 30) || + (ip->i_disksize & (sdp->sd_sb.sb_bsize - 1))) { + gfs2_consist_inode(ip); return -EIO; + } + jd->jd_blocks = ip->i_disksize >> sdp->sd_sb.sb_bsize_shift; - jd->jd_blocks = size >> sdp->sd_sb.sb_bsize_shift; - - if (gfs2_write_alloc_required(ip, 0, size)) { + if (gfs2_write_alloc_required(ip, 0, ip->i_disksize)) { gfs2_consist_inode(ip); return -EIO; } @@ -1129,7 +1129,9 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) /* Some flags must not be changed */ if (args_neq(&args, &sdp->sd_args, spectator) || + args_neq(&args, &sdp->sd_args, ignore_local_fs) || args_neq(&args, &sdp->sd_args, localflocks) || + args_neq(&args, &sdp->sd_args, localcaching) || args_neq(&args, &sdp->sd_args, 
meta)) return -EINVAL; @@ -1232,10 +1234,16 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) seq_printf(s, ",hostdata=%s", args->ar_hostdata); if (args->ar_spectator) seq_printf(s, ",spectator"); + if (args->ar_ignore_local_fs) + seq_printf(s, ",ignore_local_fs"); if (args->ar_localflocks) seq_printf(s, ",localflocks"); + if (args->ar_localcaching) + seq_printf(s, ",localcaching"); if (args->ar_debug) seq_printf(s, ",debug"); + if (args->ar_upgrade) + seq_printf(s, ",upgrade"); if (args->ar_posix_acl) seq_printf(s, ",acl"); if (args->ar_quota != GFS2_QUOTA_DEFAULT) { diff --git a/trunk/fs/gfs2/sys.c b/trunk/fs/gfs2/sys.c index 748ccb557c18..ccacffd2faaa 100644 --- a/trunk/fs/gfs2/sys.c +++ b/trunk/fs/gfs2/sys.c @@ -230,10 +230,7 @@ static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len if (gltype > LM_TYPE_JOURNAL) return -EINVAL; - if (gltype == LM_TYPE_NONDISK && glnum == GFS2_TRANS_LOCK) - glops = &gfs2_trans_glops; - else - glops = gfs2_glops_list[gltype]; + glops = gfs2_glops_list[gltype]; if (glops == NULL) return -EINVAL; if (!test_and_set_bit(SDF_DEMOTE, &sdp->sd_flags)) @@ -402,32 +399,31 @@ static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf) static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf) { - return sprintf(buf, "%d\n", sdp->sd_lockstruct.ls_jid); + return sprintf(buf, "%u\n", sdp->sd_lockstruct.ls_jid); } static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len) { - int jid; + unsigned jid; int rv; - rv = sscanf(buf, "%d", &jid); + rv = sscanf(buf, "%u", &jid); if (rv != 1) return -EINVAL; spin_lock(&sdp->sd_jindex_spin); rv = -EINVAL; + if (sdp->sd_args.ar_spectator) + goto out; if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) goto out; rv = -EBUSY; - if (test_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0) + if (test_and_clear_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0) goto out; - rv = 0; - if (sdp->sd_args.ar_spectator && jid > 0) - rv = jid = -EINVAL; 
sdp->sd_lockstruct.ls_jid = jid; - clear_bit(SDF_NOJOURNALID, &sdp->sd_flags); smp_mb__after_clear_bit(); wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID); + rv = 0; out: spin_unlock(&sdp->sd_jindex_spin); return rv ? rv : len; @@ -621,7 +617,7 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj, add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) - add_uevent_var(env, "JOURNALID=%d", sdp->sd_lockstruct.ls_jid); + add_uevent_var(env, "JOURNALID=%u", sdp->sd_lockstruct.ls_jid); if (gfs2_uuid_valid(uuid)) add_uevent_var(env, "UUID=%pUB", uuid); return 0; diff --git a/trunk/fs/gfs2/trace_gfs2.h b/trunk/fs/gfs2/trace_gfs2.h index cedb0bb96d96..148d55c14171 100644 --- a/trunk/fs/gfs2/trace_gfs2.h +++ b/trunk/fs/gfs2/trace_gfs2.h @@ -39,8 +39,7 @@ {(1UL << GLF_INVALIDATE_IN_PROGRESS), "i" }, \ {(1UL << GLF_REPLY_PENDING), "r" }, \ {(1UL << GLF_INITIAL), "I" }, \ - {(1UL << GLF_FROZEN), "F" }, \ - {(1UL << GLF_QUEUED), "q" }) + {(1UL << GLF_FROZEN), "F" }) #ifndef NUMPTY #define NUMPTY diff --git a/trunk/fs/gfs2/trans.h b/trunk/fs/gfs2/trans.h index fb56b783e028..edf9d4bd908e 100644 --- a/trunk/fs/gfs2/trans.h +++ b/trunk/fs/gfs2/trans.h @@ -20,20 +20,11 @@ struct gfs2_glock; #define RES_JDATA 1 #define RES_DATA 1 #define RES_LEAF 1 -#define RES_RG_HDR 1 #define RES_RG_BIT 2 #define RES_EATTR 1 #define RES_STATFS 1 #define RES_QUOTA 2 -/* reserve either the number of blocks to be allocated plus the rg header - * block, or all of the blocks in the rg, whichever is smaller */ -static inline unsigned int gfs2_rg_blocks(const struct gfs2_alloc *al) -{ - return (al->al_requested < al->al_rgd->rd_length)? 
- al->al_requested + 1 : al->al_rgd->rd_length; -} - int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, unsigned int revokes); diff --git a/trunk/fs/gfs2/xattr.c b/trunk/fs/gfs2/xattr.c index 30b58f07c8a6..776af6eb4bcb 100644 --- a/trunk/fs/gfs2/xattr.c +++ b/trunk/fs/gfs2/xattr.c @@ -734,7 +734,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, goto out_gunlock_q; error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), - blks + gfs2_rg_blocks(al) + + blks + al->al_rgd->rd_length + RES_DINODE + RES_STATFS + RES_QUOTA, 0); if (error) goto out_ipres; diff --git a/trunk/fs/hfsplus/bfind.c b/trunk/fs/hfsplus/bfind.c index d182438c7ae4..5007a41f1be9 100644 --- a/trunk/fs/hfsplus/bfind.c +++ b/trunk/fs/hfsplus/bfind.c @@ -23,7 +23,7 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd) fd->search_key = ptr; fd->key = ptr + tree->max_key_len + 2; dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); - mutex_lock(&tree->tree_lock); + down(&tree->tree_lock); return 0; } @@ -32,7 +32,7 @@ void hfs_find_exit(struct hfs_find_data *fd) hfs_bnode_put(fd->bnode); kfree(fd->search_key); dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0)); - mutex_unlock(&fd->tree->tree_lock); + up(&fd->tree->tree_lock); fd->tree = NULL; } @@ -52,10 +52,6 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) rec = (e + b) / 2; len = hfs_brec_lenoff(bnode, rec, &off); keylen = hfs_brec_keylen(bnode, rec); - if (keylen == 0) { - res = -EINVAL; - goto fail; - } hfs_bnode_read(bnode, fd->key, off, keylen); cmpval = bnode->tree->keycmp(fd->key, fd->search_key); if (!cmpval) { @@ -71,10 +67,6 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) if (rec != e && e >= 0) { len = hfs_brec_lenoff(bnode, e, &off); keylen = hfs_brec_keylen(bnode, e); - if (keylen == 0) { - res = -EINVAL; - goto fail; - } hfs_bnode_read(bnode, fd->key, off, 
keylen); } done: @@ -83,7 +75,6 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) fd->keylength = keylen; fd->entryoffset = off + keylen; fd->entrylength = len - keylen; -fail: return res; } @@ -207,10 +198,6 @@ int hfs_brec_goto(struct hfs_find_data *fd, int cnt) len = hfs_brec_lenoff(bnode, fd->record, &off); keylen = hfs_brec_keylen(bnode, fd->record); - if (keylen == 0) { - res = -EINVAL; - goto out; - } fd->keyoffset = off; fd->keylength = keylen; fd->entryoffset = off + keylen; diff --git a/trunk/fs/hfsplus/bitmap.c b/trunk/fs/hfsplus/bitmap.c index ad57f5991eb1..ea30afc2a03c 100644 --- a/trunk/fs/hfsplus/bitmap.c +++ b/trunk/fs/hfsplus/bitmap.c @@ -17,7 +17,6 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *max) { - struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); struct page *page; struct address_space *mapping; __be32 *pptr, *curr, *end; @@ -30,8 +29,8 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma return size; dprint(DBG_BITMAP, "block_allocate: %u,%u,%u\n", size, offset, len); - mutex_lock(&sbi->alloc_mutex); - mapping = sbi->alloc_file->i_mapping; + mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex); + mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, NULL); if (IS_ERR(page)) { start = size; @@ -151,17 +150,16 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma set_page_dirty(page); kunmap(page); *max = offset + (curr - pptr) * 32 + i - start; - sbi->free_blocks -= *max; + HFSPLUS_SB(sb).free_blocks -= *max; sb->s_dirt = 1; dprint(DBG_BITMAP, "-> %u,%u\n", start, *max); out: - mutex_unlock(&sbi->alloc_mutex); + mutex_unlock(&HFSPLUS_SB(sb).alloc_file->i_mutex); return start; } int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) { - struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); struct page *page; struct address_space *mapping; __be32 *pptr, *curr, *end; @@ 
-174,11 +172,11 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) dprint(DBG_BITMAP, "block_free: %u,%u\n", offset, count); /* are all of the bits in range? */ - if ((offset + count) > sbi->total_blocks) + if ((offset + count) > HFSPLUS_SB(sb).total_blocks) return -2; - mutex_lock(&sbi->alloc_mutex); - mapping = sbi->alloc_file->i_mapping; + mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex); + mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; pnr = offset / PAGE_CACHE_BITS; page = read_mapping_page(mapping, pnr, NULL); pptr = kmap(page); @@ -226,9 +224,9 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) out: set_page_dirty(page); kunmap(page); - sbi->free_blocks += len; + HFSPLUS_SB(sb).free_blocks += len; sb->s_dirt = 1; - mutex_unlock(&sbi->alloc_mutex); + mutex_unlock(&HFSPLUS_SB(sb).alloc_file->i_mutex); return 0; } diff --git a/trunk/fs/hfsplus/brec.c b/trunk/fs/hfsplus/brec.c index 2f39d05443e1..c88e5d72a402 100644 --- a/trunk/fs/hfsplus/brec.c +++ b/trunk/fs/hfsplus/brec.c @@ -42,13 +42,10 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec) recoff = hfs_bnode_read_u16(node, node->tree->node_size - (rec + 1) * 2); if (!recoff) return 0; - - retval = hfs_bnode_read_u16(node, recoff) + 2; - if (retval > node->tree->max_key_len + 2) { - printk(KERN_ERR "hfs: keylen %d too large\n", - retval); - retval = 0; - } + if (node->tree->attributes & HFS_TREE_BIGKEYS) + retval = hfs_bnode_read_u16(node, recoff) + 2; + else + retval = (hfs_bnode_read_u8(node, recoff) | 1) + 1; } return retval; } @@ -219,7 +216,7 @@ int hfs_brec_remove(struct hfs_find_data *fd) static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) { struct hfs_btree *tree; - struct hfs_bnode *node, *new_node, *next_node; + struct hfs_bnode *node, *new_node; struct hfs_bnode_desc node_desc; int num_recs, new_rec_off, new_off, old_rec_off; int data_start, data_end, size; @@ -238,17 +235,6 @@ static struct hfs_bnode *hfs_bnode_split(struct 
hfs_find_data *fd) new_node->type = node->type; new_node->height = node->height; - if (node->next) - next_node = hfs_bnode_find(tree, node->next); - else - next_node = NULL; - - if (IS_ERR(next_node)) { - hfs_bnode_put(node); - hfs_bnode_put(new_node); - return next_node; - } - size = tree->node_size / 2 - node->num_recs * 2 - 14; old_rec_off = tree->node_size - 4; num_recs = 1; @@ -262,8 +248,6 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) /* panic? */ hfs_bnode_put(node); hfs_bnode_put(new_node); - if (next_node) - hfs_bnode_put(next_node); return ERR_PTR(-ENOSPC); } @@ -318,7 +302,8 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) hfs_bnode_write(node, &node_desc, 0, sizeof(node_desc)); /* update next bnode header */ - if (next_node) { + if (new_node->next) { + struct hfs_bnode *next_node = hfs_bnode_find(tree, new_node->next); next_node->prev = new_node->this; hfs_bnode_read(next_node, &node_desc, 0, sizeof(node_desc)); node_desc.prev = cpu_to_be32(next_node->prev); diff --git a/trunk/fs/hfsplus/btree.c b/trunk/fs/hfsplus/btree.c index 22e4d4e32999..e49fcee1e293 100644 --- a/trunk/fs/hfsplus/btree.c +++ b/trunk/fs/hfsplus/btree.c @@ -30,7 +30,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) if (!tree) return NULL; - mutex_init(&tree->tree_lock); + init_MUTEX(&tree->tree_lock); spin_lock_init(&tree->hash_lock); tree->sb = sb; tree->cnid = id; @@ -39,16 +39,10 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) goto free_tree; tree->inode = inode; - if (!HFSPLUS_I(tree->inode)->first_blocks) { - printk(KERN_ERR - "hfs: invalid btree extent records (0 size).\n"); - goto free_inode; - } - mapping = tree->inode->i_mapping; page = read_mapping_page(mapping, 0, NULL); if (IS_ERR(page)) - goto free_inode; + goto free_tree; /* Load the header */ head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc)); @@ -63,56 +57,27 @@ struct hfs_btree *hfs_btree_open(struct 
super_block *sb, u32 id) tree->max_key_len = be16_to_cpu(head->max_key_len); tree->depth = be16_to_cpu(head->depth); - /* Verify the tree and set the correct compare function */ - switch (id) { - case HFSPLUS_EXT_CNID: - if (tree->max_key_len != HFSPLUS_EXT_KEYLEN - sizeof(u16)) { - printk(KERN_ERR "hfs: invalid extent max_key_len %d\n", - tree->max_key_len); - goto fail_page; - } - if (tree->attributes & HFS_TREE_VARIDXKEYS) { - printk(KERN_ERR "hfs: invalid extent btree flag\n"); - goto fail_page; - } - + /* Set the correct compare function */ + if (id == HFSPLUS_EXT_CNID) { tree->keycmp = hfsplus_ext_cmp_key; - break; - case HFSPLUS_CAT_CNID: - if (tree->max_key_len != HFSPLUS_CAT_KEYLEN - sizeof(u16)) { - printk(KERN_ERR "hfs: invalid catalog max_key_len %d\n", - tree->max_key_len); - goto fail_page; - } - if (!(tree->attributes & HFS_TREE_VARIDXKEYS)) { - printk(KERN_ERR "hfs: invalid catalog btree flag\n"); - goto fail_page; - } - - if (test_bit(HFSPLUS_SB_HFSX, &HFSPLUS_SB(sb)->flags) && + } else if (id == HFSPLUS_CAT_CNID) { + if ((HFSPLUS_SB(sb).flags & HFSPLUS_SB_HFSX) && (head->key_type == HFSPLUS_KEY_BINARY)) tree->keycmp = hfsplus_cat_bin_cmp_key; else { tree->keycmp = hfsplus_cat_case_cmp_key; - set_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); + HFSPLUS_SB(sb).flags |= HFSPLUS_SB_CASEFOLD; } - break; - default: + } else { printk(KERN_ERR "hfs: unknown B*Tree requested\n"); goto fail_page; } - if (!(tree->attributes & HFS_TREE_BIGKEYS)) { - printk(KERN_ERR "hfs: invalid btree flag\n"); - goto fail_page; - } - size = tree->node_size; if (!is_power_of_2(size)) goto fail_page; if (!tree->node_count) goto fail_page; - tree->node_size_shift = ffs(size) - 1; tree->pages_per_bnode = (tree->node_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; @@ -122,11 +87,10 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) return tree; fail_page: - page_cache_release(page); - free_inode: tree->inode->i_mapping->a_ops = &hfsplus_aops; - 
iput(tree->inode); + page_cache_release(page); free_tree: + iput(tree->inode); kfree(tree); return NULL; } @@ -228,18 +192,17 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) while (!tree->free_nodes) { struct inode *inode = tree->inode; - struct hfsplus_inode_info *hip = HFSPLUS_I(inode); u32 count; int res; res = hfsplus_file_extend(inode); if (res) return ERR_PTR(res); - hip->phys_size = inode->i_size = - (loff_t)hip->alloc_blocks << - HFSPLUS_SB(tree->sb)->alloc_blksz_shift; - hip->fs_blocks = - hip->alloc_blocks << HFSPLUS_SB(tree->sb)->fs_shift; + HFSPLUS_I(inode).phys_size = inode->i_size = + (loff_t)HFSPLUS_I(inode).alloc_blocks << + HFSPLUS_SB(tree->sb).alloc_blksz_shift; + HFSPLUS_I(inode).fs_blocks = HFSPLUS_I(inode).alloc_blocks << + HFSPLUS_SB(tree->sb).fs_shift; inode_set_bytes(inode, inode->i_size); count = inode->i_size >> tree->node_size_shift; tree->free_nodes = count - tree->node_count; diff --git a/trunk/fs/hfsplus/catalog.c b/trunk/fs/hfsplus/catalog.c index 8af45fc5b051..f6874acb2cf2 100644 --- a/trunk/fs/hfsplus/catalog.c +++ b/trunk/fs/hfsplus/catalog.c @@ -67,7 +67,7 @@ static void hfsplus_cat_build_key_uni(hfsplus_btree_key *key, u32 parent, key->key_len = cpu_to_be16(6 + ustrlen); } -void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms) +static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms) { if (inode->i_flags & S_IMMUTABLE) perms->rootflags |= HFSPLUS_FLG_IMMUTABLE; @@ -77,24 +77,15 @@ void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms) perms->rootflags |= HFSPLUS_FLG_APPEND; else perms->rootflags &= ~HFSPLUS_FLG_APPEND; - - perms->userflags = HFSPLUS_I(inode)->userflags; + HFSPLUS_I(inode).rootflags = perms->rootflags; + HFSPLUS_I(inode).userflags = perms->userflags; perms->mode = cpu_to_be16(inode->i_mode); perms->owner = cpu_to_be32(inode->i_uid); perms->group = cpu_to_be32(inode->i_gid); - - if (S_ISREG(inode->i_mode)) - perms->dev = 
cpu_to_be32(inode->i_nlink); - else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) - perms->dev = cpu_to_be32(inode->i_rdev); - else - perms->dev = 0; } static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct inode *inode) { - struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); - if (S_ISDIR(inode->i_mode)) { struct hfsplus_cat_folder *folder; @@ -102,13 +93,13 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct i memset(folder, 0, sizeof(*folder)); folder->type = cpu_to_be16(HFSPLUS_FOLDER); folder->id = cpu_to_be32(inode->i_ino); - HFSPLUS_I(inode)->create_date = + HFSPLUS_I(inode).create_date = folder->create_date = folder->content_mod_date = folder->attribute_mod_date = folder->access_date = hfsp_now2mt(); - hfsplus_cat_set_perms(inode, &folder->permissions); - if (inode == sbi->hidden_dir) + hfsplus_set_perms(inode, &folder->permissions); + if (inode == HFSPLUS_SB(inode->i_sb).hidden_dir) /* invisible and namelocked */ folder->user_info.frFlags = cpu_to_be16(0x5000); return sizeof(*folder); @@ -120,19 +111,19 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct i file->type = cpu_to_be16(HFSPLUS_FILE); file->flags = cpu_to_be16(HFSPLUS_FILE_THREAD_EXISTS); file->id = cpu_to_be32(cnid); - HFSPLUS_I(inode)->create_date = + HFSPLUS_I(inode).create_date = file->create_date = file->content_mod_date = file->attribute_mod_date = file->access_date = hfsp_now2mt(); if (cnid == inode->i_ino) { - hfsplus_cat_set_perms(inode, &file->permissions); + hfsplus_set_perms(inode, &file->permissions); if (S_ISLNK(inode->i_mode)) { file->user_info.fdType = cpu_to_be32(HFSP_SYMLINK_TYPE); file->user_info.fdCreator = cpu_to_be32(HFSP_SYMLINK_CREATOR); } else { - file->user_info.fdType = cpu_to_be32(sbi->type); - file->user_info.fdCreator = cpu_to_be32(sbi->creator); + file->user_info.fdType = cpu_to_be32(HFSPLUS_SB(inode->i_sb).type); + file->user_info.fdCreator = 
cpu_to_be32(HFSPLUS_SB(inode->i_sb).creator); } if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE) file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); @@ -140,8 +131,8 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct i file->user_info.fdType = cpu_to_be32(HFSP_HARDLINK_TYPE); file->user_info.fdCreator = cpu_to_be32(HFSP_HFSPLUS_CREATOR); file->user_info.fdFlags = cpu_to_be16(0x100); - file->create_date = HFSPLUS_I(sbi->hidden_dir)->create_date; - file->permissions.dev = cpu_to_be32(HFSPLUS_I(inode)->linkid); + file->create_date = HFSPLUS_I(HFSPLUS_SB(inode->i_sb).hidden_dir).create_date; + file->permissions.dev = cpu_to_be32(HFSPLUS_I(inode).dev); } return sizeof(*file); } @@ -189,14 +180,15 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid, int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, struct inode *inode) { - struct super_block *sb = dir->i_sb; struct hfs_find_data fd; + struct super_block *sb; hfsplus_cat_entry entry; int entry_size; int err; dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n", str->name, cnid, inode->i_nlink); - hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); + sb = dir->i_sb; + hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); entry_size = hfsplus_fill_cat_thread(sb, &entry, S_ISDIR(inode->i_mode) ? @@ -242,7 +234,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, struct ino int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) { - struct super_block *sb = dir->i_sb; + struct super_block *sb; struct hfs_find_data fd; struct hfsplus_fork_raw fork; struct list_head *pos; @@ -250,7 +242,8 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) u16 type; dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n", str ? 
str->name : NULL, cnid); - hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); + sb = dir->i_sb; + hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); if (!str) { int len; @@ -286,7 +279,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) hfsplus_free_fork(sb, cnid, &fork, HFSPLUS_TYPE_RSRC); } - list_for_each(pos, &HFSPLUS_I(dir)->open_dir_list) { + list_for_each(pos, &HFSPLUS_I(dir).open_dir_list) { struct hfsplus_readdir_data *rd = list_entry(pos, struct hfsplus_readdir_data, list); if (fd.tree->keycmp(fd.search_key, (void *)&rd->key) < 0) @@ -319,7 +312,7 @@ int hfsplus_rename_cat(u32 cnid, struct inode *src_dir, struct qstr *src_name, struct inode *dst_dir, struct qstr *dst_name) { - struct super_block *sb = src_dir->i_sb; + struct super_block *sb; struct hfs_find_data src_fd, dst_fd; hfsplus_cat_entry entry; int entry_size, type; @@ -327,7 +320,8 @@ int hfsplus_rename_cat(u32 cnid, dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", cnid, src_dir->i_ino, src_name->name, dst_dir->i_ino, dst_name->name); - hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd); + sb = src_dir->i_sb; + hfs_find_init(HFSPLUS_SB(sb).cat_tree, &src_fd); dst_fd = src_fd; /* find the old dir entry and read the data */ diff --git a/trunk/fs/hfsplus/dir.c b/trunk/fs/hfsplus/dir.c index d236d85ec9d7..764fd1bdca88 100644 --- a/trunk/fs/hfsplus/dir.c +++ b/trunk/fs/hfsplus/dir.c @@ -39,7 +39,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, dentry->d_op = &hfsplus_dentry_operations; dentry->d_fsdata = NULL; - hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); + hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); again: err = hfs_brec_read(&fd, &entry, sizeof(entry)); @@ -68,9 +68,9 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, cnid = be32_to_cpu(entry.file.id); if (entry.file.user_info.fdType == cpu_to_be32(HFSP_HARDLINK_TYPE) && 
entry.file.user_info.fdCreator == cpu_to_be32(HFSP_HFSPLUS_CREATOR) && - (entry.file.create_date == HFSPLUS_I(HFSPLUS_SB(sb)->hidden_dir)->create_date || - entry.file.create_date == HFSPLUS_I(sb->s_root->d_inode)->create_date) && - HFSPLUS_SB(sb)->hidden_dir) { + (entry.file.create_date == HFSPLUS_I(HFSPLUS_SB(sb).hidden_dir).create_date || + entry.file.create_date == HFSPLUS_I(sb->s_root->d_inode).create_date) && + HFSPLUS_SB(sb).hidden_dir) { struct qstr str; char name[32]; @@ -86,8 +86,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, linkid = be32_to_cpu(entry.file.permissions.dev); str.len = sprintf(name, "iNode%d", linkid); str.name = name; - hfsplus_cat_build_key(sb, fd.search_key, - HFSPLUS_SB(sb)->hidden_dir->i_ino, &str); + hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_SB(sb).hidden_dir->i_ino, &str); goto again; } } else if (!dentry->d_fsdata) @@ -102,7 +101,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) return ERR_CAST(inode); if (S_ISREG(inode->i_mode)) - HFSPLUS_I(inode)->linkid = linkid; + HFSPLUS_I(inode).dev = linkid; out: d_add(dentry, inode); return NULL; @@ -125,7 +124,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) if (filp->f_pos >= inode->i_size) return 0; - hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); + hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); err = hfs_brec_find(&fd); if (err) @@ -181,9 +180,8 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) err = -EIO; goto out; } - if (HFSPLUS_SB(sb)->hidden_dir && - HFSPLUS_SB(sb)->hidden_dir->i_ino == - be32_to_cpu(entry.folder.id)) + if (HFSPLUS_SB(sb).hidden_dir && + HFSPLUS_SB(sb).hidden_dir->i_ino == be32_to_cpu(entry.folder.id)) goto next; if (filldir(dirent, strbuf, len, filp->f_pos, be32_to_cpu(entry.folder.id), DT_DIR)) @@ -219,7 +217,7 @@ static int 
hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) } filp->private_data = rd; rd->file = filp; - list_add(&rd->list, &HFSPLUS_I(inode)->open_dir_list); + list_add(&rd->list, &HFSPLUS_I(inode).open_dir_list); } memcpy(&rd->key, fd.key, sizeof(struct hfsplus_cat_key)); out: @@ -231,18 +229,38 @@ static int hfsplus_dir_release(struct inode *inode, struct file *file) { struct hfsplus_readdir_data *rd = file->private_data; if (rd) { - mutex_lock(&inode->i_mutex); list_del(&rd->list); - mutex_unlock(&inode->i_mutex); kfree(rd); } return 0; } +static int hfsplus_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) +{ + struct inode *inode; + int res; + + inode = hfsplus_new_inode(dir->i_sb, mode); + if (!inode) + return -ENOSPC; + + res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); + if (res) { + inode->i_nlink = 0; + hfsplus_delete_inode(inode); + iput(inode); + return res; + } + hfsplus_instantiate(dentry, inode, inode->i_ino); + mark_inode_dirty(inode); + return 0; +} + static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, struct dentry *dst_dentry) { - struct hfsplus_sb_info *sbi = HFSPLUS_SB(dst_dir->i_sb); + struct super_block *sb = dst_dir->i_sb; struct inode *inode = src_dentry->d_inode; struct inode *src_dir = src_dentry->d_parent->d_inode; struct qstr str; @@ -252,10 +270,7 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, if (HFSPLUS_IS_RSRC(inode)) return -EPERM; - if (!S_ISREG(inode->i_mode)) - return -EPERM; - mutex_lock(&sbi->vh_mutex); if (inode->i_ino == (u32)(unsigned long)src_dentry->d_fsdata) { for (;;) { get_random_bytes(&id, sizeof(cnid)); @@ -264,41 +279,40 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, str.len = sprintf(name, "iNode%d", id); res = hfsplus_rename_cat(inode->i_ino, src_dir, &src_dentry->d_name, - sbi->hidden_dir, &str); + HFSPLUS_SB(sb).hidden_dir, &str); if (!res) break; if (res != -EEXIST) - 
goto out; + return res; } - HFSPLUS_I(inode)->linkid = id; - cnid = sbi->next_cnid++; + HFSPLUS_I(inode).dev = id; + cnid = HFSPLUS_SB(sb).next_cnid++; src_dentry->d_fsdata = (void *)(unsigned long)cnid; res = hfsplus_create_cat(cnid, src_dir, &src_dentry->d_name, inode); if (res) /* panic? */ - goto out; - sbi->file_count++; + return res; + HFSPLUS_SB(sb).file_count++; } - cnid = sbi->next_cnid++; + cnid = HFSPLUS_SB(sb).next_cnid++; res = hfsplus_create_cat(cnid, dst_dir, &dst_dentry->d_name, inode); if (res) - goto out; + return res; inc_nlink(inode); hfsplus_instantiate(dst_dentry, inode, cnid); atomic_inc(&inode->i_count); inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); - sbi->file_count++; - dst_dir->i_sb->s_dirt = 1; -out: - mutex_unlock(&sbi->vh_mutex); - return res; + HFSPLUS_SB(sb).file_count++; + sb->s_dirt = 1; + + return 0; } static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) { - struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); + struct super_block *sb = dir->i_sb; struct inode *inode = dentry->d_inode; struct qstr str; char name[32]; @@ -308,22 +322,21 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) if (HFSPLUS_IS_RSRC(inode)) return -EPERM; - mutex_lock(&sbi->vh_mutex); cnid = (u32)(unsigned long)dentry->d_fsdata; if (inode->i_ino == cnid && - atomic_read(&HFSPLUS_I(inode)->opencnt)) { + atomic_read(&HFSPLUS_I(inode).opencnt)) { str.name = name; str.len = sprintf(name, "temp%lu", inode->i_ino); res = hfsplus_rename_cat(inode->i_ino, dir, &dentry->d_name, - sbi->hidden_dir, &str); + HFSPLUS_SB(sb).hidden_dir, &str); if (!res) inode->i_flags |= S_DEAD; - goto out; + return res; } res = hfsplus_delete_cat(cnid, dir, &dentry->d_name); if (res) - goto out; + return res; if (inode->i_nlink > 0) drop_nlink(inode); @@ -331,10 +344,10 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) clear_nlink(inode); if (!inode->i_nlink) { if (inode->i_ino != cnid) { - sbi->file_count--; - if 
(!atomic_read(&HFSPLUS_I(inode)->opencnt)) { + HFSPLUS_SB(sb).file_count--; + if (!atomic_read(&HFSPLUS_I(inode).opencnt)) { res = hfsplus_delete_cat(inode->i_ino, - sbi->hidden_dir, + HFSPLUS_SB(sb).hidden_dir, NULL); if (!res) hfsplus_delete_inode(inode); @@ -343,108 +356,107 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) } else hfsplus_delete_inode(inode); } else - sbi->file_count--; + HFSPLUS_SB(sb).file_count--; inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); -out: - mutex_unlock(&sbi->vh_mutex); + return res; } +static int hfsplus_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + struct inode *inode; + int res; + + inode = hfsplus_new_inode(dir->i_sb, S_IFDIR | mode); + if (!inode) + return -ENOSPC; + + res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); + if (res) { + inode->i_nlink = 0; + hfsplus_delete_inode(inode); + iput(inode); + return res; + } + hfsplus_instantiate(dentry, inode, inode->i_ino); + mark_inode_dirty(inode); + return 0; +} + static int hfsplus_rmdir(struct inode *dir, struct dentry *dentry) { - struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); - struct inode *inode = dentry->d_inode; + struct inode *inode; int res; + inode = dentry->d_inode; if (inode->i_size != 2) return -ENOTEMPTY; - - mutex_lock(&sbi->vh_mutex); res = hfsplus_delete_cat(inode->i_ino, dir, &dentry->d_name); if (res) - goto out; + return res; clear_nlink(inode); inode->i_ctime = CURRENT_TIME_SEC; hfsplus_delete_inode(inode); mark_inode_dirty(inode); -out: - mutex_unlock(&sbi->vh_mutex); - return res; + return 0; } static int hfsplus_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { - struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); + struct super_block *sb; struct inode *inode; - int res = -ENOSPC; + int res; - mutex_lock(&sbi->vh_mutex); - inode = hfsplus_new_inode(dir->i_sb, S_IFLNK | S_IRWXUGO); + sb = dir->i_sb; + inode = hfsplus_new_inode(sb, S_IFLNK | S_IRWXUGO); if 
(!inode) - goto out; + return -ENOSPC; res = page_symlink(inode, symname, strlen(symname) + 1); - if (res) - goto out_err; + if (res) { + inode->i_nlink = 0; + hfsplus_delete_inode(inode); + iput(inode); + return res; + } + mark_inode_dirty(inode); res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); - if (res) - goto out_err; - hfsplus_instantiate(dentry, inode, inode->i_ino); - mark_inode_dirty(inode); - goto out; + if (!res) { + hfsplus_instantiate(dentry, inode, inode->i_ino); + mark_inode_dirty(inode); + } -out_err: - inode->i_nlink = 0; - hfsplus_delete_inode(inode); - iput(inode); -out: - mutex_unlock(&sbi->vh_mutex); return res; } static int hfsplus_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) { - struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); + struct super_block *sb; struct inode *inode; - int res = -ENOSPC; + int res; - mutex_lock(&sbi->vh_mutex); - inode = hfsplus_new_inode(dir->i_sb, mode); + sb = dir->i_sb; + inode = hfsplus_new_inode(sb, mode); if (!inode) - goto out; - - if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) - init_special_inode(inode, mode, rdev); + return -ENOSPC; res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); if (res) { inode->i_nlink = 0; hfsplus_delete_inode(inode); iput(inode); - goto out; + return res; } - + init_special_inode(inode, mode, rdev); hfsplus_instantiate(dentry, inode, inode->i_ino); mark_inode_dirty(inode); -out: - mutex_unlock(&sbi->vh_mutex); - return res; -} -static int hfsplus_create(struct inode *dir, struct dentry *dentry, int mode, - struct nameidata *nd) -{ - return hfsplus_mknod(dir, dentry, mode, 0); -} - -static int hfsplus_mkdir(struct inode *dir, struct dentry *dentry, int mode) -{ - return hfsplus_mknod(dir, dentry, mode | S_IFDIR, 0); + return 0; } static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry, @@ -454,10 +466,7 @@ static int hfsplus_rename(struct inode *old_dir, struct dentry 
*old_dentry, /* Unlink destination if it already exists */ if (new_dentry->d_inode) { - if (S_ISDIR(new_dentry->d_inode->i_mode)) - res = hfsplus_rmdir(new_dir, new_dentry); - else - res = hfsplus_unlink(new_dir, new_dentry); + res = hfsplus_unlink(new_dir, new_dentry); if (res) return res; } diff --git a/trunk/fs/hfsplus/extents.c b/trunk/fs/hfsplus/extents.c index 0c9cb1820a52..0022eec63cda 100644 --- a/trunk/fs/hfsplus/extents.c +++ b/trunk/fs/hfsplus/extents.c @@ -85,49 +85,35 @@ static u32 hfsplus_ext_lastblock(struct hfsplus_extent *ext) static void __hfsplus_ext_write_extent(struct inode *inode, struct hfs_find_data *fd) { - struct hfsplus_inode_info *hip = HFSPLUS_I(inode); int res; - WARN_ON(!mutex_is_locked(&hip->extents_lock)); - - hfsplus_ext_build_key(fd->search_key, inode->i_ino, hip->cached_start, - HFSPLUS_IS_RSRC(inode) ? - HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA); - + hfsplus_ext_build_key(fd->search_key, inode->i_ino, HFSPLUS_I(inode).cached_start, + HFSPLUS_IS_RSRC(inode) ? 
HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA); res = hfs_brec_find(fd); - if (hip->flags & HFSPLUS_FLG_EXT_NEW) { + if (HFSPLUS_I(inode).flags & HFSPLUS_FLG_EXT_NEW) { if (res != -ENOENT) return; - hfs_brec_insert(fd, hip->cached_extents, - sizeof(hfsplus_extent_rec)); - hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); + hfs_brec_insert(fd, HFSPLUS_I(inode).cached_extents, sizeof(hfsplus_extent_rec)); + HFSPLUS_I(inode).flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); } else { if (res) return; - hfs_bnode_write(fd->bnode, hip->cached_extents, - fd->entryoffset, fd->entrylength); - hip->flags &= ~HFSPLUS_FLG_EXT_DIRTY; + hfs_bnode_write(fd->bnode, HFSPLUS_I(inode).cached_extents, fd->entryoffset, fd->entrylength); + HFSPLUS_I(inode).flags &= ~HFSPLUS_FLG_EXT_DIRTY; } } -static void hfsplus_ext_write_extent_locked(struct inode *inode) +void hfsplus_ext_write_extent(struct inode *inode) { - if (HFSPLUS_I(inode)->flags & HFSPLUS_FLG_EXT_DIRTY) { + if (HFSPLUS_I(inode).flags & HFSPLUS_FLG_EXT_DIRTY) { struct hfs_find_data fd; - hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); + hfs_find_init(HFSPLUS_SB(inode->i_sb).ext_tree, &fd); __hfsplus_ext_write_extent(inode, &fd); hfs_find_exit(&fd); } } -void hfsplus_ext_write_extent(struct inode *inode) -{ - mutex_lock(&HFSPLUS_I(inode)->extents_lock); - hfsplus_ext_write_extent_locked(inode); - mutex_unlock(&HFSPLUS_I(inode)->extents_lock); -} - static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd, struct hfsplus_extent *extent, u32 cnid, u32 block, u8 type) @@ -150,39 +136,33 @@ static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd, static inline int __hfsplus_ext_cache_extent(struct hfs_find_data *fd, struct inode *inode, u32 block) { - struct hfsplus_inode_info *hip = HFSPLUS_I(inode); int res; - WARN_ON(!mutex_is_locked(&hip->extents_lock)); - - if (hip->flags & HFSPLUS_FLG_EXT_DIRTY) + if (HFSPLUS_I(inode).flags & HFSPLUS_FLG_EXT_DIRTY) __hfsplus_ext_write_extent(inode, 
fd); - res = __hfsplus_ext_read_extent(fd, hip->cached_extents, inode->i_ino, - block, HFSPLUS_IS_RSRC(inode) ? - HFSPLUS_TYPE_RSRC : - HFSPLUS_TYPE_DATA); + res = __hfsplus_ext_read_extent(fd, HFSPLUS_I(inode).cached_extents, inode->i_ino, + block, HFSPLUS_IS_RSRC(inode) ? HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA); if (!res) { - hip->cached_start = be32_to_cpu(fd->key->ext.start_block); - hip->cached_blocks = hfsplus_ext_block_count(hip->cached_extents); + HFSPLUS_I(inode).cached_start = be32_to_cpu(fd->key->ext.start_block); + HFSPLUS_I(inode).cached_blocks = hfsplus_ext_block_count(HFSPLUS_I(inode).cached_extents); } else { - hip->cached_start = hip->cached_blocks = 0; - hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); + HFSPLUS_I(inode).cached_start = HFSPLUS_I(inode).cached_blocks = 0; + HFSPLUS_I(inode).flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); } return res; } static int hfsplus_ext_read_extent(struct inode *inode, u32 block) { - struct hfsplus_inode_info *hip = HFSPLUS_I(inode); struct hfs_find_data fd; int res; - if (block >= hip->cached_start && - block < hip->cached_start + hip->cached_blocks) + if (block >= HFSPLUS_I(inode).cached_start && + block < HFSPLUS_I(inode).cached_start + HFSPLUS_I(inode).cached_blocks) return 0; - hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); + hfs_find_init(HFSPLUS_SB(inode->i_sb).ext_tree, &fd); res = __hfsplus_ext_cache_extent(&fd, inode, block); hfs_find_exit(&fd); return res; @@ -192,21 +172,21 @@ static int hfsplus_ext_read_extent(struct inode *inode, u32 block) int hfsplus_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { - struct super_block *sb = inode->i_sb; - struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); - struct hfsplus_inode_info *hip = HFSPLUS_I(inode); + struct super_block *sb; int res = -EIO; u32 ablock, dblock, mask; int shift; + sb = inode->i_sb; + /* Convert inode block to disk allocation block */ - shift = sbi->alloc_blksz_shift - 
sb->s_blocksize_bits; - ablock = iblock >> sbi->fs_shift; + shift = HFSPLUS_SB(sb).alloc_blksz_shift - sb->s_blocksize_bits; + ablock = iblock >> HFSPLUS_SB(sb).fs_shift; - if (iblock >= hip->fs_blocks) { - if (iblock > hip->fs_blocks || !create) + if (iblock >= HFSPLUS_I(inode).fs_blocks) { + if (iblock > HFSPLUS_I(inode).fs_blocks || !create) return -EIO; - if (ablock >= hip->alloc_blocks) { + if (ablock >= HFSPLUS_I(inode).alloc_blocks) { res = hfsplus_file_extend(inode); if (res) return res; @@ -214,33 +194,33 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, } else create = 0; - if (ablock < hip->first_blocks) { - dblock = hfsplus_ext_find_block(hip->first_extents, ablock); + if (ablock < HFSPLUS_I(inode).first_blocks) { + dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).first_extents, ablock); goto done; } if (inode->i_ino == HFSPLUS_EXT_CNID) return -EIO; - mutex_lock(&hip->extents_lock); + mutex_lock(&HFSPLUS_I(inode).extents_lock); res = hfsplus_ext_read_extent(inode, ablock); if (!res) { - dblock = hfsplus_ext_find_block(hip->cached_extents, - ablock - hip->cached_start); + dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).cached_extents, ablock - + HFSPLUS_I(inode).cached_start); } else { - mutex_unlock(&hip->extents_lock); + mutex_unlock(&HFSPLUS_I(inode).extents_lock); return -EIO; } - mutex_unlock(&hip->extents_lock); + mutex_unlock(&HFSPLUS_I(inode).extents_lock); done: dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock); - mask = (1 << sbi->fs_shift) - 1; - map_bh(bh_result, sb, (dblock << sbi->fs_shift) + sbi->blockoffset + (iblock & mask)); + mask = (1 << HFSPLUS_SB(sb).fs_shift) - 1; + map_bh(bh_result, sb, (dblock << HFSPLUS_SB(sb).fs_shift) + HFSPLUS_SB(sb).blockoffset + (iblock & mask)); if (create) { set_buffer_new(bh_result); - hip->phys_size += sb->s_blocksize; - hip->fs_blocks++; + HFSPLUS_I(inode).phys_size += sb->s_blocksize; + HFSPLUS_I(inode).fs_blocks++; inode_add_bytes(inode, 
sb->s_blocksize); mark_inode_dirty(inode); } @@ -347,7 +327,7 @@ int hfsplus_free_fork(struct super_block *sb, u32 cnid, struct hfsplus_fork_raw if (total_blocks == blocks) return 0; - hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); + hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd); do { res = __hfsplus_ext_read_extent(&fd, ext_entry, cnid, total_blocks, type); @@ -368,33 +348,29 @@ int hfsplus_free_fork(struct super_block *sb, u32 cnid, struct hfsplus_fork_raw int hfsplus_file_extend(struct inode *inode) { struct super_block *sb = inode->i_sb; - struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); - struct hfsplus_inode_info *hip = HFSPLUS_I(inode); u32 start, len, goal; int res; - if (sbi->alloc_file->i_size * 8 < - sbi->total_blocks - sbi->free_blocks + 8) { + if (HFSPLUS_SB(sb).alloc_file->i_size * 8 < HFSPLUS_SB(sb).total_blocks - HFSPLUS_SB(sb).free_blocks + 8) { // extend alloc file - printk(KERN_ERR "hfs: extend alloc file! (%Lu,%u,%u)\n", - sbi->alloc_file->i_size * 8, - sbi->total_blocks, sbi->free_blocks); + printk(KERN_ERR "hfs: extend alloc file! 
(%Lu,%u,%u)\n", HFSPLUS_SB(sb).alloc_file->i_size * 8, + HFSPLUS_SB(sb).total_blocks, HFSPLUS_SB(sb).free_blocks); return -ENOSPC; } - mutex_lock(&hip->extents_lock); - if (hip->alloc_blocks == hip->first_blocks) - goal = hfsplus_ext_lastblock(hip->first_extents); + mutex_lock(&HFSPLUS_I(inode).extents_lock); + if (HFSPLUS_I(inode).alloc_blocks == HFSPLUS_I(inode).first_blocks) + goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).first_extents); else { - res = hfsplus_ext_read_extent(inode, hip->alloc_blocks); + res = hfsplus_ext_read_extent(inode, HFSPLUS_I(inode).alloc_blocks); if (res) goto out; - goal = hfsplus_ext_lastblock(hip->cached_extents); + goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).cached_extents); } - len = hip->clump_blocks; - start = hfsplus_block_allocate(sb, sbi->total_blocks, goal, &len); - if (start >= sbi->total_blocks) { + len = HFSPLUS_I(inode).clump_blocks; + start = hfsplus_block_allocate(sb, HFSPLUS_SB(sb).total_blocks, goal, &len); + if (start >= HFSPLUS_SB(sb).total_blocks) { start = hfsplus_block_allocate(sb, goal, 0, &len); if (start >= goal) { res = -ENOSPC; @@ -403,56 +379,56 @@ int hfsplus_file_extend(struct inode *inode) } dprint(DBG_EXTENT, "extend %lu: %u,%u\n", inode->i_ino, start, len); - - if (hip->alloc_blocks <= hip->first_blocks) { - if (!hip->first_blocks) { + if (HFSPLUS_I(inode).alloc_blocks <= HFSPLUS_I(inode).first_blocks) { + if (!HFSPLUS_I(inode).first_blocks) { dprint(DBG_EXTENT, "first extents\n"); /* no extents yet */ - hip->first_extents[0].start_block = cpu_to_be32(start); - hip->first_extents[0].block_count = cpu_to_be32(len); + HFSPLUS_I(inode).first_extents[0].start_block = cpu_to_be32(start); + HFSPLUS_I(inode).first_extents[0].block_count = cpu_to_be32(len); res = 0; } else { /* try to append to extents in inode */ - res = hfsplus_add_extent(hip->first_extents, - hip->alloc_blocks, + res = hfsplus_add_extent(HFSPLUS_I(inode).first_extents, + HFSPLUS_I(inode).alloc_blocks, start, len); if (res == -ENOSPC) goto 
insert_extent; } if (!res) { - hfsplus_dump_extent(hip->first_extents); - hip->first_blocks += len; + hfsplus_dump_extent(HFSPLUS_I(inode).first_extents); + HFSPLUS_I(inode).first_blocks += len; } } else { - res = hfsplus_add_extent(hip->cached_extents, - hip->alloc_blocks - hip->cached_start, + res = hfsplus_add_extent(HFSPLUS_I(inode).cached_extents, + HFSPLUS_I(inode).alloc_blocks - + HFSPLUS_I(inode).cached_start, start, len); if (!res) { - hfsplus_dump_extent(hip->cached_extents); - hip->flags |= HFSPLUS_FLG_EXT_DIRTY; - hip->cached_blocks += len; + hfsplus_dump_extent(HFSPLUS_I(inode).cached_extents); + HFSPLUS_I(inode).flags |= HFSPLUS_FLG_EXT_DIRTY; + HFSPLUS_I(inode).cached_blocks += len; } else if (res == -ENOSPC) goto insert_extent; } out: - mutex_unlock(&hip->extents_lock); + mutex_unlock(&HFSPLUS_I(inode).extents_lock); if (!res) { - hip->alloc_blocks += len; + HFSPLUS_I(inode).alloc_blocks += len; mark_inode_dirty(inode); } return res; insert_extent: dprint(DBG_EXTENT, "insert new extent\n"); - hfsplus_ext_write_extent_locked(inode); + hfsplus_ext_write_extent(inode); - memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); - hip->cached_extents[0].start_block = cpu_to_be32(start); - hip->cached_extents[0].block_count = cpu_to_be32(len); - hfsplus_dump_extent(hip->cached_extents); - hip->flags |= HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW; - hip->cached_start = hip->alloc_blocks; - hip->cached_blocks = len; + memset(HFSPLUS_I(inode).cached_extents, 0, sizeof(hfsplus_extent_rec)); + HFSPLUS_I(inode).cached_extents[0].start_block = cpu_to_be32(start); + HFSPLUS_I(inode).cached_extents[0].block_count = cpu_to_be32(len); + hfsplus_dump_extent(HFSPLUS_I(inode).cached_extents); + HFSPLUS_I(inode).flags |= HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW; + HFSPLUS_I(inode).cached_start = HFSPLUS_I(inode).alloc_blocks; + HFSPLUS_I(inode).cached_blocks = len; res = 0; goto out; @@ -461,15 +437,13 @@ int hfsplus_file_extend(struct inode *inode) void 
hfsplus_file_truncate(struct inode *inode) { struct super_block *sb = inode->i_sb; - struct hfsplus_inode_info *hip = HFSPLUS_I(inode); struct hfs_find_data fd; u32 alloc_cnt, blk_cnt, start; int res; - dprint(DBG_INODE, "truncate: %lu, %Lu -> %Lu\n", - inode->i_ino, (long long)hip->phys_size, inode->i_size); - - if (inode->i_size > hip->phys_size) { + dprint(DBG_INODE, "truncate: %lu, %Lu -> %Lu\n", inode->i_ino, + (long long)HFSPLUS_I(inode).phys_size, inode->i_size); + if (inode->i_size > HFSPLUS_I(inode).phys_size) { struct address_space *mapping = inode->i_mapping; struct page *page; void *fsdata; @@ -486,48 +460,47 @@ void hfsplus_file_truncate(struct inode *inode) return; mark_inode_dirty(inode); return; - } else if (inode->i_size == hip->phys_size) + } else if (inode->i_size == HFSPLUS_I(inode).phys_size) return; - blk_cnt = (inode->i_size + HFSPLUS_SB(sb)->alloc_blksz - 1) >> - HFSPLUS_SB(sb)->alloc_blksz_shift; - alloc_cnt = hip->alloc_blocks; + blk_cnt = (inode->i_size + HFSPLUS_SB(sb).alloc_blksz - 1) >> HFSPLUS_SB(sb).alloc_blksz_shift; + alloc_cnt = HFSPLUS_I(inode).alloc_blocks; if (blk_cnt == alloc_cnt) goto out; - mutex_lock(&hip->extents_lock); - hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); + mutex_lock(&HFSPLUS_I(inode).extents_lock); + hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd); while (1) { - if (alloc_cnt == hip->first_blocks) { - hfsplus_free_extents(sb, hip->first_extents, + if (alloc_cnt == HFSPLUS_I(inode).first_blocks) { + hfsplus_free_extents(sb, HFSPLUS_I(inode).first_extents, alloc_cnt, alloc_cnt - blk_cnt); - hfsplus_dump_extent(hip->first_extents); - hip->first_blocks = blk_cnt; + hfsplus_dump_extent(HFSPLUS_I(inode).first_extents); + HFSPLUS_I(inode).first_blocks = blk_cnt; break; } res = __hfsplus_ext_cache_extent(&fd, inode, alloc_cnt); if (res) break; - start = hip->cached_start; - hfsplus_free_extents(sb, hip->cached_extents, + start = HFSPLUS_I(inode).cached_start; + hfsplus_free_extents(sb, HFSPLUS_I(inode).cached_extents, 
alloc_cnt - start, alloc_cnt - blk_cnt); - hfsplus_dump_extent(hip->cached_extents); + hfsplus_dump_extent(HFSPLUS_I(inode).cached_extents); if (blk_cnt > start) { - hip->flags |= HFSPLUS_FLG_EXT_DIRTY; + HFSPLUS_I(inode).flags |= HFSPLUS_FLG_EXT_DIRTY; break; } alloc_cnt = start; - hip->cached_start = hip->cached_blocks = 0; - hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); + HFSPLUS_I(inode).cached_start = HFSPLUS_I(inode).cached_blocks = 0; + HFSPLUS_I(inode).flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); hfs_brec_remove(&fd); } hfs_find_exit(&fd); - mutex_unlock(&hip->extents_lock); + mutex_unlock(&HFSPLUS_I(inode).extents_lock); - hip->alloc_blocks = blk_cnt; + HFSPLUS_I(inode).alloc_blocks = blk_cnt; out: - hip->phys_size = inode->i_size; - hip->fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; - inode_set_bytes(inode, hip->fs_blocks << sb->s_blocksize_bits); + HFSPLUS_I(inode).phys_size = inode->i_size; + HFSPLUS_I(inode).fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; + inode_set_bytes(inode, HFSPLUS_I(inode).fs_blocks << sb->s_blocksize_bits); mark_inode_dirty(inode); } diff --git a/trunk/fs/hfsplus/hfsplus_fs.h b/trunk/fs/hfsplus/hfsplus_fs.h index cb3653efb57a..dc856be3c2b0 100644 --- a/trunk/fs/hfsplus/hfsplus_fs.h +++ b/trunk/fs/hfsplus/hfsplus_fs.h @@ -62,7 +62,7 @@ struct hfs_btree { unsigned int depth; //unsigned int map1_size, map_size; - struct mutex tree_lock; + struct semaphore tree_lock; unsigned int pages_per_bnode; spinlock_t hash_lock; @@ -121,21 +121,16 @@ struct hfsplus_sb_info { u32 sect_count; int fs_shift; - /* immutable data from the volume header */ + /* Stuff in host order from Vol Header */ u32 alloc_blksz; int alloc_blksz_shift; u32 total_blocks; - u32 data_clump_blocks, rsrc_clump_blocks; - - /* mutable data from the volume header, protected by alloc_mutex */ u32 free_blocks; - struct mutex alloc_mutex; - - /* mutable data from the volume header, protected 
by vh_mutex */ + u32 next_alloc; u32 next_cnid; u32 file_count; u32 folder_count; - struct mutex vh_mutex; + u32 data_clump_blocks, rsrc_clump_blocks; /* Config options */ u32 creator; @@ -148,50 +143,40 @@ struct hfsplus_sb_info { int part, session; unsigned long flags; + + struct hlist_head rsrc_inodes; }; -#define HFSPLUS_SB_WRITEBACKUP 0 -#define HFSPLUS_SB_NODECOMPOSE 1 -#define HFSPLUS_SB_FORCE 2 -#define HFSPLUS_SB_HFSX 3 -#define HFSPLUS_SB_CASEFOLD 4 +#define HFSPLUS_SB_WRITEBACKUP 0x0001 +#define HFSPLUS_SB_NODECOMPOSE 0x0002 +#define HFSPLUS_SB_FORCE 0x0004 +#define HFSPLUS_SB_HFSX 0x0008 +#define HFSPLUS_SB_CASEFOLD 0x0010 struct hfsplus_inode_info { - atomic_t opencnt; - - /* - * Extent allocation information, protected by extents_lock. - */ - u32 first_blocks; - u32 clump_blocks; - u32 alloc_blocks; - u32 cached_start; - u32 cached_blocks; + struct mutex extents_lock; + u32 clump_blocks, alloc_blocks; + sector_t fs_blocks; + /* Allocation extents from catalog record or volume header */ hfsplus_extent_rec first_extents; + u32 first_blocks; hfsplus_extent_rec cached_extents; - unsigned long flags; - struct mutex extents_lock; + u32 cached_start, cached_blocks; + atomic_t opencnt; - /* - * Immutable data. - */ struct inode *rsrc_inode; - __be32 create_date; + unsigned long flags; - /* - * Protected by sbi->vh_mutex. - */ - u32 linkid; + __be32 create_date; + /* Device number in hfsplus_permissions in catalog */ + u32 dev; + /* BSD system and user file flags */ + u8 rootflags; + u8 userflags; - /* - * Protected by i_mutex. 
- */ - sector_t fs_blocks; - u8 userflags; /* BSD user file flags */ struct list_head open_dir_list; loff_t phys_size; - struct inode vfs_inode; }; @@ -199,8 +184,8 @@ struct hfsplus_inode_info { #define HFSPLUS_FLG_EXT_DIRTY 0x0002 #define HFSPLUS_FLG_EXT_NEW 0x0004 -#define HFSPLUS_IS_DATA(inode) (!(HFSPLUS_I(inode)->flags & HFSPLUS_FLG_RSRC)) -#define HFSPLUS_IS_RSRC(inode) (HFSPLUS_I(inode)->flags & HFSPLUS_FLG_RSRC) +#define HFSPLUS_IS_DATA(inode) (!(HFSPLUS_I(inode).flags & HFSPLUS_FLG_RSRC)) +#define HFSPLUS_IS_RSRC(inode) (HFSPLUS_I(inode).flags & HFSPLUS_FLG_RSRC) struct hfs_find_data { /* filled by caller */ @@ -326,7 +311,6 @@ int hfsplus_create_cat(u32, struct inode *, struct qstr *, struct inode *); int hfsplus_delete_cat(u32, struct inode *, struct qstr *); int hfsplus_rename_cat(u32, struct inode *, struct qstr *, struct inode *, struct qstr *); -void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms); /* dir.c */ extern const struct inode_operations hfsplus_dir_inode_operations; @@ -388,15 +372,26 @@ int hfsplus_read_wrapper(struct super_block *); int hfs_part_find(struct super_block *, sector_t *, sector_t *); /* access macros */ +/* static inline struct hfsplus_sb_info *HFSPLUS_SB(struct super_block *sb) { return sb->s_fs_info; } - static inline struct hfsplus_inode_info *HFSPLUS_I(struct inode *inode) { return list_entry(inode, struct hfsplus_inode_info, vfs_inode); } +*/ +#define HFSPLUS_SB(super) (*(struct hfsplus_sb_info *)(super)->s_fs_info) +#define HFSPLUS_I(inode) (*list_entry(inode, struct hfsplus_inode_info, vfs_inode)) + +#if 1 +#define hfsplus_kmap(p) ({ struct page *__p = (p); kmap(__p); }) +#define hfsplus_kunmap(p) ({ struct page *__p = (p); kunmap(__p); __p; }) +#else +#define hfsplus_kmap(p) kmap(p) +#define hfsplus_kunmap(p) kunmap(p) +#endif #define sb_bread512(sb, sec, data) ({ \ struct buffer_head *__bh; \ @@ -424,4 +419,6 @@ static inline struct hfsplus_inode_info *HFSPLUS_I(struct inode *inode) #define 
hfsp_ut2mt(t) __hfsp_ut2mt((t).tv_sec) #define hfsp_now2mt() __hfsp_ut2mt(get_seconds()) +#define kdev_t_to_nr(x) (x) + #endif diff --git a/trunk/fs/hfsplus/hfsplus_raw.h b/trunk/fs/hfsplus/hfsplus_raw.h index 6892899fd6fb..fe99fe8db61a 100644 --- a/trunk/fs/hfsplus/hfsplus_raw.h +++ b/trunk/fs/hfsplus/hfsplus_raw.h @@ -200,7 +200,6 @@ struct hfsplus_cat_key { struct hfsplus_unistr name; } __packed; -#define HFSPLUS_CAT_KEYLEN (sizeof(struct hfsplus_cat_key)) /* Structs from hfs.h */ struct hfsp_point { @@ -324,7 +323,7 @@ struct hfsplus_ext_key { __be32 start_block; } __packed; -#define HFSPLUS_EXT_KEYLEN sizeof(struct hfsplus_ext_key) +#define HFSPLUS_EXT_KEYLEN 12 /* HFS+ generic BTree key */ typedef union { diff --git a/trunk/fs/hfsplus/inode.c b/trunk/fs/hfsplus/inode.c index 78449280dae0..c5a979d62c65 100644 --- a/trunk/fs/hfsplus/inode.c +++ b/trunk/fs/hfsplus/inode.c @@ -36,7 +36,7 @@ static int hfsplus_write_begin(struct file *file, struct address_space *mapping, *pagep = NULL; ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, hfsplus_get_block, - &HFSPLUS_I(mapping->host)->phys_size); + &HFSPLUS_I(mapping->host).phys_size); if (unlikely(ret)) { loff_t isize = mapping->host->i_size; if (pos + len > isize) @@ -62,13 +62,13 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask) switch (inode->i_ino) { case HFSPLUS_EXT_CNID: - tree = HFSPLUS_SB(sb)->ext_tree; + tree = HFSPLUS_SB(sb).ext_tree; break; case HFSPLUS_CAT_CNID: - tree = HFSPLUS_SB(sb)->cat_tree; + tree = HFSPLUS_SB(sb).cat_tree; break; case HFSPLUS_ATTR_CNID: - tree = HFSPLUS_SB(sb)->attr_tree; + tree = HFSPLUS_SB(sb).attr_tree; break; default: BUG(); @@ -172,13 +172,12 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent struct hfs_find_data fd; struct super_block *sb = dir->i_sb; struct inode *inode = NULL; - struct hfsplus_inode_info *hip; int err; if (HFSPLUS_IS_RSRC(dir) || strcmp(dentry->d_name.name, "rsrc")) goto out; - inode = 
HFSPLUS_I(dir)->rsrc_inode; + inode = HFSPLUS_I(dir).rsrc_inode; if (inode) goto out; @@ -186,13 +185,12 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent if (!inode) return ERR_PTR(-ENOMEM); - hip = HFSPLUS_I(inode); inode->i_ino = dir->i_ino; - INIT_LIST_HEAD(&hip->open_dir_list); - mutex_init(&hip->extents_lock); - hip->flags = HFSPLUS_FLG_RSRC; + INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); + mutex_init(&HFSPLUS_I(inode).extents_lock); + HFSPLUS_I(inode).flags = HFSPLUS_FLG_RSRC; - hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); + hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); err = hfsplus_find_cat(sb, dir->i_ino, &fd); if (!err) err = hfsplus_cat_read_inode(inode, &fd); @@ -201,18 +199,10 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent iput(inode); return ERR_PTR(err); } - hip->rsrc_inode = dir; - HFSPLUS_I(dir)->rsrc_inode = inode; + HFSPLUS_I(inode).rsrc_inode = dir; + HFSPLUS_I(dir).rsrc_inode = inode; igrab(dir); - - /* - * __mark_inode_dirty expects inodes to be hashed. Since we don't - * want resource fork inodes in the regular inode space, we make them - * appear hashed, but do not put on any lists. hlist_del() - * will work fine and require no locking. 
- */ - inode->i_hash.pprev = &inode->i_hash.next; - + hlist_add_head(&inode->i_hash, &HFSPLUS_SB(sb).rsrc_inodes); mark_inode_dirty(inode); out: d_add(dentry, inode); @@ -221,27 +211,30 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent static void hfsplus_get_perms(struct inode *inode, struct hfsplus_perm *perms, int dir) { - struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); + struct super_block *sb = inode->i_sb; u16 mode; mode = be16_to_cpu(perms->mode); inode->i_uid = be32_to_cpu(perms->owner); if (!inode->i_uid && !mode) - inode->i_uid = sbi->uid; + inode->i_uid = HFSPLUS_SB(sb).uid; inode->i_gid = be32_to_cpu(perms->group); if (!inode->i_gid && !mode) - inode->i_gid = sbi->gid; + inode->i_gid = HFSPLUS_SB(sb).gid; if (dir) { - mode = mode ? (mode & S_IALLUGO) : (S_IRWXUGO & ~(sbi->umask)); + mode = mode ? (mode & S_IALLUGO) : + (S_IRWXUGO & ~(HFSPLUS_SB(sb).umask)); mode |= S_IFDIR; } else if (!mode) - mode = S_IFREG | ((S_IRUGO|S_IWUGO) & ~(sbi->umask)); + mode = S_IFREG | ((S_IRUGO|S_IWUGO) & + ~(HFSPLUS_SB(sb).umask)); inode->i_mode = mode; - HFSPLUS_I(inode)->userflags = perms->userflags; + HFSPLUS_I(inode).rootflags = perms->rootflags; + HFSPLUS_I(inode).userflags = perms->userflags; if (perms->rootflags & HFSPLUS_FLG_IMMUTABLE) inode->i_flags |= S_IMMUTABLE; else @@ -252,13 +245,30 @@ static void hfsplus_get_perms(struct inode *inode, struct hfsplus_perm *perms, i inode->i_flags &= ~S_APPEND; } +static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms) +{ + if (inode->i_flags & S_IMMUTABLE) + perms->rootflags |= HFSPLUS_FLG_IMMUTABLE; + else + perms->rootflags &= ~HFSPLUS_FLG_IMMUTABLE; + if (inode->i_flags & S_APPEND) + perms->rootflags |= HFSPLUS_FLG_APPEND; + else + perms->rootflags &= ~HFSPLUS_FLG_APPEND; + perms->userflags = HFSPLUS_I(inode).userflags; + perms->mode = cpu_to_be16(inode->i_mode); + perms->owner = cpu_to_be32(inode->i_uid); + perms->group = cpu_to_be32(inode->i_gid); + 
perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev); +} + static int hfsplus_file_open(struct inode *inode, struct file *file) { if (HFSPLUS_IS_RSRC(inode)) - inode = HFSPLUS_I(inode)->rsrc_inode; + inode = HFSPLUS_I(inode).rsrc_inode; if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) return -EOVERFLOW; - atomic_inc(&HFSPLUS_I(inode)->opencnt); + atomic_inc(&HFSPLUS_I(inode).opencnt); return 0; } @@ -267,13 +277,12 @@ static int hfsplus_file_release(struct inode *inode, struct file *file) struct super_block *sb = inode->i_sb; if (HFSPLUS_IS_RSRC(inode)) - inode = HFSPLUS_I(inode)->rsrc_inode; - if (atomic_dec_and_test(&HFSPLUS_I(inode)->opencnt)) { + inode = HFSPLUS_I(inode).rsrc_inode; + if (atomic_dec_and_test(&HFSPLUS_I(inode).opencnt)) { mutex_lock(&inode->i_mutex); hfsplus_file_truncate(inode); if (inode->i_flags & S_DEAD) { - hfsplus_delete_cat(inode->i_ino, - HFSPLUS_SB(sb)->hidden_dir, NULL); + hfsplus_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL); hfsplus_delete_inode(inode); } mutex_unlock(&inode->i_mutex); @@ -352,52 +361,47 @@ static const struct file_operations hfsplus_file_operations = { struct inode *hfsplus_new_inode(struct super_block *sb, int mode) { - struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); struct inode *inode = new_inode(sb); - struct hfsplus_inode_info *hip; - if (!inode) return NULL; - inode->i_ino = sbi->next_cnid++; + inode->i_ino = HFSPLUS_SB(sb).next_cnid++; inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); inode->i_nlink = 1; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; - - hip = HFSPLUS_I(inode); - INIT_LIST_HEAD(&hip->open_dir_list); - mutex_init(&hip->extents_lock); - atomic_set(&hip->opencnt, 0); - hip->flags = 0; - memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec)); - memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); - hip->alloc_blocks = 0; - hip->first_blocks = 0; - hip->cached_start = 0; - hip->cached_blocks = 0; - 
hip->phys_size = 0; - hip->fs_blocks = 0; - hip->rsrc_inode = NULL; + INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); + mutex_init(&HFSPLUS_I(inode).extents_lock); + atomic_set(&HFSPLUS_I(inode).opencnt, 0); + HFSPLUS_I(inode).flags = 0; + memset(HFSPLUS_I(inode).first_extents, 0, sizeof(hfsplus_extent_rec)); + memset(HFSPLUS_I(inode).cached_extents, 0, sizeof(hfsplus_extent_rec)); + HFSPLUS_I(inode).alloc_blocks = 0; + HFSPLUS_I(inode).first_blocks = 0; + HFSPLUS_I(inode).cached_start = 0; + HFSPLUS_I(inode).cached_blocks = 0; + HFSPLUS_I(inode).phys_size = 0; + HFSPLUS_I(inode).fs_blocks = 0; + HFSPLUS_I(inode).rsrc_inode = NULL; if (S_ISDIR(inode->i_mode)) { inode->i_size = 2; - sbi->folder_count++; + HFSPLUS_SB(sb).folder_count++; inode->i_op = &hfsplus_dir_inode_operations; inode->i_fop = &hfsplus_dir_operations; } else if (S_ISREG(inode->i_mode)) { - sbi->file_count++; + HFSPLUS_SB(sb).file_count++; inode->i_op = &hfsplus_file_inode_operations; inode->i_fop = &hfsplus_file_operations; inode->i_mapping->a_ops = &hfsplus_aops; - hip->clump_blocks = sbi->data_clump_blocks; + HFSPLUS_I(inode).clump_blocks = HFSPLUS_SB(sb).data_clump_blocks; } else if (S_ISLNK(inode->i_mode)) { - sbi->file_count++; + HFSPLUS_SB(sb).file_count++; inode->i_op = &page_symlink_inode_operations; inode->i_mapping->a_ops = &hfsplus_aops; - hip->clump_blocks = 1; + HFSPLUS_I(inode).clump_blocks = 1; } else - sbi->file_count++; + HFSPLUS_SB(sb).file_count++; insert_inode_hash(inode); mark_inode_dirty(inode); sb->s_dirt = 1; @@ -410,11 +414,11 @@ void hfsplus_delete_inode(struct inode *inode) struct super_block *sb = inode->i_sb; if (S_ISDIR(inode->i_mode)) { - HFSPLUS_SB(sb)->folder_count--; + HFSPLUS_SB(sb).folder_count--; sb->s_dirt = 1; return; } - HFSPLUS_SB(sb)->file_count--; + HFSPLUS_SB(sb).file_count--; if (S_ISREG(inode->i_mode)) { if (!inode->i_nlink) { inode->i_size = 0; @@ -430,39 +434,34 @@ void hfsplus_delete_inode(struct inode *inode) void hfsplus_inode_read_fork(struct 
inode *inode, struct hfsplus_fork_raw *fork) { struct super_block *sb = inode->i_sb; - struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); - struct hfsplus_inode_info *hip = HFSPLUS_I(inode); u32 count; int i; - memcpy(&hip->first_extents, &fork->extents, sizeof(hfsplus_extent_rec)); + memcpy(&HFSPLUS_I(inode).first_extents, &fork->extents, + sizeof(hfsplus_extent_rec)); for (count = 0, i = 0; i < 8; i++) count += be32_to_cpu(fork->extents[i].block_count); - hip->first_blocks = count; - memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); - hip->cached_start = 0; - hip->cached_blocks = 0; - - hip->alloc_blocks = be32_to_cpu(fork->total_blocks); - hip->phys_size = inode->i_size = be64_to_cpu(fork->total_size); - hip->fs_blocks = - (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; - inode_set_bytes(inode, hip->fs_blocks << sb->s_blocksize_bits); - hip->clump_blocks = - be32_to_cpu(fork->clump_size) >> sbi->alloc_blksz_shift; - if (!hip->clump_blocks) { - hip->clump_blocks = HFSPLUS_IS_RSRC(inode) ? - sbi->rsrc_clump_blocks : - sbi->data_clump_blocks; - } + HFSPLUS_I(inode).first_blocks = count; + memset(HFSPLUS_I(inode).cached_extents, 0, sizeof(hfsplus_extent_rec)); + HFSPLUS_I(inode).cached_start = 0; + HFSPLUS_I(inode).cached_blocks = 0; + + HFSPLUS_I(inode).alloc_blocks = be32_to_cpu(fork->total_blocks); + inode->i_size = HFSPLUS_I(inode).phys_size = be64_to_cpu(fork->total_size); + HFSPLUS_I(inode).fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; + inode_set_bytes(inode, HFSPLUS_I(inode).fs_blocks << sb->s_blocksize_bits); + HFSPLUS_I(inode).clump_blocks = be32_to_cpu(fork->clump_size) >> HFSPLUS_SB(sb).alloc_blksz_shift; + if (!HFSPLUS_I(inode).clump_blocks) + HFSPLUS_I(inode).clump_blocks = HFSPLUS_IS_RSRC(inode) ? 
HFSPLUS_SB(sb).rsrc_clump_blocks : + HFSPLUS_SB(sb).data_clump_blocks; } void hfsplus_inode_write_fork(struct inode *inode, struct hfsplus_fork_raw *fork) { - memcpy(&fork->extents, &HFSPLUS_I(inode)->first_extents, + memcpy(&fork->extents, &HFSPLUS_I(inode).first_extents, sizeof(hfsplus_extent_rec)); fork->total_size = cpu_to_be64(inode->i_size); - fork->total_blocks = cpu_to_be32(HFSPLUS_I(inode)->alloc_blocks); + fork->total_blocks = cpu_to_be32(HFSPLUS_I(inode).alloc_blocks); } int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) @@ -473,7 +472,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) type = hfs_bnode_read_u16(fd->bnode, fd->entryoffset); - HFSPLUS_I(inode)->linkid = 0; + HFSPLUS_I(inode).dev = 0; if (type == HFSPLUS_FOLDER) { struct hfsplus_cat_folder *folder = &entry.folder; @@ -487,8 +486,8 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) inode->i_atime = hfsp_mt2ut(folder->access_date); inode->i_mtime = hfsp_mt2ut(folder->content_mod_date); inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date); - HFSPLUS_I(inode)->create_date = folder->create_date; - HFSPLUS_I(inode)->fs_blocks = 0; + HFSPLUS_I(inode).create_date = folder->create_date; + HFSPLUS_I(inode).fs_blocks = 0; inode->i_op = &hfsplus_dir_inode_operations; inode->i_fop = &hfsplus_dir_operations; } else if (type == HFSPLUS_FILE) { @@ -519,7 +518,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) inode->i_atime = hfsp_mt2ut(file->access_date); inode->i_mtime = hfsp_mt2ut(file->content_mod_date); inode->i_ctime = hfsp_mt2ut(file->attribute_mod_date); - HFSPLUS_I(inode)->create_date = file->create_date; + HFSPLUS_I(inode).create_date = file->create_date; } else { printk(KERN_ERR "hfs: bad catalog entry used to create inode\n"); res = -EIO; @@ -534,12 +533,12 @@ int hfsplus_cat_write_inode(struct inode *inode) hfsplus_cat_entry entry; if (HFSPLUS_IS_RSRC(inode)) - main_inode = 
HFSPLUS_I(inode)->rsrc_inode; + main_inode = HFSPLUS_I(inode).rsrc_inode; if (!main_inode->i_nlink) return 0; - if (hfs_find_init(HFSPLUS_SB(main_inode->i_sb)->cat_tree, &fd)) + if (hfs_find_init(HFSPLUS_SB(main_inode->i_sb).cat_tree, &fd)) /* panic? */ return -EIO; @@ -555,7 +554,7 @@ int hfsplus_cat_write_inode(struct inode *inode) hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, sizeof(struct hfsplus_cat_folder)); /* simple node checks? */ - hfsplus_cat_set_perms(inode, &folder->permissions); + hfsplus_set_perms(inode, &folder->permissions); folder->access_date = hfsp_ut2mt(inode->i_atime); folder->content_mod_date = hfsp_ut2mt(inode->i_mtime); folder->attribute_mod_date = hfsp_ut2mt(inode->i_ctime); @@ -577,7 +576,11 @@ int hfsplus_cat_write_inode(struct inode *inode) hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, sizeof(struct hfsplus_cat_file)); hfsplus_inode_write_fork(inode, &file->data_fork); - hfsplus_cat_set_perms(inode, &file->permissions); + if (S_ISREG(inode->i_mode)) + HFSPLUS_I(inode).dev = inode->i_nlink; + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + HFSPLUS_I(inode).dev = kdev_t_to_nr(inode->i_rdev); + hfsplus_set_perms(inode, &file->permissions); if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE) file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); else diff --git a/trunk/fs/hfsplus/ioctl.c b/trunk/fs/hfsplus/ioctl.c index 5b4667e08ef7..ac405f099026 100644 --- a/trunk/fs/hfsplus/ioctl.c +++ b/trunk/fs/hfsplus/ioctl.c @@ -17,98 +17,83 @@ #include #include #include +#include #include #include "hfsplus_fs.h" -static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags) +long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = file->f_path.dentry->d_inode; - struct hfsplus_inode_info *hip = HFSPLUS_I(inode); - unsigned int flags = 0; - - if (inode->i_flags & S_IMMUTABLE) - flags |= FS_IMMUTABLE_FL; - if (inode->i_flags |= S_APPEND) - flags |= 
FS_APPEND_FL; - if (hip->userflags & HFSPLUS_FLG_NODUMP) - flags |= FS_NODUMP_FL; - - return put_user(flags, user_flags); -} - -static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags) -{ - struct inode *inode = file->f_path.dentry->d_inode; - struct hfsplus_inode_info *hip = HFSPLUS_I(inode); + struct inode *inode = filp->f_path.dentry->d_inode; unsigned int flags; - int err = 0; - - err = mnt_want_write(file->f_path.mnt); - if (err) - goto out; - - if (!is_owner_or_cap(inode)) { - err = -EACCES; - goto out_drop_write; - } - - if (get_user(flags, user_flags)) { - err = -EFAULT; - goto out_drop_write; - } - mutex_lock(&inode->i_mutex); + lock_kernel(); + switch (cmd) { + case HFSPLUS_IOC_EXT2_GETFLAGS: + flags = 0; + if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_IMMUTABLE) + flags |= FS_IMMUTABLE_FL; /* EXT2_IMMUTABLE_FL */ + if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_APPEND) + flags |= FS_APPEND_FL; /* EXT2_APPEND_FL */ + if (HFSPLUS_I(inode).userflags & HFSPLUS_FLG_NODUMP) + flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */ + return put_user(flags, (int __user *)arg); + case HFSPLUS_IOC_EXT2_SETFLAGS: { + int err = 0; + err = mnt_want_write(filp->f_path.mnt); + if (err) { + unlock_kernel(); + return err; + } - if ((flags & (FS_IMMUTABLE_FL|FS_APPEND_FL)) || - inode->i_flags & (S_IMMUTABLE|S_APPEND)) { - if (!capable(CAP_LINUX_IMMUTABLE)) { - err = -EPERM; - goto out_unlock_inode; + if (!is_owner_or_cap(inode)) { + err = -EACCES; + goto setflags_out; + } + if (get_user(flags, (int __user *)arg)) { + err = -EFAULT; + goto setflags_out; + } + if (flags & (FS_IMMUTABLE_FL|FS_APPEND_FL) || + HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) { + if (!capable(CAP_LINUX_IMMUTABLE)) { + err = -EPERM; + goto setflags_out; + } } - } - /* don't silently ignore unsupported ext2 flags */ - if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) { - err = -EOPNOTSUPP; - goto out_unlock_inode; + /* don't silently ignore unsupported ext2 
flags */ + if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) { + err = -EOPNOTSUPP; + goto setflags_out; + } + if (flags & FS_IMMUTABLE_FL) { /* EXT2_IMMUTABLE_FL */ + inode->i_flags |= S_IMMUTABLE; + HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE; + } else { + inode->i_flags &= ~S_IMMUTABLE; + HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_IMMUTABLE; + } + if (flags & FS_APPEND_FL) { /* EXT2_APPEND_FL */ + inode->i_flags |= S_APPEND; + HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_APPEND; + } else { + inode->i_flags &= ~S_APPEND; + HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_APPEND; + } + if (flags & FS_NODUMP_FL) /* EXT2_NODUMP_FL */ + HFSPLUS_I(inode).userflags |= HFSPLUS_FLG_NODUMP; + else + HFSPLUS_I(inode).userflags &= ~HFSPLUS_FLG_NODUMP; + + inode->i_ctime = CURRENT_TIME_SEC; + mark_inode_dirty(inode); +setflags_out: + mnt_drop_write(filp->f_path.mnt); + unlock_kernel(); + return err; } - - if (flags & FS_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; - else - inode->i_flags &= ~S_IMMUTABLE; - - if (flags & FS_APPEND_FL) - inode->i_flags |= S_APPEND; - else - inode->i_flags &= ~S_APPEND; - - if (flags & FS_NODUMP_FL) - hip->userflags |= HFSPLUS_FLG_NODUMP; - else - hip->userflags &= ~HFSPLUS_FLG_NODUMP; - - inode->i_ctime = CURRENT_TIME_SEC; - mark_inode_dirty(inode); - -out_unlock_inode: - mutex_lock(&inode->i_mutex); -out_drop_write: - mnt_drop_write(file->f_path.mnt); -out: - return err; -} - -long hfsplus_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - void __user *argp = (void __user *)arg; - - switch (cmd) { - case HFSPLUS_IOC_EXT2_GETFLAGS: - return hfsplus_ioctl_getflags(file, argp); - case HFSPLUS_IOC_EXT2_SETFLAGS: - return hfsplus_ioctl_setflags(file, argp); default: + unlock_kernel(); return -ENOTTY; } } @@ -125,7 +110,7 @@ int hfsplus_setxattr(struct dentry *dentry, const char *name, if (!S_ISREG(inode->i_mode) || HFSPLUS_IS_RSRC(inode)) return -EOPNOTSUPP; - res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); + 
res = hfs_find_init(HFSPLUS_SB(inode->i_sb).cat_tree, &fd); if (res) return res; res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); @@ -168,7 +153,7 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, return -EOPNOTSUPP; if (size) { - res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); + res = hfs_find_init(HFSPLUS_SB(inode->i_sb).cat_tree, &fd); if (res) return res; res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); @@ -192,7 +177,7 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, } else res = size ? -ERANGE : 4; } else - res = -EOPNOTSUPP; + res = -ENODATA; out: if (size) hfs_find_exit(&fd); diff --git a/trunk/fs/hfsplus/options.c b/trunk/fs/hfsplus/options.c index f9ab276a4d8d..572628b4b07d 100644 --- a/trunk/fs/hfsplus/options.c +++ b/trunk/fs/hfsplus/options.c @@ -143,13 +143,13 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) kfree(p); break; case opt_decompose: - clear_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags); + sbi->flags &= ~HFSPLUS_SB_NODECOMPOSE; break; case opt_nodecompose: - set_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags); + sbi->flags |= HFSPLUS_SB_NODECOMPOSE; break; case opt_force: - set_bit(HFSPLUS_SB_FORCE, &sbi->flags); + sbi->flags |= HFSPLUS_SB_FORCE; break; default: return 0; @@ -171,7 +171,7 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt) { - struct hfsplus_sb_info *sbi = HFSPLUS_SB(mnt->mnt_sb); + struct hfsplus_sb_info *sbi = &HFSPLUS_SB(mnt->mnt_sb); if (sbi->creator != HFSPLUS_DEF_CR_TYPE) seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator); @@ -184,7 +184,7 @@ int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt) seq_printf(seq, ",session=%u", sbi->session); if (sbi->nls) seq_printf(seq, ",nls=%s", sbi->nls->charset); - if (test_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags)) + if (sbi->flags & HFSPLUS_SB_NODECOMPOSE) seq_printf(seq, ",nodecompose"); return 0; 
} diff --git a/trunk/fs/hfsplus/part_tbl.c b/trunk/fs/hfsplus/part_tbl.c index 208b16c645cc..1528a6fd0299 100644 --- a/trunk/fs/hfsplus/part_tbl.c +++ b/trunk/fs/hfsplus/part_tbl.c @@ -74,7 +74,6 @@ struct old_pmap { int hfs_part_find(struct super_block *sb, sector_t *part_start, sector_t *part_size) { - struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); struct buffer_head *bh; __be16 *data; int i, size, res; @@ -96,7 +95,7 @@ int hfs_part_find(struct super_block *sb, for (i = 0; i < size; p++, i++) { if (p->pdStart && p->pdSize && p->pdFSID == cpu_to_be32(0x54465331)/*"TFS1"*/ && - (sbi->part < 0 || sbi->part == i)) { + (HFSPLUS_SB(sb).part < 0 || HFSPLUS_SB(sb).part == i)) { *part_start += be32_to_cpu(p->pdStart); *part_size = be32_to_cpu(p->pdSize); res = 0; @@ -112,7 +111,7 @@ int hfs_part_find(struct super_block *sb, size = be32_to_cpu(pm->pmMapBlkCnt); for (i = 0; i < size;) { if (!memcmp(pm->pmPartType,"Apple_HFS", 9) && - (sbi->part < 0 || sbi->part == i)) { + (HFSPLUS_SB(sb).part < 0 || HFSPLUS_SB(sb).part == i)) { *part_start += be32_to_cpu(pm->pmPyPartStart); *part_size = be32_to_cpu(pm->pmPartBlkCnt); res = 0; diff --git a/trunk/fs/hfsplus/super.c b/trunk/fs/hfsplus/super.c index 9a88d7536103..3b55c050c742 100644 --- a/trunk/fs/hfsplus/super.c +++ b/trunk/fs/hfsplus/super.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -20,11 +21,40 @@ static void hfsplus_destroy_inode(struct inode *inode); #include "hfsplus_fs.h" -static int hfsplus_system_read_inode(struct inode *inode) +struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino) { - struct hfsplus_vh *vhdr = HFSPLUS_SB(inode->i_sb)->s_vhdr; + struct hfs_find_data fd; + struct hfsplus_vh *vhdr; + struct inode *inode; + long err = -EIO; - switch (inode->i_ino) { + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); + 
mutex_init(&HFSPLUS_I(inode).extents_lock); + HFSPLUS_I(inode).flags = 0; + HFSPLUS_I(inode).rsrc_inode = NULL; + atomic_set(&HFSPLUS_I(inode).opencnt, 0); + + if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID) { + read_inode: + hfs_find_init(HFSPLUS_SB(inode->i_sb).cat_tree, &fd); + err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); + if (!err) + err = hfsplus_cat_read_inode(inode, &fd); + hfs_find_exit(&fd); + if (err) + goto bad_inode; + goto done; + } + vhdr = HFSPLUS_SB(inode->i_sb).s_vhdr; + switch(inode->i_ino) { + case HFSPLUS_ROOT_CNID: + goto read_inode; case HFSPLUS_EXT_CNID: hfsplus_inode_read_fork(inode, &vhdr->ext_file); inode->i_mapping->a_ops = &hfsplus_btree_aops; @@ -45,101 +75,74 @@ static int hfsplus_system_read_inode(struct inode *inode) inode->i_mapping->a_ops = &hfsplus_btree_aops; break; default: - return -EIO; - } - - return 0; -} - -struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino) -{ - struct hfs_find_data fd; - struct inode *inode; - int err; - - inode = iget_locked(sb, ino); - if (!inode) - return ERR_PTR(-ENOMEM); - if (!(inode->i_state & I_NEW)) - return inode; - - INIT_LIST_HEAD(&HFSPLUS_I(inode)->open_dir_list); - mutex_init(&HFSPLUS_I(inode)->extents_lock); - HFSPLUS_I(inode)->flags = 0; - HFSPLUS_I(inode)->rsrc_inode = NULL; - atomic_set(&HFSPLUS_I(inode)->opencnt, 0); - - if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID || - inode->i_ino == HFSPLUS_ROOT_CNID) { - hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); - err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); - if (!err) - err = hfsplus_cat_read_inode(inode, &fd); - hfs_find_exit(&fd); - } else { - err = hfsplus_system_read_inode(inode); - } - - if (err) { - iget_failed(inode); - return ERR_PTR(err); + goto bad_inode; } +done: unlock_new_inode(inode); return inode; + +bad_inode: + iget_failed(inode); + return ERR_PTR(err); } -static int hfsplus_system_write_inode(struct inode *inode) +static int hfsplus_write_inode(struct inode *inode, + struct 
writeback_control *wbc) { - struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); - struct hfsplus_vh *vhdr = sbi->s_vhdr; - struct hfsplus_fork_raw *fork; - struct hfs_btree *tree = NULL; + struct hfsplus_vh *vhdr; + int ret = 0; + dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino); + hfsplus_ext_write_extent(inode); + if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID) { + return hfsplus_cat_write_inode(inode); + } + vhdr = HFSPLUS_SB(inode->i_sb).s_vhdr; switch (inode->i_ino) { + case HFSPLUS_ROOT_CNID: + ret = hfsplus_cat_write_inode(inode); + break; case HFSPLUS_EXT_CNID: - fork = &vhdr->ext_file; - tree = sbi->ext_tree; + if (vhdr->ext_file.total_size != cpu_to_be64(inode->i_size)) { + HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP; + inode->i_sb->s_dirt = 1; + } + hfsplus_inode_write_fork(inode, &vhdr->ext_file); + hfs_btree_write(HFSPLUS_SB(inode->i_sb).ext_tree); break; case HFSPLUS_CAT_CNID: - fork = &vhdr->cat_file; - tree = sbi->cat_tree; + if (vhdr->cat_file.total_size != cpu_to_be64(inode->i_size)) { + HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP; + inode->i_sb->s_dirt = 1; + } + hfsplus_inode_write_fork(inode, &vhdr->cat_file); + hfs_btree_write(HFSPLUS_SB(inode->i_sb).cat_tree); break; case HFSPLUS_ALLOC_CNID: - fork = &vhdr->alloc_file; + if (vhdr->alloc_file.total_size != cpu_to_be64(inode->i_size)) { + HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP; + inode->i_sb->s_dirt = 1; + } + hfsplus_inode_write_fork(inode, &vhdr->alloc_file); break; case HFSPLUS_START_CNID: - fork = &vhdr->start_file; + if (vhdr->start_file.total_size != cpu_to_be64(inode->i_size)) { + HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP; + inode->i_sb->s_dirt = 1; + } + hfsplus_inode_write_fork(inode, &vhdr->start_file); break; case HFSPLUS_ATTR_CNID: - fork = &vhdr->attr_file; - tree = sbi->attr_tree; - default: - return -EIO; - } - - if (fork->total_size != cpu_to_be64(inode->i_size)) { - set_bit(HFSPLUS_SB_WRITEBACKUP, &sbi->flags); 
- inode->i_sb->s_dirt = 1; + if (vhdr->attr_file.total_size != cpu_to_be64(inode->i_size)) { + HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP; + inode->i_sb->s_dirt = 1; + } + hfsplus_inode_write_fork(inode, &vhdr->attr_file); + hfs_btree_write(HFSPLUS_SB(inode->i_sb).attr_tree); + break; } - hfsplus_inode_write_fork(inode, fork); - if (tree) - hfs_btree_write(tree); - return 0; -} - -static int hfsplus_write_inode(struct inode *inode, - struct writeback_control *wbc) -{ - dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino); - - hfsplus_ext_write_extent(inode); - - if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID || - inode->i_ino == HFSPLUS_ROOT_CNID) - return hfsplus_cat_write_inode(inode); - else - return hfsplus_system_write_inode(inode); + return ret; } static void hfsplus_evict_inode(struct inode *inode) @@ -148,53 +151,51 @@ static void hfsplus_evict_inode(struct inode *inode) truncate_inode_pages(&inode->i_data, 0); end_writeback(inode); if (HFSPLUS_IS_RSRC(inode)) { - HFSPLUS_I(HFSPLUS_I(inode)->rsrc_inode)->rsrc_inode = NULL; - iput(HFSPLUS_I(inode)->rsrc_inode); + HFSPLUS_I(HFSPLUS_I(inode).rsrc_inode).rsrc_inode = NULL; + iput(HFSPLUS_I(inode).rsrc_inode); } } int hfsplus_sync_fs(struct super_block *sb, int wait) { - struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); - struct hfsplus_vh *vhdr = sbi->s_vhdr; + struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; dprint(DBG_SUPER, "hfsplus_write_super\n"); - mutex_lock(&sbi->vh_mutex); - mutex_lock(&sbi->alloc_mutex); + lock_super(sb); sb->s_dirt = 0; - vhdr->free_blocks = cpu_to_be32(sbi->free_blocks); - vhdr->next_cnid = cpu_to_be32(sbi->next_cnid); - vhdr->folder_count = cpu_to_be32(sbi->folder_count); - vhdr->file_count = cpu_to_be32(sbi->file_count); + vhdr->free_blocks = cpu_to_be32(HFSPLUS_SB(sb).free_blocks); + vhdr->next_alloc = cpu_to_be32(HFSPLUS_SB(sb).next_alloc); + vhdr->next_cnid = cpu_to_be32(HFSPLUS_SB(sb).next_cnid); + vhdr->folder_count = cpu_to_be32(HFSPLUS_SB(sb).folder_count); + 
vhdr->file_count = cpu_to_be32(HFSPLUS_SB(sb).file_count); - mark_buffer_dirty(sbi->s_vhbh); - if (test_and_clear_bit(HFSPLUS_SB_WRITEBACKUP, &sbi->flags)) { - if (sbi->sect_count) { + mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); + if (HFSPLUS_SB(sb).flags & HFSPLUS_SB_WRITEBACKUP) { + if (HFSPLUS_SB(sb).sect_count) { struct buffer_head *bh; u32 block, offset; - block = sbi->blockoffset; - block += (sbi->sect_count - 2) >> (sb->s_blocksize_bits - 9); - offset = ((sbi->sect_count - 2) << 9) & (sb->s_blocksize - 1); - printk(KERN_DEBUG "hfs: backup: %u,%u,%u,%u\n", - sbi->blockoffset, sbi->sect_count, - block, offset); + block = HFSPLUS_SB(sb).blockoffset; + block += (HFSPLUS_SB(sb).sect_count - 2) >> (sb->s_blocksize_bits - 9); + offset = ((HFSPLUS_SB(sb).sect_count - 2) << 9) & (sb->s_blocksize - 1); + printk(KERN_DEBUG "hfs: backup: %u,%u,%u,%u\n", HFSPLUS_SB(sb).blockoffset, + HFSPLUS_SB(sb).sect_count, block, offset); bh = sb_bread(sb, block); if (bh) { vhdr = (struct hfsplus_vh *)(bh->b_data + offset); if (be16_to_cpu(vhdr->signature) == HFSPLUS_VOLHEAD_SIG) { - memcpy(vhdr, sbi->s_vhdr, sizeof(*vhdr)); + memcpy(vhdr, HFSPLUS_SB(sb).s_vhdr, sizeof(*vhdr)); mark_buffer_dirty(bh); brelse(bh); } else printk(KERN_WARNING "hfs: backup not found!\n"); } } + HFSPLUS_SB(sb).flags &= ~HFSPLUS_SB_WRITEBACKUP; } - mutex_unlock(&sbi->alloc_mutex); - mutex_unlock(&sbi->vh_mutex); + unlock_super(sb); return 0; } @@ -208,48 +209,48 @@ static void hfsplus_write_super(struct super_block *sb) static void hfsplus_put_super(struct super_block *sb) { - struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); - dprint(DBG_SUPER, "hfsplus_put_super\n"); - if (!sb->s_fs_info) return; + lock_kernel(); + if (sb->s_dirt) hfsplus_write_super(sb); - if (!(sb->s_flags & MS_RDONLY) && sbi->s_vhdr) { - struct hfsplus_vh *vhdr = sbi->s_vhdr; + if (!(sb->s_flags & MS_RDONLY) && HFSPLUS_SB(sb).s_vhdr) { + struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; vhdr->modify_date = hfsp_now2mt(); vhdr->attributes |= 
cpu_to_be32(HFSPLUS_VOL_UNMNT); vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_INCNSTNT); - mark_buffer_dirty(sbi->s_vhbh); - sync_dirty_buffer(sbi->s_vhbh); + mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); + sync_dirty_buffer(HFSPLUS_SB(sb).s_vhbh); } - hfs_btree_close(sbi->cat_tree); - hfs_btree_close(sbi->ext_tree); - iput(sbi->alloc_file); - iput(sbi->hidden_dir); - brelse(sbi->s_vhbh); - unload_nls(sbi->nls); + hfs_btree_close(HFSPLUS_SB(sb).cat_tree); + hfs_btree_close(HFSPLUS_SB(sb).ext_tree); + iput(HFSPLUS_SB(sb).alloc_file); + iput(HFSPLUS_SB(sb).hidden_dir); + brelse(HFSPLUS_SB(sb).s_vhbh); + unload_nls(HFSPLUS_SB(sb).nls); kfree(sb->s_fs_info); sb->s_fs_info = NULL; + + unlock_kernel(); } static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; - struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); u64 id = huge_encode_dev(sb->s_bdev->bd_dev); buf->f_type = HFSPLUS_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; - buf->f_blocks = sbi->total_blocks << sbi->fs_shift; - buf->f_bfree = sbi->free_blocks << sbi->fs_shift; + buf->f_blocks = HFSPLUS_SB(sb).total_blocks << HFSPLUS_SB(sb).fs_shift; + buf->f_bfree = HFSPLUS_SB(sb).free_blocks << HFSPLUS_SB(sb).fs_shift; buf->f_bavail = buf->f_bfree; buf->f_files = 0xFFFFFFFF; - buf->f_ffree = 0xFFFFFFFF - sbi->next_cnid; + buf->f_ffree = 0xFFFFFFFF - HFSPLUS_SB(sb).next_cnid; buf->f_fsid.val[0] = (u32)id; buf->f_fsid.val[1] = (u32)(id >> 32); buf->f_namelen = HFSPLUS_MAX_STRLEN; @@ -262,11 +263,11 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data) if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) return 0; if (!(*flags & MS_RDONLY)) { - struct hfsplus_vh *vhdr = HFSPLUS_SB(sb)->s_vhdr; + struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; struct hfsplus_sb_info sbi; memset(&sbi, 0, sizeof(struct hfsplus_sb_info)); - sbi.nls = HFSPLUS_SB(sb)->nls; + sbi.nls = HFSPLUS_SB(sb).nls; if (!hfsplus_parse_options(data, &sbi)) return -EINVAL; @@ -275,7 +276,7 
@@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data) "running fsck.hfsplus is recommended. leaving read-only.\n"); sb->s_flags |= MS_RDONLY; *flags |= MS_RDONLY; - } else if (test_bit(HFSPLUS_SB_FORCE, &sbi.flags)) { + } else if (sbi.flags & HFSPLUS_SB_FORCE) { /* nothing */ } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { printk(KERN_WARNING "hfs: filesystem is marked locked, leaving read-only.\n"); @@ -319,8 +320,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) return -ENOMEM; sb->s_fs_info = sbi; - mutex_init(&sbi->alloc_mutex); - mutex_init(&sbi->vh_mutex); + INIT_HLIST_HEAD(&sbi->rsrc_inodes); hfsplus_fill_defaults(sbi); if (!hfsplus_parse_options(data, sbi)) { printk(KERN_ERR "hfs: unable to parse mount options\n"); @@ -344,7 +344,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) err = -EINVAL; goto cleanup; } - vhdr = sbi->s_vhdr; + vhdr = HFSPLUS_SB(sb).s_vhdr; /* Copy parts of the volume header into the superblock */ sb->s_magic = HFSPLUS_VOLHEAD_SIG; @@ -353,19 +353,18 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) printk(KERN_ERR "hfs: wrong filesystem version\n"); goto cleanup; } - sbi->total_blocks = be32_to_cpu(vhdr->total_blocks); - sbi->free_blocks = be32_to_cpu(vhdr->free_blocks); - sbi->next_cnid = be32_to_cpu(vhdr->next_cnid); - sbi->file_count = be32_to_cpu(vhdr->file_count); - sbi->folder_count = be32_to_cpu(vhdr->folder_count); - sbi->data_clump_blocks = - be32_to_cpu(vhdr->data_clump_sz) >> sbi->alloc_blksz_shift; - if (!sbi->data_clump_blocks) - sbi->data_clump_blocks = 1; - sbi->rsrc_clump_blocks = - be32_to_cpu(vhdr->rsrc_clump_sz) >> sbi->alloc_blksz_shift; - if (!sbi->rsrc_clump_blocks) - sbi->rsrc_clump_blocks = 1; + HFSPLUS_SB(sb).total_blocks = be32_to_cpu(vhdr->total_blocks); + HFSPLUS_SB(sb).free_blocks = be32_to_cpu(vhdr->free_blocks); + HFSPLUS_SB(sb).next_alloc = 
be32_to_cpu(vhdr->next_alloc); + HFSPLUS_SB(sb).next_cnid = be32_to_cpu(vhdr->next_cnid); + HFSPLUS_SB(sb).file_count = be32_to_cpu(vhdr->file_count); + HFSPLUS_SB(sb).folder_count = be32_to_cpu(vhdr->folder_count); + HFSPLUS_SB(sb).data_clump_blocks = be32_to_cpu(vhdr->data_clump_sz) >> HFSPLUS_SB(sb).alloc_blksz_shift; + if (!HFSPLUS_SB(sb).data_clump_blocks) + HFSPLUS_SB(sb).data_clump_blocks = 1; + HFSPLUS_SB(sb).rsrc_clump_blocks = be32_to_cpu(vhdr->rsrc_clump_sz) >> HFSPLUS_SB(sb).alloc_blksz_shift; + if (!HFSPLUS_SB(sb).rsrc_clump_blocks) + HFSPLUS_SB(sb).rsrc_clump_blocks = 1; /* Set up operations so we can load metadata */ sb->s_op = &hfsplus_sops; @@ -375,7 +374,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) printk(KERN_WARNING "hfs: Filesystem was not cleanly unmounted, " "running fsck.hfsplus is recommended. mounting read-only.\n"); sb->s_flags |= MS_RDONLY; - } else if (test_and_clear_bit(HFSPLUS_SB_FORCE, &sbi->flags)) { + } else if (sbi->flags & HFSPLUS_SB_FORCE) { /* nothing */ } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); @@ -385,15 +384,16 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) "use the force option at your own risk, mounting read-only.\n"); sb->s_flags |= MS_RDONLY; } + sbi->flags &= ~HFSPLUS_SB_FORCE; /* Load metadata objects (B*Trees) */ - sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); - if (!sbi->ext_tree) { + HFSPLUS_SB(sb).ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); + if (!HFSPLUS_SB(sb).ext_tree) { printk(KERN_ERR "hfs: failed to load extents file\n"); goto cleanup; } - sbi->cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID); - if (!sbi->cat_tree) { + HFSPLUS_SB(sb).cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID); + if (!HFSPLUS_SB(sb).cat_tree) { printk(KERN_ERR "hfs: failed to load catalog file\n"); goto cleanup; } @@ -404,7 +404,7 @@ static int 
hfsplus_fill_super(struct super_block *sb, void *data, int silent) err = PTR_ERR(inode); goto cleanup; } - sbi->alloc_file = inode; + HFSPLUS_SB(sb).alloc_file = inode; /* Load the root directory */ root = hfsplus_iget(sb, HFSPLUS_ROOT_CNID); @@ -423,7 +423,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; str.name = HFSP_HIDDENDIR_NAME; - hfs_find_init(sbi->cat_tree, &fd); + hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str); if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { hfs_find_exit(&fd); @@ -434,7 +434,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) err = PTR_ERR(inode); goto cleanup; } - sbi->hidden_dir = inode; + HFSPLUS_SB(sb).hidden_dir = inode; } else hfs_find_exit(&fd); @@ -449,19 +449,15 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) be32_add_cpu(&vhdr->write_count, 1); vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); - mark_buffer_dirty(sbi->s_vhbh); - sync_dirty_buffer(sbi->s_vhbh); + mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); + sync_dirty_buffer(HFSPLUS_SB(sb).s_vhbh); - if (!sbi->hidden_dir) { + if (!HFSPLUS_SB(sb).hidden_dir) { printk(KERN_DEBUG "hfs: create hidden dir...\n"); - - mutex_lock(&sbi->vh_mutex); - sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR); - hfsplus_create_cat(sbi->hidden_dir->i_ino, sb->s_root->d_inode, - &str, sbi->hidden_dir); - mutex_unlock(&sbi->vh_mutex); - - mark_inode_dirty(sbi->hidden_dir); + HFSPLUS_SB(sb).hidden_dir = hfsplus_new_inode(sb, S_IFDIR); + hfsplus_create_cat(HFSPLUS_SB(sb).hidden_dir->i_ino, sb->s_root->d_inode, + &str, HFSPLUS_SB(sb).hidden_dir); + mark_inode_dirty(HFSPLUS_SB(sb).hidden_dir); } out: unload_nls(sbi->nls); @@ -490,7 +486,7 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb) static void hfsplus_destroy_inode(struct 
inode *inode) { - kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode)); + kmem_cache_free(hfsplus_inode_cachep, &HFSPLUS_I(inode)); } #define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info) diff --git a/trunk/fs/hfsplus/unicode.c b/trunk/fs/hfsplus/unicode.c index b66d67de882c..628ccf6fa402 100644 --- a/trunk/fs/hfsplus/unicode.c +++ b/trunk/fs/hfsplus/unicode.c @@ -121,7 +121,7 @@ static u16 *hfsplus_compose_lookup(u16 *p, u16 cc) int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, char *astr, int *len_p) { const hfsplus_unichr *ip; - struct nls_table *nls = HFSPLUS_SB(sb)->nls; + struct nls_table *nls = HFSPLUS_SB(sb).nls; u8 *op; u16 cc, c0, c1; u16 *ce1, *ce2; @@ -132,7 +132,7 @@ int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, c ustrlen = be16_to_cpu(ustr->length); len = *len_p; ce1 = NULL; - compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); + compose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); while (ustrlen > 0) { c0 = be16_to_cpu(*ip++); @@ -246,7 +246,7 @@ int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, c static inline int asc2unichar(struct super_block *sb, const char *astr, int len, wchar_t *uc) { - int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc); + int size = HFSPLUS_SB(sb).nls->char2uni(astr, len, uc); if (size <= 0) { *uc = '?'; size = 1; @@ -293,7 +293,7 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, u16 *dstr, outlen = 0; wchar_t c; - decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); + decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { size = asc2unichar(sb, astr, len, &c); @@ -330,8 +330,8 @@ int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str) wchar_t c; u16 c2; - casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); - decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, 
&HFSPLUS_SB(sb)->flags); + casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD); + decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); hash = init_name_hash(); astr = str->name; len = str->len; @@ -373,8 +373,8 @@ int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr * u16 c1, c2; wchar_t c; - casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); - decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); + casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD); + decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); astr1 = s1->name; len1 = s1->len; astr2 = s2->name; diff --git a/trunk/fs/hfsplus/wrapper.c b/trunk/fs/hfsplus/wrapper.c index 8972c20b3216..bed78ac8f6d1 100644 --- a/trunk/fs/hfsplus/wrapper.c +++ b/trunk/fs/hfsplus/wrapper.c @@ -65,8 +65,8 @@ static int hfsplus_get_last_session(struct super_block *sb, *start = 0; *size = sb->s_bdev->bd_inode->i_size >> 9; - if (HFSPLUS_SB(sb)->session >= 0) { - te.cdte_track = HFSPLUS_SB(sb)->session; + if (HFSPLUS_SB(sb).session >= 0) { + te.cdte_track = HFSPLUS_SB(sb).session; te.cdte_format = CDROM_LBA; res = ioctl_by_bdev(sb->s_bdev, CDROMREADTOCENTRY, (unsigned long)&te); if (!res && (te.cdte_ctrl & CDROM_DATA_TRACK) == 4) { @@ -87,7 +87,6 @@ static int hfsplus_get_last_session(struct super_block *sb, /* Takes in super block, returns true if good data read */ int hfsplus_read_wrapper(struct super_block *sb) { - struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); struct buffer_head *bh; struct hfsplus_vh *vhdr; struct hfsplus_wd wd; @@ -123,7 +122,7 @@ int hfsplus_read_wrapper(struct super_block *sb) if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIG)) break; if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIGX)) { - set_bit(HFSPLUS_SB_HFSX, &sbi->flags); + HFSPLUS_SB(sb).flags |= HFSPLUS_SB_HFSX; break; } brelse(bh); @@ -144,11 +143,11 @@ int hfsplus_read_wrapper(struct super_block *sb) if (blocksize < HFSPLUS_SECTOR_SIZE || ((blocksize - 
1) & blocksize)) return -EINVAL; - sbi->alloc_blksz = blocksize; - sbi->alloc_blksz_shift = 0; + HFSPLUS_SB(sb).alloc_blksz = blocksize; + HFSPLUS_SB(sb).alloc_blksz_shift = 0; while ((blocksize >>= 1) != 0) - sbi->alloc_blksz_shift++; - blocksize = min(sbi->alloc_blksz, (u32)PAGE_SIZE); + HFSPLUS_SB(sb).alloc_blksz_shift++; + blocksize = min(HFSPLUS_SB(sb).alloc_blksz, (u32)PAGE_SIZE); /* align block size to block offset */ while (part_start & ((blocksize >> HFSPLUS_SECTOR_SHIFT) - 1)) @@ -159,26 +158,23 @@ int hfsplus_read_wrapper(struct super_block *sb) return -EINVAL; } - sbi->blockoffset = - part_start >> (sb->s_blocksize_bits - HFSPLUS_SECTOR_SHIFT); - sbi->sect_count = part_size; - sbi->fs_shift = sbi->alloc_blksz_shift - sb->s_blocksize_bits; + HFSPLUS_SB(sb).blockoffset = part_start >> + (sb->s_blocksize_bits - HFSPLUS_SECTOR_SHIFT); + HFSPLUS_SB(sb).sect_count = part_size; + HFSPLUS_SB(sb).fs_shift = HFSPLUS_SB(sb).alloc_blksz_shift - + sb->s_blocksize_bits; bh = sb_bread512(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, vhdr); if (!bh) return -EIO; /* should still be the same... */ - if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) { - if (vhdr->signature != cpu_to_be16(HFSPLUS_VOLHEAD_SIGX)) - goto error; - } else { - if (vhdr->signature != cpu_to_be16(HFSPLUS_VOLHEAD_SIG)) - goto error; - } - - sbi->s_vhbh = bh; - sbi->s_vhdr = vhdr; + if (vhdr->signature != (HFSPLUS_SB(sb).flags & HFSPLUS_SB_HFSX ? 
+ cpu_to_be16(HFSPLUS_VOLHEAD_SIGX) : + cpu_to_be16(HFSPLUS_VOLHEAD_SIG))) + goto error; + HFSPLUS_SB(sb).s_vhbh = bh; + HFSPLUS_SB(sb).s_vhdr = vhdr; return 0; error: diff --git a/trunk/include/linux/ceph/debugfs.h b/trunk/include/linux/ceph/debugfs.h deleted file mode 100644 index 2a79702e092b..000000000000 --- a/trunk/include/linux/ceph/debugfs.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef _FS_CEPH_DEBUGFS_H -#define _FS_CEPH_DEBUGFS_H - -#include "ceph_debug.h" -#include "types.h" - -#define CEPH_DEFINE_SHOW_FUNC(name) \ -static int name##_open(struct inode *inode, struct file *file) \ -{ \ - struct seq_file *sf; \ - int ret; \ - \ - ret = single_open(file, name, NULL); \ - sf = file->private_data; \ - sf->private = inode->i_private; \ - return ret; \ -} \ - \ -static const struct file_operations name##_fops = { \ - .open = name##_open, \ - .read = seq_read, \ - .llseek = seq_lseek, \ - .release = single_release, \ -}; - -/* debugfs.c */ -extern int ceph_debugfs_init(void); -extern void ceph_debugfs_cleanup(void); -extern int ceph_debugfs_client_init(struct ceph_client *client); -extern void ceph_debugfs_client_cleanup(struct ceph_client *client); - -#endif - diff --git a/trunk/include/linux/ceph/libceph.h b/trunk/include/linux/ceph/libceph.h deleted file mode 100644 index f22b2e941686..000000000000 --- a/trunk/include/linux/ceph/libceph.h +++ /dev/null @@ -1,249 +0,0 @@ -#ifndef _FS_CEPH_LIBCEPH_H -#define _FS_CEPH_LIBCEPH_H - -#include "ceph_debug.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "types.h" -#include "messenger.h" -#include "msgpool.h" -#include "mon_client.h" -#include "osd_client.h" -#include "ceph_fs.h" - -/* - * Supported features - */ -#define CEPH_FEATURE_SUPPORTED_DEFAULT CEPH_FEATURE_NOSRCADDR -#define CEPH_FEATURE_REQUIRED_DEFAULT CEPH_FEATURE_NOSRCADDR - -/* - * mount options - */ -#define CEPH_OPT_FSID (1<<0) -#define CEPH_OPT_NOSHARE (1<<1) /* don't share client with 
other sbs */ -#define CEPH_OPT_MYIP (1<<2) /* specified my ip */ -#define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */ - -#define CEPH_OPT_DEFAULT (0); - -#define ceph_set_opt(client, opt) \ - (client)->options->flags |= CEPH_OPT_##opt; -#define ceph_test_opt(client, opt) \ - (!!((client)->options->flags & CEPH_OPT_##opt)) - -struct ceph_options { - int flags; - struct ceph_fsid fsid; - struct ceph_entity_addr my_addr; - int mount_timeout; - int osd_idle_ttl; - int osd_timeout; - int osd_keepalive_timeout; - - /* - * any type that can't be simply compared or doesn't need need - * to be compared should go beyond this point, - * ceph_compare_options() should be updated accordingly - */ - - struct ceph_entity_addr *mon_addr; /* should be the first - pointer type of args */ - int num_mon; - char *name; - char *secret; -}; - -/* - * defaults - */ -#define CEPH_MOUNT_TIMEOUT_DEFAULT 60 -#define CEPH_OSD_TIMEOUT_DEFAULT 60 /* seconds */ -#define CEPH_OSD_KEEPALIVE_DEFAULT 5 -#define CEPH_OSD_IDLE_TTL_DEFAULT 60 -#define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ - -#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) -#define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) - -#define CEPH_AUTH_NAME_DEFAULT "guest" - -/* - * Delay telling the MDS we no longer want caps, in case we reopen - * the file. Delay a minimum amount of time, even if we send a cap - * message for some other reason. Otherwise, take the oppotunity to - * update the mds to avoid sending another message later. 
- */ -#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ -#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ - -#define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4) - -/* mount state */ -enum { - CEPH_MOUNT_MOUNTING, - CEPH_MOUNT_MOUNTED, - CEPH_MOUNT_UNMOUNTING, - CEPH_MOUNT_UNMOUNTED, - CEPH_MOUNT_SHUTDOWN, -}; - -/* - * subtract jiffies - */ -static inline unsigned long time_sub(unsigned long a, unsigned long b) -{ - BUG_ON(time_after(b, a)); - return (long)a - (long)b; -} - -struct ceph_mds_client; - -/* - * per client state - * - * possibly shared by multiple mount points, if they are - * mounting the same ceph filesystem/cluster. - */ -struct ceph_client { - struct ceph_fsid fsid; - bool have_fsid; - - void *private; - - struct ceph_options *options; - - struct mutex mount_mutex; /* serialize mount attempts */ - wait_queue_head_t auth_wq; - int auth_err; - - int (*extra_mon_dispatch)(struct ceph_client *, struct ceph_msg *); - - u32 supported_features; - u32 required_features; - - struct ceph_messenger *msgr; /* messenger instance */ - struct ceph_mon_client monc; - struct ceph_osd_client osdc; - -#ifdef CONFIG_DEBUG_FS - struct dentry *debugfs_dir; - struct dentry *debugfs_monmap; - struct dentry *debugfs_osdmap; -#endif -}; - - - -/* - * snapshots - */ - -/* - * A "snap context" is the set of existing snapshots when we - * write data. It is used by the OSD to guide its COW behavior. - * - * The ceph_snap_context is refcounted, and attached to each dirty - * page, indicating which context the dirty data belonged when it was - * dirtied. 
- */ -struct ceph_snap_context { - atomic_t nref; - u64 seq; - int num_snaps; - u64 snaps[]; -}; - -static inline struct ceph_snap_context * -ceph_get_snap_context(struct ceph_snap_context *sc) -{ - /* - printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), - atomic_read(&sc->nref)+1); - */ - if (sc) - atomic_inc(&sc->nref); - return sc; -} - -static inline void ceph_put_snap_context(struct ceph_snap_context *sc) -{ - if (!sc) - return; - /* - printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), - atomic_read(&sc->nref)-1); - */ - if (atomic_dec_and_test(&sc->nref)) { - /*printk(" deleting snap_context %p\n", sc);*/ - kfree(sc); - } -} - -/* - * calculate the number of pages a given length and offset map onto, - * if we align the data. - */ -static inline int calc_pages_for(u64 off, u64 len) -{ - return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) - - (off >> PAGE_CACHE_SHIFT); -} - -/* ceph_common.c */ -extern const char *ceph_msg_type_name(int type); -extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); -extern struct kmem_cache *ceph_inode_cachep; -extern struct kmem_cache *ceph_cap_cachep; -extern struct kmem_cache *ceph_dentry_cachep; -extern struct kmem_cache *ceph_file_cachep; - -extern int ceph_parse_options(struct ceph_options **popt, char *options, - const char *dev_name, const char *dev_name_end, - int (*parse_extra_token)(char *c, void *private), - void *private); -extern void ceph_destroy_options(struct ceph_options *opt); -extern int ceph_compare_options(struct ceph_options *new_opt, - struct ceph_client *client); -extern struct ceph_client *ceph_create_client(struct ceph_options *opt, - void *private); -extern u64 ceph_client_id(struct ceph_client *client); -extern void ceph_destroy_client(struct ceph_client *client); -extern int __ceph_open_session(struct ceph_client *client, - unsigned long started); -extern int ceph_open_session(struct ceph_client *client); - -/* pagevec.c */ -extern 
void ceph_release_page_vector(struct page **pages, int num_pages); - -extern struct page **ceph_get_direct_page_vector(const char __user *data, - int num_pages, - loff_t off, size_t len); -extern void ceph_put_page_vector(struct page **pages, int num_pages); -extern void ceph_release_page_vector(struct page **pages, int num_pages); -extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); -extern int ceph_copy_user_to_page_vector(struct page **pages, - const char __user *data, - loff_t off, size_t len); -extern int ceph_copy_to_page_vector(struct page **pages, - const char *data, - loff_t off, size_t len); -extern int ceph_copy_from_page_vector(struct page **pages, - char *data, - loff_t off, size_t len); -extern int ceph_copy_page_vector_to_user(struct page **pages, char __user *data, - loff_t off, size_t len); -extern void ceph_zero_page_vector_range(int off, int len, struct page **pages); - - -#endif /* _FS_CEPH_SUPER_H */ diff --git a/trunk/net/Kconfig b/trunk/net/Kconfig index 55fd82e9ffd9..e926884c1675 100644 --- a/trunk/net/Kconfig +++ b/trunk/net/Kconfig @@ -293,7 +293,6 @@ source "net/wimax/Kconfig" source "net/rfkill/Kconfig" source "net/9p/Kconfig" source "net/caif/Kconfig" -source "net/ceph/Kconfig" endif # if NET diff --git a/trunk/net/Makefile b/trunk/net/Makefile index 6b7bfd7f1416..ea60fbce9b1b 100644 --- a/trunk/net/Makefile +++ b/trunk/net/Makefile @@ -68,4 +68,3 @@ obj-$(CONFIG_SYSCTL) += sysctl_net.o endif obj-$(CONFIG_WIMAX) += wimax/ obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/ -obj-$(CONFIG_CEPH_LIB) += ceph/ diff --git a/trunk/net/ceph/Kconfig b/trunk/net/ceph/Kconfig deleted file mode 100644 index ad424049b0cf..000000000000 --- a/trunk/net/ceph/Kconfig +++ /dev/null @@ -1,28 +0,0 @@ -config CEPH_LIB - tristate "Ceph core library (EXPERIMENTAL)" - depends on INET && EXPERIMENTAL - select LIBCRC32C - select CRYPTO_AES - select CRYPTO - default n - help - Choose Y or M here to include cephlib, which provides the - common 
functionality to both the Ceph filesystem and - to the rados block device (rbd). - - More information at http://ceph.newdream.net/. - - If unsure, say N. - -config CEPH_LIB_PRETTYDEBUG - bool "Include file:line in ceph debug output" - depends on CEPH_LIB - default n - help - If you say Y here, debug output will include a filename and - line to aid debugging. This increases kernel size and slows - execution slightly when debug call sites are enabled (e.g., - via CONFIG_DYNAMIC_DEBUG). - - If unsure, say N. - diff --git a/trunk/net/ceph/Makefile b/trunk/net/ceph/Makefile deleted file mode 100644 index aab1cabb8035..000000000000 --- a/trunk/net/ceph/Makefile +++ /dev/null @@ -1,37 +0,0 @@ -# -# Makefile for CEPH filesystem. -# - -ifneq ($(KERNELRELEASE),) - -obj-$(CONFIG_CEPH_LIB) += libceph.o - -libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ - mon_client.o \ - osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ - debugfs.o \ - auth.o auth_none.o \ - crypto.o armor.o \ - auth_x.o \ - ceph_fs.o ceph_strings.o ceph_hash.o \ - pagevec.o - -else -#Otherwise we were called directly from the command -# line; invoke the kernel build system. 
- -KERNELDIR ?= /lib/modules/$(shell uname -r)/build -PWD := $(shell pwd) - -default: all - -all: - $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules - -modules_install: - $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules_install - -clean: - $(MAKE) -C $(KERNELDIR) M=$(PWD) clean - -endif diff --git a/trunk/net/ceph/ceph_common.c b/trunk/net/ceph/ceph_common.c deleted file mode 100644 index f3e4a13fea0c..000000000000 --- a/trunk/net/ceph/ceph_common.c +++ /dev/null @@ -1,529 +0,0 @@ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#include -#include -#include -#include -#include - - - -/* - * find filename portion of a path (/foo/bar/baz -> baz) - */ -const char *ceph_file_part(const char *s, int len) -{ - const char *e = s + len; - - while (e != s && *(e-1) != '/') - e--; - return e; -} -EXPORT_SYMBOL(ceph_file_part); - -const char *ceph_msg_type_name(int type) -{ - switch (type) { - case CEPH_MSG_SHUTDOWN: return "shutdown"; - case CEPH_MSG_PING: return "ping"; - case CEPH_MSG_AUTH: return "auth"; - case CEPH_MSG_AUTH_REPLY: return "auth_reply"; - case CEPH_MSG_MON_MAP: return "mon_map"; - case CEPH_MSG_MON_GET_MAP: return "mon_get_map"; - case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe"; - case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack"; - case CEPH_MSG_STATFS: return "statfs"; - case CEPH_MSG_STATFS_REPLY: return "statfs_reply"; - case CEPH_MSG_MDS_MAP: return "mds_map"; - case CEPH_MSG_CLIENT_SESSION: return "client_session"; - case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect"; - case CEPH_MSG_CLIENT_REQUEST: return "client_request"; - case CEPH_MSG_CLIENT_REQUEST_FORWARD: return "client_request_forward"; - case CEPH_MSG_CLIENT_REPLY: return "client_reply"; - case CEPH_MSG_CLIENT_CAPS: return "client_caps"; - case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release"; - case CEPH_MSG_CLIENT_SNAP: return "client_snap"; - 
case CEPH_MSG_CLIENT_LEASE: return "client_lease"; - case CEPH_MSG_OSD_MAP: return "osd_map"; - case CEPH_MSG_OSD_OP: return "osd_op"; - case CEPH_MSG_OSD_OPREPLY: return "osd_opreply"; - default: return "unknown"; - } -} -EXPORT_SYMBOL(ceph_msg_type_name); - -/* - * Initially learn our fsid, or verify an fsid matches. - */ -int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) -{ - if (client->have_fsid) { - if (ceph_fsid_compare(&client->fsid, fsid)) { - pr_err("bad fsid, had %pU got %pU", - &client->fsid, fsid); - return -1; - } - } else { - pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid); - memcpy(&client->fsid, fsid, sizeof(*fsid)); - ceph_debugfs_client_init(client); - client->have_fsid = true; - } - return 0; -} -EXPORT_SYMBOL(ceph_check_fsid); - -static int strcmp_null(const char *s1, const char *s2) -{ - if (!s1 && !s2) - return 0; - if (s1 && !s2) - return -1; - if (!s1 && s2) - return 1; - return strcmp(s1, s2); -} - -int ceph_compare_options(struct ceph_options *new_opt, - struct ceph_client *client) -{ - struct ceph_options *opt1 = new_opt; - struct ceph_options *opt2 = client->options; - int ofs = offsetof(struct ceph_options, mon_addr); - int i; - int ret; - - ret = memcmp(opt1, opt2, ofs); - if (ret) - return ret; - - ret = strcmp_null(opt1->name, opt2->name); - if (ret) - return ret; - - ret = strcmp_null(opt1->secret, opt2->secret); - if (ret) - return ret; - - /* any matching mon ip implies a match */ - for (i = 0; i < opt1->num_mon; i++) { - if (ceph_monmap_contains(client->monc.monmap, - &opt1->mon_addr[i])) - return 0; - } - return -1; -} -EXPORT_SYMBOL(ceph_compare_options); - - -static int parse_fsid(const char *str, struct ceph_fsid *fsid) -{ - int i = 0; - char tmp[3]; - int err = -EINVAL; - int d; - - dout("parse_fsid '%s'\n", str); - tmp[2] = 0; - while (*str && i < 16) { - if (ispunct(*str)) { - str++; - continue; - } - if (!isxdigit(str[0]) || !isxdigit(str[1])) - break; - tmp[0] = str[0]; - tmp[1] = 
str[1]; - if (sscanf(tmp, "%x", &d) < 1) - break; - fsid->fsid[i] = d & 0xff; - i++; - str += 2; - } - - if (i == 16) - err = 0; - dout("parse_fsid ret %d got fsid %pU", err, fsid); - return err; -} - -/* - * ceph options - */ -enum { - Opt_osdtimeout, - Opt_osdkeepalivetimeout, - Opt_mount_timeout, - Opt_osd_idle_ttl, - Opt_last_int, - /* int args above */ - Opt_fsid, - Opt_name, - Opt_secret, - Opt_ip, - Opt_last_string, - /* string args above */ - Opt_noshare, - Opt_nocrc, -}; - -static match_table_t opt_tokens = { - {Opt_osdtimeout, "osdtimeout=%d"}, - {Opt_osdkeepalivetimeout, "osdkeepalive=%d"}, - {Opt_mount_timeout, "mount_timeout=%d"}, - {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, - /* int args above */ - {Opt_fsid, "fsid=%s"}, - {Opt_name, "name=%s"}, - {Opt_secret, "secret=%s"}, - {Opt_ip, "ip=%s"}, - /* string args above */ - {Opt_noshare, "noshare"}, - {Opt_nocrc, "nocrc"}, - {-1, NULL} -}; - -void ceph_destroy_options(struct ceph_options *opt) -{ - dout("destroy_options %p\n", opt); - kfree(opt->name); - kfree(opt->secret); - kfree(opt); -} -EXPORT_SYMBOL(ceph_destroy_options); - -int ceph_parse_options(struct ceph_options **popt, char *options, - const char *dev_name, const char *dev_name_end, - int (*parse_extra_token)(char *c, void *private), - void *private) -{ - struct ceph_options *opt; - const char *c; - int err = -ENOMEM; - substring_t argstr[MAX_OPT_ARGS]; - - opt = kzalloc(sizeof(*opt), GFP_KERNEL); - if (!opt) - return err; - opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr), - GFP_KERNEL); - if (!opt->mon_addr) - goto out; - - dout("parse_options %p options '%s' dev_name '%s'\n", opt, options, - dev_name); - - /* start with defaults */ - opt->flags = CEPH_OPT_DEFAULT; - opt->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT; - opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; - opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ - opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ - - /* get mon ip(s) */ - /* 
ip1[:port1][,ip2[:port2]...] */ - err = ceph_parse_ips(dev_name, dev_name_end, opt->mon_addr, - CEPH_MAX_MON, &opt->num_mon); - if (err < 0) - goto out; - - /* parse mount options */ - while ((c = strsep(&options, ",")) != NULL) { - int token, intval, ret; - if (!*c) - continue; - err = -EINVAL; - token = match_token((char *)c, opt_tokens, argstr); - if (token < 0 && parse_extra_token) { - /* extra? */ - err = parse_extra_token((char *)c, private); - if (err < 0) { - pr_err("bad option at '%s'\n", c); - goto out; - } - continue; - } - if (token < Opt_last_int) { - ret = match_int(&argstr[0], &intval); - if (ret < 0) { - pr_err("bad mount option arg (not int) " - "at '%s'\n", c); - continue; - } - dout("got int token %d val %d\n", token, intval); - } else if (token > Opt_last_int && token < Opt_last_string) { - dout("got string token %d val %s\n", token, - argstr[0].from); - } else { - dout("got token %d\n", token); - } - switch (token) { - case Opt_ip: - err = ceph_parse_ips(argstr[0].from, - argstr[0].to, - &opt->my_addr, - 1, NULL); - if (err < 0) - goto out; - opt->flags |= CEPH_OPT_MYIP; - break; - - case Opt_fsid: - err = parse_fsid(argstr[0].from, &opt->fsid); - if (err == 0) - opt->flags |= CEPH_OPT_FSID; - break; - case Opt_name: - opt->name = kstrndup(argstr[0].from, - argstr[0].to-argstr[0].from, - GFP_KERNEL); - break; - case Opt_secret: - opt->secret = kstrndup(argstr[0].from, - argstr[0].to-argstr[0].from, - GFP_KERNEL); - break; - - /* misc */ - case Opt_osdtimeout: - opt->osd_timeout = intval; - break; - case Opt_osdkeepalivetimeout: - opt->osd_keepalive_timeout = intval; - break; - case Opt_osd_idle_ttl: - opt->osd_idle_ttl = intval; - break; - case Opt_mount_timeout: - opt->mount_timeout = intval; - break; - - case Opt_noshare: - opt->flags |= CEPH_OPT_NOSHARE; - break; - - case Opt_nocrc: - opt->flags |= CEPH_OPT_NOCRC; - break; - - default: - BUG_ON(token); - } - } - - /* success */ - *popt = opt; - return 0; - -out: - ceph_destroy_options(opt); 
- return err; -} -EXPORT_SYMBOL(ceph_parse_options); - -u64 ceph_client_id(struct ceph_client *client) -{ - return client->monc.auth->global_id; -} -EXPORT_SYMBOL(ceph_client_id); - -/* - * create a fresh client instance - */ -struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private) -{ - struct ceph_client *client; - int err = -ENOMEM; - - client = kzalloc(sizeof(*client), GFP_KERNEL); - if (client == NULL) - return ERR_PTR(-ENOMEM); - - client->private = private; - client->options = opt; - - mutex_init(&client->mount_mutex); - init_waitqueue_head(&client->auth_wq); - client->auth_err = 0; - - client->extra_mon_dispatch = NULL; - client->supported_features = CEPH_FEATURE_SUPPORTED_DEFAULT; - client->required_features = CEPH_FEATURE_REQUIRED_DEFAULT; - - client->msgr = NULL; - - /* subsystems */ - err = ceph_monc_init(&client->monc, client); - if (err < 0) - goto fail; - err = ceph_osdc_init(&client->osdc, client); - if (err < 0) - goto fail_monc; - - return client; - -fail_monc: - ceph_monc_stop(&client->monc); -fail: - kfree(client); - return ERR_PTR(err); -} -EXPORT_SYMBOL(ceph_create_client); - -void ceph_destroy_client(struct ceph_client *client) -{ - dout("destroy_client %p\n", client); - - /* unmount */ - ceph_osdc_stop(&client->osdc); - - /* - * make sure mds and osd connections close out before destroying - * the auth module, which is needed to free those connections' - * ceph_authorizers. 
- */ - ceph_msgr_flush(); - - ceph_monc_stop(&client->monc); - - ceph_debugfs_client_cleanup(client); - - if (client->msgr) - ceph_messenger_destroy(client->msgr); - - ceph_destroy_options(client->options); - - kfree(client); - dout("destroy_client %p done\n", client); -} -EXPORT_SYMBOL(ceph_destroy_client); - -/* - * true if we have the mon map (and have thus joined the cluster) - */ -static int have_mon_and_osd_map(struct ceph_client *client) -{ - return client->monc.monmap && client->monc.monmap->epoch && - client->osdc.osdmap && client->osdc.osdmap->epoch; -} - -/* - * mount: join the ceph cluster, and open root directory. - */ -int __ceph_open_session(struct ceph_client *client, unsigned long started) -{ - struct ceph_entity_addr *myaddr = NULL; - int err; - unsigned long timeout = client->options->mount_timeout * HZ; - - /* initialize the messenger */ - if (client->msgr == NULL) { - if (ceph_test_opt(client, MYIP)) - myaddr = &client->options->my_addr; - client->msgr = ceph_messenger_create(myaddr, - client->supported_features, - client->required_features); - if (IS_ERR(client->msgr)) { - client->msgr = NULL; - return PTR_ERR(client->msgr); - } - client->msgr->nocrc = ceph_test_opt(client, NOCRC); - } - - /* open session, and wait for mon and osd maps */ - err = ceph_monc_open_session(&client->monc); - if (err < 0) - return err; - - while (!have_mon_and_osd_map(client)) { - err = -EIO; - if (timeout && time_after_eq(jiffies, started + timeout)) - return err; - - /* wait */ - dout("mount waiting for mon_map\n"); - err = wait_event_interruptible_timeout(client->auth_wq, - have_mon_and_osd_map(client) || (client->auth_err < 0), - timeout); - if (err == -EINTR || err == -ERESTARTSYS) - return err; - if (client->auth_err < 0) - return client->auth_err; - } - - return 0; -} -EXPORT_SYMBOL(__ceph_open_session); - - -int ceph_open_session(struct ceph_client *client) -{ - int ret; - unsigned long started = jiffies; /* note the start time */ - - dout("open_session 
start\n"); - mutex_lock(&client->mount_mutex); - - ret = __ceph_open_session(client, started); - - mutex_unlock(&client->mount_mutex); - return ret; -} -EXPORT_SYMBOL(ceph_open_session); - - -static int __init init_ceph_lib(void) -{ - int ret = 0; - - ret = ceph_debugfs_init(); - if (ret < 0) - goto out; - - ret = ceph_msgr_init(); - if (ret < 0) - goto out_debugfs; - - pr_info("loaded (mon/osd proto %d/%d, osdmap %d/%d %d/%d)\n", - CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL, - CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT, - CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT); - - return 0; - -out_debugfs: - ceph_debugfs_cleanup(); -out: - return ret; -} - -static void __exit exit_ceph_lib(void) -{ - dout("exit_ceph_lib\n"); - ceph_msgr_exit(); - ceph_debugfs_cleanup(); -} - -module_init(init_ceph_lib); -module_exit(exit_ceph_lib); - -MODULE_AUTHOR("Sage Weil "); -MODULE_AUTHOR("Yehuda Sadeh "); -MODULE_AUTHOR("Patience Warnick "); -MODULE_DESCRIPTION("Ceph filesystem for Linux"); -MODULE_LICENSE("GPL"); diff --git a/trunk/net/ceph/ceph_strings.c b/trunk/net/ceph/ceph_strings.c deleted file mode 100644 index 3fbda04de29c..000000000000 --- a/trunk/net/ceph/ceph_strings.c +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Ceph string constants - */ -#include -#include - -const char *ceph_entity_type_name(int type) -{ - switch (type) { - case CEPH_ENTITY_TYPE_MDS: return "mds"; - case CEPH_ENTITY_TYPE_OSD: return "osd"; - case CEPH_ENTITY_TYPE_MON: return "mon"; - case CEPH_ENTITY_TYPE_CLIENT: return "client"; - case CEPH_ENTITY_TYPE_AUTH: return "auth"; - default: return "unknown"; - } -} - -const char *ceph_osd_op_name(int op) -{ - switch (op) { - case CEPH_OSD_OP_READ: return "read"; - case CEPH_OSD_OP_STAT: return "stat"; - - case CEPH_OSD_OP_MASKTRUNC: return "masktrunc"; - - case CEPH_OSD_OP_WRITE: return "write"; - case CEPH_OSD_OP_DELETE: return "delete"; - case CEPH_OSD_OP_TRUNCATE: return "truncate"; - case CEPH_OSD_OP_ZERO: return "zero"; - case CEPH_OSD_OP_WRITEFULL: 
return "writefull"; - case CEPH_OSD_OP_ROLLBACK: return "rollback"; - - case CEPH_OSD_OP_APPEND: return "append"; - case CEPH_OSD_OP_STARTSYNC: return "startsync"; - case CEPH_OSD_OP_SETTRUNC: return "settrunc"; - case CEPH_OSD_OP_TRIMTRUNC: return "trimtrunc"; - - case CEPH_OSD_OP_TMAPUP: return "tmapup"; - case CEPH_OSD_OP_TMAPGET: return "tmapget"; - case CEPH_OSD_OP_TMAPPUT: return "tmapput"; - - case CEPH_OSD_OP_GETXATTR: return "getxattr"; - case CEPH_OSD_OP_GETXATTRS: return "getxattrs"; - case CEPH_OSD_OP_SETXATTR: return "setxattr"; - case CEPH_OSD_OP_SETXATTRS: return "setxattrs"; - case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs"; - case CEPH_OSD_OP_RMXATTR: return "rmxattr"; - case CEPH_OSD_OP_CMPXATTR: return "cmpxattr"; - - case CEPH_OSD_OP_PULL: return "pull"; - case CEPH_OSD_OP_PUSH: return "push"; - case CEPH_OSD_OP_BALANCEREADS: return "balance-reads"; - case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads"; - case CEPH_OSD_OP_SCRUB: return "scrub"; - - case CEPH_OSD_OP_WRLOCK: return "wrlock"; - case CEPH_OSD_OP_WRUNLOCK: return "wrunlock"; - case CEPH_OSD_OP_RDLOCK: return "rdlock"; - case CEPH_OSD_OP_RDUNLOCK: return "rdunlock"; - case CEPH_OSD_OP_UPLOCK: return "uplock"; - case CEPH_OSD_OP_DNLOCK: return "dnlock"; - - case CEPH_OSD_OP_CALL: return "call"; - - case CEPH_OSD_OP_PGLS: return "pgls"; - } - return "???"; -} - - -const char *ceph_pool_op_name(int op) -{ - switch (op) { - case POOL_OP_CREATE: return "create"; - case POOL_OP_DELETE: return "delete"; - case POOL_OP_AUID_CHANGE: return "auid change"; - case POOL_OP_CREATE_SNAP: return "create snap"; - case POOL_OP_DELETE_SNAP: return "delete snap"; - case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap"; - case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap"; - } - return "???"; -} diff --git a/trunk/net/ceph/debugfs.c b/trunk/net/ceph/debugfs.c deleted file mode 100644 index 27d4ea315d12..000000000000 --- a/trunk/net/ceph/debugfs.c +++ /dev/null @@ 
-1,267 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#ifdef CONFIG_DEBUG_FS - -/* - * Implement /sys/kernel/debug/ceph fun - * - * /sys/kernel/debug/ceph/client* - an instance of the ceph client - * .../osdmap - current osdmap - * .../monmap - current monmap - * .../osdc - active osd requests - * .../monc - mon client state - * .../dentry_lru - dump contents of dentry lru - * .../caps - expose cap (reservation) stats - * .../bdi - symlink to ../../bdi/something - */ - -static struct dentry *ceph_debugfs_dir; - -static int monmap_show(struct seq_file *s, void *p) -{ - int i; - struct ceph_client *client = s->private; - - if (client->monc.monmap == NULL) - return 0; - - seq_printf(s, "epoch %d\n", client->monc.monmap->epoch); - for (i = 0; i < client->monc.monmap->num_mon; i++) { - struct ceph_entity_inst *inst = - &client->monc.monmap->mon_inst[i]; - - seq_printf(s, "\t%s%lld\t%s\n", - ENTITY_NAME(inst->name), - ceph_pr_addr(&inst->addr.in_addr)); - } - return 0; -} - -static int osdmap_show(struct seq_file *s, void *p) -{ - int i; - struct ceph_client *client = s->private; - struct rb_node *n; - - if (client->osdc.osdmap == NULL) - return 0; - seq_printf(s, "epoch %d\n", client->osdc.osdmap->epoch); - seq_printf(s, "flags%s%s\n", - (client->osdc.osdmap->flags & CEPH_OSDMAP_NEARFULL) ? - " NEARFULL" : "", - (client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ? 
- " FULL" : ""); - for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) { - struct ceph_pg_pool_info *pool = - rb_entry(n, struct ceph_pg_pool_info, node); - seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n", - pool->id, pool->v.pg_num, pool->pg_num_mask, - pool->v.lpg_num, pool->lpg_num_mask); - } - for (i = 0; i < client->osdc.osdmap->max_osd; i++) { - struct ceph_entity_addr *addr = - &client->osdc.osdmap->osd_addr[i]; - int state = client->osdc.osdmap->osd_state[i]; - char sb[64]; - - seq_printf(s, "\tosd%d\t%s\t%3d%%\t(%s)\n", - i, ceph_pr_addr(&addr->in_addr), - ((client->osdc.osdmap->osd_weight[i]*100) >> 16), - ceph_osdmap_state_str(sb, sizeof(sb), state)); - } - return 0; -} - -static int monc_show(struct seq_file *s, void *p) -{ - struct ceph_client *client = s->private; - struct ceph_mon_generic_request *req; - struct ceph_mon_client *monc = &client->monc; - struct rb_node *rp; - - mutex_lock(&monc->mutex); - - if (monc->have_mdsmap) - seq_printf(s, "have mdsmap %u\n", (unsigned)monc->have_mdsmap); - if (monc->have_osdmap) - seq_printf(s, "have osdmap %u\n", (unsigned)monc->have_osdmap); - if (monc->want_next_osdmap) - seq_printf(s, "want next osdmap\n"); - - for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) { - __u16 op; - req = rb_entry(rp, struct ceph_mon_generic_request, node); - op = le16_to_cpu(req->request->hdr.type); - if (op == CEPH_MSG_STATFS) - seq_printf(s, "%lld statfs\n", req->tid); - else - seq_printf(s, "%lld unknown\n", req->tid); - } - - mutex_unlock(&monc->mutex); - return 0; -} - -static int osdc_show(struct seq_file *s, void *pp) -{ - struct ceph_client *client = s->private; - struct ceph_osd_client *osdc = &client->osdc; - struct rb_node *p; - - mutex_lock(&osdc->request_mutex); - for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { - struct ceph_osd_request *req; - struct ceph_osd_request_head *head; - struct ceph_osd_op *op; - int num_ops; - int opcode, olen; - int i; - - req = 
rb_entry(p, struct ceph_osd_request, r_node); - - seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid, - req->r_osd ? req->r_osd->o_osd : -1, - le32_to_cpu(req->r_pgid.pool), - le16_to_cpu(req->r_pgid.ps)); - - head = req->r_request->front.iov_base; - op = (void *)(head + 1); - - num_ops = le16_to_cpu(head->num_ops); - olen = le32_to_cpu(head->object_len); - seq_printf(s, "%.*s", olen, - (const char *)(head->ops + num_ops)); - - if (req->r_reassert_version.epoch) - seq_printf(s, "\t%u'%llu", - (unsigned)le32_to_cpu(req->r_reassert_version.epoch), - le64_to_cpu(req->r_reassert_version.version)); - else - seq_printf(s, "\t"); - - for (i = 0; i < num_ops; i++) { - opcode = le16_to_cpu(op->op); - seq_printf(s, "\t%s", ceph_osd_op_name(opcode)); - op++; - } - - seq_printf(s, "\n"); - } - mutex_unlock(&osdc->request_mutex); - return 0; -} - -CEPH_DEFINE_SHOW_FUNC(monmap_show) -CEPH_DEFINE_SHOW_FUNC(osdmap_show) -CEPH_DEFINE_SHOW_FUNC(monc_show) -CEPH_DEFINE_SHOW_FUNC(osdc_show) - -int ceph_debugfs_init(void) -{ - ceph_debugfs_dir = debugfs_create_dir("ceph", NULL); - if (!ceph_debugfs_dir) - return -ENOMEM; - return 0; -} - -void ceph_debugfs_cleanup(void) -{ - debugfs_remove(ceph_debugfs_dir); -} - -int ceph_debugfs_client_init(struct ceph_client *client) -{ - int ret = -ENOMEM; - char name[80]; - - snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid, - client->monc.auth->global_id); - - client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); - if (!client->debugfs_dir) - goto out; - - client->monc.debugfs_file = debugfs_create_file("monc", - 0600, - client->debugfs_dir, - client, - &monc_show_fops); - if (!client->monc.debugfs_file) - goto out; - - client->osdc.debugfs_file = debugfs_create_file("osdc", - 0600, - client->debugfs_dir, - client, - &osdc_show_fops); - if (!client->osdc.debugfs_file) - goto out; - - client->debugfs_monmap = debugfs_create_file("monmap", - 0600, - client->debugfs_dir, - client, - &monmap_show_fops); - if 
(!client->debugfs_monmap) - goto out; - - client->debugfs_osdmap = debugfs_create_file("osdmap", - 0600, - client->debugfs_dir, - client, - &osdmap_show_fops); - if (!client->debugfs_osdmap) - goto out; - - return 0; - -out: - ceph_debugfs_client_cleanup(client); - return ret; -} - -void ceph_debugfs_client_cleanup(struct ceph_client *client) -{ - debugfs_remove(client->debugfs_osdmap); - debugfs_remove(client->debugfs_monmap); - debugfs_remove(client->osdc.debugfs_file); - debugfs_remove(client->monc.debugfs_file); - debugfs_remove(client->debugfs_dir); -} - -#else /* CONFIG_DEBUG_FS */ - -int ceph_debugfs_init(void) -{ - return 0; -} - -void ceph_debugfs_cleanup(void) -{ -} - -int ceph_debugfs_client_init(struct ceph_client *client) -{ - return 0; -} - -void ceph_debugfs_client_cleanup(struct ceph_client *client) -{ -} - -#endif /* CONFIG_DEBUG_FS */ - -EXPORT_SYMBOL(ceph_debugfs_init); -EXPORT_SYMBOL(ceph_debugfs_cleanup); diff --git a/trunk/net/ceph/pagelist.c b/trunk/net/ceph/pagelist.c deleted file mode 100644 index 13cb409a7bba..000000000000 --- a/trunk/net/ceph/pagelist.c +++ /dev/null @@ -1,154 +0,0 @@ - -#include -#include -#include -#include -#include - -static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl) -{ - if (pl->mapped_tail) { - struct page *page = list_entry(pl->head.prev, struct page, lru); - kunmap(page); - pl->mapped_tail = NULL; - } -} - -int ceph_pagelist_release(struct ceph_pagelist *pl) -{ - ceph_pagelist_unmap_tail(pl); - while (!list_empty(&pl->head)) { - struct page *page = list_first_entry(&pl->head, struct page, - lru); - list_del(&page->lru); - __free_page(page); - } - ceph_pagelist_free_reserve(pl); - return 0; -} -EXPORT_SYMBOL(ceph_pagelist_release); - -static int ceph_pagelist_addpage(struct ceph_pagelist *pl) -{ - struct page *page; - - if (!pl->num_pages_free) { - page = __page_cache_alloc(GFP_NOFS); - } else { - page = list_first_entry(&pl->free_list, struct page, lru); - list_del(&page->lru); - --pl->num_pages_free; 
- } - if (!page) - return -ENOMEM; - pl->room += PAGE_SIZE; - ceph_pagelist_unmap_tail(pl); - list_add_tail(&page->lru, &pl->head); - pl->mapped_tail = kmap(page); - return 0; -} - -int ceph_pagelist_append(struct ceph_pagelist *pl, const void *buf, size_t len) -{ - while (pl->room < len) { - size_t bit = pl->room; - int ret; - - memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), - buf, bit); - pl->length += bit; - pl->room -= bit; - buf += bit; - len -= bit; - ret = ceph_pagelist_addpage(pl); - if (ret) - return ret; - } - - memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), buf, len); - pl->length += len; - pl->room -= len; - return 0; -} -EXPORT_SYMBOL(ceph_pagelist_append); - -/** - * Allocate enough pages for a pagelist to append the given amount - * of data without without allocating. - * Returns: 0 on success, -ENOMEM on error. - */ -int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space) -{ - if (space <= pl->room) - return 0; - space -= pl->room; - space = (space + PAGE_SIZE - 1) >> PAGE_SHIFT; /* conv to num pages */ - - while (space > pl->num_pages_free) { - struct page *page = __page_cache_alloc(GFP_NOFS); - if (!page) - return -ENOMEM; - list_add_tail(&page->lru, &pl->free_list); - ++pl->num_pages_free; - } - return 0; -} -EXPORT_SYMBOL(ceph_pagelist_reserve); - -/** - * Free any pages that have been preallocated. - */ -int ceph_pagelist_free_reserve(struct ceph_pagelist *pl) -{ - while (!list_empty(&pl->free_list)) { - struct page *page = list_first_entry(&pl->free_list, - struct page, lru); - list_del(&page->lru); - __free_page(page); - --pl->num_pages_free; - } - BUG_ON(pl->num_pages_free); - return 0; -} -EXPORT_SYMBOL(ceph_pagelist_free_reserve); - -/** - * Create a truncation point. 
- */ -void ceph_pagelist_set_cursor(struct ceph_pagelist *pl, - struct ceph_pagelist_cursor *c) -{ - c->pl = pl; - c->page_lru = pl->head.prev; - c->room = pl->room; -} -EXPORT_SYMBOL(ceph_pagelist_set_cursor); - -/** - * Truncate a pagelist to the given point. Move extra pages to reserve. - * This won't sleep. - * Returns: 0 on success, - * -EINVAL if the pagelist doesn't match the trunc point pagelist - */ -int ceph_pagelist_truncate(struct ceph_pagelist *pl, - struct ceph_pagelist_cursor *c) -{ - struct page *page; - - if (pl != c->pl) - return -EINVAL; - ceph_pagelist_unmap_tail(pl); - while (pl->head.prev != c->page_lru) { - page = list_entry(pl->head.prev, struct page, lru); - list_del(&page->lru); /* remove from pagelist */ - list_add_tail(&page->lru, &pl->free_list); /* add to reserve */ - ++pl->num_pages_free; - } - pl->room = c->room; - if (!list_empty(&pl->head)) { - page = list_entry(pl->head.prev, struct page, lru); - pl->mapped_tail = kmap(page); - } - return 0; -} -EXPORT_SYMBOL(ceph_pagelist_truncate); diff --git a/trunk/net/ceph/pagevec.c b/trunk/net/ceph/pagevec.c deleted file mode 100644 index 54caf0687155..000000000000 --- a/trunk/net/ceph/pagevec.c +++ /dev/null @@ -1,223 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include - -#include - -/* - * build a vector of user pages - */ -struct page **ceph_get_direct_page_vector(const char __user *data, - int num_pages, - loff_t off, size_t len) -{ - struct page **pages; - int rc; - - pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); - if (!pages) - return ERR_PTR(-ENOMEM); - - down_read(¤t->mm->mmap_sem); - rc = get_user_pages(current, current->mm, (unsigned long)data, - num_pages, 0, 0, pages, NULL); - up_read(¤t->mm->mmap_sem); - if (rc < 0) - goto fail; - return pages; - -fail: - kfree(pages); - return ERR_PTR(rc); -} -EXPORT_SYMBOL(ceph_get_direct_page_vector); - -void ceph_put_page_vector(struct page **pages, int num_pages) -{ - int i; - - for (i = 0; i < 
num_pages; i++) - put_page(pages[i]); - kfree(pages); -} -EXPORT_SYMBOL(ceph_put_page_vector); - -void ceph_release_page_vector(struct page **pages, int num_pages) -{ - int i; - - for (i = 0; i < num_pages; i++) - __free_pages(pages[i], 0); - kfree(pages); -} -EXPORT_SYMBOL(ceph_release_page_vector); - -/* - * allocate a vector new pages - */ -struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) -{ - struct page **pages; - int i; - - pages = kmalloc(sizeof(*pages) * num_pages, flags); - if (!pages) - return ERR_PTR(-ENOMEM); - for (i = 0; i < num_pages; i++) { - pages[i] = __page_cache_alloc(flags); - if (pages[i] == NULL) { - ceph_release_page_vector(pages, i); - return ERR_PTR(-ENOMEM); - } - } - return pages; -} -EXPORT_SYMBOL(ceph_alloc_page_vector); - -/* - * copy user data into a page vector - */ -int ceph_copy_user_to_page_vector(struct page **pages, - const char __user *data, - loff_t off, size_t len) -{ - int i = 0; - int po = off & ~PAGE_CACHE_MASK; - int left = len; - int l, bad; - - while (left > 0) { - l = min_t(int, PAGE_CACHE_SIZE-po, left); - bad = copy_from_user(page_address(pages[i]) + po, data, l); - if (bad == l) - return -EFAULT; - data += l - bad; - left -= l - bad; - po += l - bad; - if (po == PAGE_CACHE_SIZE) { - po = 0; - i++; - } - } - return len; -} -EXPORT_SYMBOL(ceph_copy_user_to_page_vector); - -int ceph_copy_to_page_vector(struct page **pages, - const char *data, - loff_t off, size_t len) -{ - int i = 0; - size_t po = off & ~PAGE_CACHE_MASK; - size_t left = len; - size_t l; - - while (left > 0) { - l = min_t(size_t, PAGE_CACHE_SIZE-po, left); - memcpy(page_address(pages[i]) + po, data, l); - data += l; - left -= l; - po += l; - if (po == PAGE_CACHE_SIZE) { - po = 0; - i++; - } - } - return len; -} -EXPORT_SYMBOL(ceph_copy_to_page_vector); - -int ceph_copy_from_page_vector(struct page **pages, - char *data, - loff_t off, size_t len) -{ - int i = 0; - size_t po = off & ~PAGE_CACHE_MASK; - size_t left = len; - size_t l; - - 
while (left > 0) { - l = min_t(size_t, PAGE_CACHE_SIZE-po, left); - memcpy(data, page_address(pages[i]) + po, l); - data += l; - left -= l; - po += l; - if (po == PAGE_CACHE_SIZE) { - po = 0; - i++; - } - } - return len; -} -EXPORT_SYMBOL(ceph_copy_from_page_vector); - -/* - * copy user data from a page vector into a user pointer - */ -int ceph_copy_page_vector_to_user(struct page **pages, - char __user *data, - loff_t off, size_t len) -{ - int i = 0; - int po = off & ~PAGE_CACHE_MASK; - int left = len; - int l, bad; - - while (left > 0) { - l = min_t(int, left, PAGE_CACHE_SIZE-po); - bad = copy_to_user(data, page_address(pages[i]) + po, l); - if (bad == l) - return -EFAULT; - data += l - bad; - left -= l - bad; - if (po) { - po += l - bad; - if (po == PAGE_CACHE_SIZE) - po = 0; - } - i++; - } - return len; -} -EXPORT_SYMBOL(ceph_copy_page_vector_to_user); - -/* - * Zero an extent within a page vector. Offset is relative to the - * start of the first page. - */ -void ceph_zero_page_vector_range(int off, int len, struct page **pages) -{ - int i = off >> PAGE_CACHE_SHIFT; - - off &= ~PAGE_CACHE_MASK; - - dout("zero_page_vector_page %u~%u\n", off, len); - - /* leading partial page? */ - if (off) { - int end = min((int)PAGE_CACHE_SIZE, off + len); - dout("zeroing %d %p head from %d\n", i, pages[i], - (int)off); - zero_user_segment(pages[i], off, end); - len -= (end - off); - i++; - } - while (len >= PAGE_CACHE_SIZE) { - dout("zeroing %d %p len=%d\n", i, pages[i], len); - zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE); - len -= PAGE_CACHE_SIZE; - i++; - } - /* trailing partial page? 
*/ - if (len) { - dout("zeroing %d %p tail to %d\n", i, pages[i], (int)len); - zero_user_segment(pages[i], 0, len); - } -} -EXPORT_SYMBOL(ceph_zero_page_vector_range); - diff --git a/trunk/security/tomoyo/common.c b/trunk/security/tomoyo/common.c index c668b447c725..e0a1059aaf3a 100644 --- a/trunk/security/tomoyo/common.c +++ b/trunk/security/tomoyo/common.c @@ -768,8 +768,10 @@ static bool tomoyo_select_one(struct tomoyo_io_buffer *head, const char *data) return true; /* Do nothing if open(O_WRONLY). */ memset(&head->r, 0, sizeof(head->r)); head->r.print_this_domain_only = true; - head->r.eof = !domain; - head->r.domain = &domain->list; + if (domain) + head->r.domain = &domain->list; + else + head->r.eof = 1; tomoyo_io_printf(head, "# select %s\n", data); if (domain && domain->is_deleted) tomoyo_io_printf(head, "# This is a deleted domain.\n");