Skip to content

Commit

Permalink
ceph: pre-allocate data structure that tracks caps flushing
Browse files Browse the repository at this point in the history
Signed-off-by: Yan, Zheng <zyan@redhat.com>
  • Loading branch information
Yan, Zheng authored and Ilya Dryomov committed Jun 25, 2015
1 parent e548e9b commit f66fd9f
Show file tree
Hide file tree
Showing 9 changed files with 103 additions and 16 deletions.
19 changes: 15 additions & 4 deletions fs/ceph/addr.c
Original file line number Diff line number Diff line change
Expand Up @@ -1308,12 +1308,17 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
struct inode *inode = file_inode(vma->vm_file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_info *fi = vma->vm_file->private_data;
struct ceph_cap_flush *prealloc_cf;
struct page *page = vmf->page;
loff_t off = page_offset(page);
loff_t size = i_size_read(inode);
size_t len;
int want, got, ret;

prealloc_cf = ceph_alloc_cap_flush();
if (!prealloc_cf)
return VM_FAULT_SIGBUS;

if (ci->i_inline_version != CEPH_INLINE_NONE) {
struct page *locked_page = NULL;
if (off == 0) {
Expand All @@ -1323,8 +1328,10 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
ret = ceph_uninline_data(vma->vm_file, locked_page);
if (locked_page)
unlock_page(locked_page);
if (ret < 0)
return VM_FAULT_SIGBUS;
if (ret < 0) {
ret = VM_FAULT_SIGBUS;
goto out_free;
}
}

if (off + PAGE_CACHE_SIZE <= size)
Expand All @@ -1346,7 +1353,8 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
break;
if (ret != -ERESTARTSYS) {
WARN_ON(1);
return VM_FAULT_SIGBUS;
ret = VM_FAULT_SIGBUS;
goto out_free;
}
}
dout("page_mkwrite %p %llu~%zd got cap refs on %s\n",
Expand Down Expand Up @@ -1381,7 +1389,8 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
int dirty;
spin_lock(&ci->i_ceph_lock);
ci->i_inline_version = CEPH_INLINE_NONE;
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
&prealloc_cf);
spin_unlock(&ci->i_ceph_lock);
if (dirty)
__mark_inode_dirty(inode, dirty);
Expand All @@ -1390,6 +1399,8 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %d\n",
inode, off, len, ceph_cap_string(got), ret);
ceph_put_cap_refs(ci, got);
out_free:
ceph_free_cap_flush(prealloc_cf);

return ret;
}
Expand Down
26 changes: 22 additions & 4 deletions fs/ceph/caps.c
Original file line number Diff line number Diff line change
Expand Up @@ -1356,7 +1356,8 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
* Caller is then responsible for calling __mark_inode_dirty with the
* returned flags value.
*/
int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
struct ceph_cap_flush **pcf)
{
struct ceph_mds_client *mdsc =
ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
Expand All @@ -1376,6 +1377,9 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
ceph_cap_string(was | mask));
ci->i_dirty_caps |= mask;
if (was == 0) {
WARN_ON_ONCE(ci->i_prealloc_cap_flush);
swap(ci->i_prealloc_cap_flush, *pcf);

if (!ci->i_head_snapc) {
WARN_ON_ONCE(!rwsem_is_locked(&mdsc->snap_rwsem));
ci->i_head_snapc = ceph_get_snap_context(
Expand All @@ -1391,6 +1395,8 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
ihold(inode);
dirty |= I_DIRTY_SYNC;
}
} else {
WARN_ON_ONCE(!ci->i_prealloc_cap_flush);
}
BUG_ON(list_empty(&ci->i_dirty_item));
if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) &&
Expand Down Expand Up @@ -1446,6 +1452,17 @@ static void __add_cap_flushing_to_mdsc(struct ceph_mds_client *mdsc,
rb_insert_color(&cf->g_node, &mdsc->cap_flush_tree);
}

struct ceph_cap_flush *ceph_alloc_cap_flush(void)
{
return kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
}

void ceph_free_cap_flush(struct ceph_cap_flush *cf)
{
if (cf)
kmem_cache_free(ceph_cap_flush_cachep, cf);
}

static u64 __get_oldest_flush_tid(struct ceph_mds_client *mdsc)
{
struct rb_node *n = rb_first(&mdsc->cap_flush_tree);
Expand All @@ -1469,11 +1486,12 @@ static int __mark_caps_flushing(struct inode *inode,
{
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_cap_flush *cf;
struct ceph_cap_flush *cf = NULL;
int flushing;

BUG_ON(ci->i_dirty_caps == 0);
BUG_ON(list_empty(&ci->i_dirty_item));
BUG_ON(!ci->i_prealloc_cap_flush);

flushing = ci->i_dirty_caps;
dout("__mark_caps_flushing flushing %s, flushing_caps %s -> %s\n",
Expand All @@ -1484,7 +1502,7 @@ static int __mark_caps_flushing(struct inode *inode,
ci->i_dirty_caps = 0;
dout(" inode %p now !dirty\n", inode);

cf = kmalloc(sizeof(*cf), GFP_ATOMIC);
swap(cf, ci->i_prealloc_cap_flush);
cf->caps = flushing;
cf->kick = false;

Expand Down Expand Up @@ -3075,7 +3093,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
cf = list_first_entry(&to_remove,
struct ceph_cap_flush, list);
list_del(&cf->list);
kfree(cf);
ceph_free_cap_flush(cf);
}
if (drop)
iput(inode);
Expand Down
18 changes: 16 additions & 2 deletions fs/ceph/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -939,13 +939,18 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_osd_client *osdc =
&ceph_sb_to_client(inode->i_sb)->client->osdc;
struct ceph_cap_flush *prealloc_cf;
ssize_t count, written = 0;
int err, want, got;
loff_t pos;

if (ceph_snap(inode) != CEPH_NOSNAP)
return -EROFS;

prealloc_cf = ceph_alloc_cap_flush();
if (!prealloc_cf)
return -ENOMEM;

mutex_lock(&inode->i_mutex);

/* We can write back this queue in page reclaim */
Expand Down Expand Up @@ -1050,7 +1055,8 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
int dirty;
spin_lock(&ci->i_ceph_lock);
ci->i_inline_version = CEPH_INLINE_NONE;
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
&prealloc_cf);
spin_unlock(&ci->i_ceph_lock);
if (dirty)
__mark_inode_dirty(inode, dirty);
Expand All @@ -1074,6 +1080,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
out:
mutex_unlock(&inode->i_mutex);
out_unlocked:
ceph_free_cap_flush(prealloc_cf);
current->backing_dev_info = NULL;
return written ? written : err;
}
Expand Down Expand Up @@ -1270,6 +1277,7 @@ static long ceph_fallocate(struct file *file, int mode,
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_osd_client *osdc =
&ceph_inode_to_client(inode)->client->osdc;
struct ceph_cap_flush *prealloc_cf;
int want, got = 0;
int dirty;
int ret = 0;
Expand All @@ -1282,6 +1290,10 @@ static long ceph_fallocate(struct file *file, int mode,
if (!S_ISREG(inode->i_mode))
return -EOPNOTSUPP;

prealloc_cf = ceph_alloc_cap_flush();
if (!prealloc_cf)
return -ENOMEM;

mutex_lock(&inode->i_mutex);

if (ceph_snap(inode) != CEPH_NOSNAP) {
Expand Down Expand Up @@ -1328,7 +1340,8 @@ static long ceph_fallocate(struct file *file, int mode,
if (!ret) {
spin_lock(&ci->i_ceph_lock);
ci->i_inline_version = CEPH_INLINE_NONE;
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
&prealloc_cf);
spin_unlock(&ci->i_ceph_lock);
if (dirty)
__mark_inode_dirty(inode, dirty);
Expand All @@ -1337,6 +1350,7 @@ static long ceph_fallocate(struct file *file, int mode,
ceph_put_cap_refs(ci, got);
unlock:
mutex_unlock(&inode->i_mutex);
ceph_free_cap_flush(prealloc_cf);
return ret;
}

Expand Down
15 changes: 13 additions & 2 deletions fs/ceph/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
ci->i_flushing_caps = 0;
INIT_LIST_HEAD(&ci->i_dirty_item);
INIT_LIST_HEAD(&ci->i_flushing_item);
ci->i_prealloc_cap_flush = NULL;
ci->i_cap_flush_tree = RB_ROOT;
init_waitqueue_head(&ci->i_cap_wq);
ci->i_hold_caps_min = 0;
Expand Down Expand Up @@ -1720,6 +1721,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
const unsigned int ia_valid = attr->ia_valid;
struct ceph_mds_request *req;
struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
struct ceph_cap_flush *prealloc_cf;
int issued;
int release = 0, dirtied = 0;
int mask = 0;
Expand All @@ -1734,10 +1736,16 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
if (err != 0)
return err;

prealloc_cf = ceph_alloc_cap_flush();
if (!prealloc_cf)
return -ENOMEM;

req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETATTR,
USE_AUTH_MDS);
if (IS_ERR(req))
if (IS_ERR(req)) {
ceph_free_cap_flush(prealloc_cf);
return PTR_ERR(req);
}

spin_lock(&ci->i_ceph_lock);
issued = __ceph_caps_issued(ci, NULL);
Expand Down Expand Up @@ -1895,7 +1903,8 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
dout("setattr %p ATTR_FILE ... hrm!\n", inode);

if (dirtied) {
inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied);
inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied,
&prealloc_cf);
inode->i_ctime = CURRENT_TIME;
}

Expand Down Expand Up @@ -1927,9 +1936,11 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
ceph_mdsc_put_request(req);
if (mask & CEPH_SETATTR_SIZE)
__ceph_do_pending_vmtruncate(inode);
ceph_free_cap_flush(prealloc_cf);
return err;
out_put:
ceph_mdsc_put_request(req);
ceph_free_cap_flush(prealloc_cf);
return err;
}

Expand Down
6 changes: 5 additions & 1 deletion fs/ceph/mds_client.c
Original file line number Diff line number Diff line change
Expand Up @@ -1189,14 +1189,18 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
}
spin_unlock(&mdsc->cap_dirty_lock);

if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) {
list_add(&ci->i_prealloc_cap_flush->list, &to_remove);
ci->i_prealloc_cap_flush = NULL;
}
}
spin_unlock(&ci->i_ceph_lock);
while (!list_empty(&to_remove)) {
struct ceph_cap_flush *cf;
cf = list_first_entry(&to_remove,
struct ceph_cap_flush, list);
list_del(&cf->list);
kfree(cf);
ceph_free_cap_flush(cf);
}
while (drop--)
iput(inode);
Expand Down
8 changes: 8 additions & 0 deletions fs/ceph/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,7 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
*/
struct kmem_cache *ceph_inode_cachep;
struct kmem_cache *ceph_cap_cachep;
struct kmem_cache *ceph_cap_flush_cachep;
struct kmem_cache *ceph_dentry_cachep;
struct kmem_cache *ceph_file_cachep;

Expand All @@ -647,6 +648,10 @@ static int __init init_caches(void)
SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
if (ceph_cap_cachep == NULL)
goto bad_cap;
ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush,
SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
if (ceph_cap_flush_cachep == NULL)
goto bad_cap_flush;

ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
Expand All @@ -665,6 +670,8 @@ static int __init init_caches(void)
bad_file:
kmem_cache_destroy(ceph_dentry_cachep);
bad_dentry:
kmem_cache_destroy(ceph_cap_flush_cachep);
bad_cap_flush:
kmem_cache_destroy(ceph_cap_cachep);
bad_cap:
kmem_cache_destroy(ceph_inode_cachep);
Expand All @@ -681,6 +688,7 @@ static void destroy_caches(void)

kmem_cache_destroy(ceph_inode_cachep);
kmem_cache_destroy(ceph_cap_cachep);
kmem_cache_destroy(ceph_cap_flush_cachep);
kmem_cache_destroy(ceph_dentry_cachep);
kmem_cache_destroy(ceph_file_cachep);

Expand Down
6 changes: 5 additions & 1 deletion fs/ceph/super.h
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,7 @@ struct ceph_inode_info {
/* we need to track cap writeback on a per-cap-bit basis, to allow
* overlapping, pipelined cap flushes to the mds. we can probably
* reduce the tid to 8 bits if we're concerned about inode size. */
struct ceph_cap_flush *i_prealloc_cap_flush;
struct rb_root i_cap_flush_tree;
wait_queue_head_t i_cap_wq; /* threads waiting on a capability */
unsigned long i_hold_caps_min; /* jiffies */
Expand Down Expand Up @@ -578,7 +579,10 @@ static inline int __ceph_caps_dirty(struct ceph_inode_info *ci)
{
return ci->i_dirty_caps | ci->i_flushing_caps;
}
extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask);
extern struct ceph_cap_flush *ceph_alloc_cap_flush(void);
extern void ceph_free_cap_flush(struct ceph_cap_flush *cf);
extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
struct ceph_cap_flush **pcf);

extern int __ceph_caps_revoking_other(struct ceph_inode_info *ci,
struct ceph_cap *ocap, int mask);
Expand Down
Loading

0 comments on commit f66fd9f

Please sign in to comment.