Skip to content

Commit

Permalink
Merge tag 'ceph-for-4.13-rc1' of git://github.com/ceph/ceph-client
Browse files Browse the repository at this point in the history
Pull ceph updates from Ilya Dryomov:
 "The main item here is support for v12.y.z ("Luminous") clusters:
  RESEND_ON_SPLIT, RADOS_BACKOFF, OSDMAP_PG_UPMAP and CRUSH_CHOOSE_ARGS
  feature bits, and various other changes in the RADOS client protocol.

  On top of that we have a new fsc mount option to allow supplying
  fscache uniquifier (similar to NFS) and the usual pile of filesystem
  fixes from Zheng"

* tag 'ceph-for-4.13-rc1' of git://github.com/ceph/ceph-client: (44 commits)
  libceph: advertise support for NEW_OSDOP_ENCODING and SERVER_LUMINOUS
  libceph: osd_state is 32 bits wide in luminous
  crush: remove an obsolete comment
  crush: crush_init_workspace starts with struct crush_work
  libceph, crush: per-pool crush_choose_arg_map for crush_do_rule()
  crush: implement weight and id overrides for straw2
  libceph: apply_upmap()
  libceph: compute actual pgid in ceph_pg_to_up_acting_osds()
  libceph: pg_upmap[_items] infrastructure
  libceph: ceph_decode_skip_* helpers
  libceph: kill __{insert,lookup,remove}_pg_mapping()
  libceph: introduce and switch to decode_pg_mapping()
  libceph: don't pass pgid by value
  libceph: respect RADOS_BACKOFF backoffs
  libceph: make DEFINE_RB_* helpers more general
  libceph: avoid unnecessary pi lookups in calc_target()
  libceph: use target pi for calc_target() calculations
  libceph: always populate t->target_{oid,oloc} in calc_target()
  libceph: make sure need_resend targets reflect latest map
  libceph: delete from need_resend_linger before check_linger_pool_dne()
  ...
  • Loading branch information
Linus Torvalds committed Jul 11, 2017
2 parents 07d306c + 33e9c8d commit 3bf7878
Show file tree
Hide file tree
Showing 28 changed files with 2,308 additions and 476 deletions.
21 changes: 9 additions & 12 deletions fs/ceph/addr.c
Original file line number Diff line number Diff line change
Expand Up @@ -530,14 +530,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
long writeback_stat;
u64 truncate_size;
u32 truncate_seq;
int err = 0, len = PAGE_SIZE;
int err, len = PAGE_SIZE;

dout("writepage %p idx %lu\n", page, page->index);

if (!page->mapping || !page->mapping->host) {
dout("writepage %p - no mapping\n", page);
return -EFAULT;
}
inode = page->mapping->host;
ci = ceph_inode(inode);
fsc = ceph_inode_to_client(inode);
Expand All @@ -547,17 +543,18 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
snapc = page_snap_context(page);
if (snapc == NULL) {
dout("writepage %p page %p not dirty?\n", inode, page);
goto out;
return 0;
}
oldest = get_oldest_context(inode, &snap_size,
&truncate_size, &truncate_seq);
if (snapc->seq > oldest->seq) {
dout("writepage %p page %p snapc %p not writeable - noop\n",
inode, page, snapc);
/* we should only noop if called by kswapd */
WARN_ON((current->flags & PF_MEMALLOC) == 0);
WARN_ON(!(current->flags & PF_MEMALLOC));
ceph_put_snap_context(oldest);
goto out;
redirty_page_for_writepage(wbc, page);
return 0;
}
ceph_put_snap_context(oldest);

Expand All @@ -567,8 +564,9 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
/* is this a partial page at end of file? */
if (page_off >= snap_size) {
dout("%p page eof %llu\n", page, snap_size);
goto out;
return 0;
}

if (snap_size < page_off + len)
len = snap_size - page_off;

Expand All @@ -595,7 +593,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
dout("writepage interrupted page %p\n", page);
redirty_page_for_writepage(wbc, page);
end_page_writeback(page);
goto out;
return err;
}
dout("writepage setting page/mapping error %d %p\n",
err, page);
Expand All @@ -611,7 +609,6 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
end_page_writeback(page);
ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
ceph_put_snap_context(snapc); /* page's reference */
out:
return err;
}

Expand Down Expand Up @@ -1318,7 +1315,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
struct page *page, void *fsdata)
{
struct inode *inode = file_inode(file);
int check_cap = 0;
bool check_cap = false;

dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
inode, page, (int)pos, (int)copied, (int)len);
Expand Down
92 changes: 83 additions & 9 deletions fs/ceph/cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,34 @@ struct fscache_netfs ceph_cache_netfs = {
.version = 0,
};

static DEFINE_MUTEX(ceph_fscache_lock);
static LIST_HEAD(ceph_fscache_list);

/*
 * Bookkeeping record for one fscache cookie registered by a ceph client.
 *
 * Entries are linked on ceph_fscache_list (under ceph_fscache_lock) and are
 * matched on the (fsid, uniquifier) pair, so that a second registration with
 * the same pair can be refused.
 *
 * The uniquifier bytes live in the trailing flexible array: the entry is
 * allocated as sizeof(*ent) + uniq_len and the bytes are copied without a
 * terminating NUL, so always use uniq_len (never strlen()) when reading them.
 */
struct ceph_fscache_entry {
	struct list_head list;		/* link in ceph_fscache_list */
	struct fscache_cookie *fscache;	/* cookie this entry was created for */
	struct ceph_fsid fsid;		/* copy of the client's cluster fsid */
	size_t uniq_len;		/* number of valid bytes in uniquifier[] */
	char uniquifier[];		/* uniq_len bytes, not NUL-terminated */
};

static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
void *buffer, uint16_t maxbuf)
{
const struct ceph_fs_client* fsc = cookie_netfs_data;
uint16_t klen;
const char *fscache_uniq = fsc->mount_options->fscache_uniq;
uint16_t fsid_len, uniq_len;

klen = sizeof(fsc->client->fsid);
if (klen > maxbuf)
fsid_len = sizeof(fsc->client->fsid);
uniq_len = fscache_uniq ? strlen(fscache_uniq) : 0;
if (fsid_len + uniq_len > maxbuf)
return 0;

memcpy(buffer, &fsc->client->fsid, klen);
return klen;
memcpy(buffer, &fsc->client->fsid, fsid_len);
if (uniq_len)
memcpy(buffer + fsid_len, fscache_uniq, uniq_len);

return fsid_len + uniq_len;
}

static const struct fscache_cookie_def ceph_fscache_fsid_object_def = {
Expand All @@ -67,13 +83,54 @@ void ceph_fscache_unregister(void)

int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
{
const struct ceph_fsid *fsid = &fsc->client->fsid;
const char *fscache_uniq = fsc->mount_options->fscache_uniq;
size_t uniq_len = fscache_uniq ? strlen(fscache_uniq) : 0;
struct ceph_fscache_entry *ent;
int err = 0;

mutex_lock(&ceph_fscache_lock);
list_for_each_entry(ent, &ceph_fscache_list, list) {
if (memcmp(&ent->fsid, fsid, sizeof(*fsid)))
continue;
if (ent->uniq_len != uniq_len)
continue;
if (uniq_len && memcmp(ent->uniquifier, fscache_uniq, uniq_len))
continue;

pr_err("fscache cookie already registered for fsid %pU\n", fsid);
pr_err(" use fsc=%%s mount option to specify a uniquifier\n");
err = -EBUSY;
goto out_unlock;
}

ent = kzalloc(sizeof(*ent) + uniq_len, GFP_KERNEL);
if (!ent) {
err = -ENOMEM;
goto out_unlock;
}

fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index,
&ceph_fscache_fsid_object_def,
fsc, true);
if (!fsc->fscache)
pr_err("Unable to register fsid: %p fscache cookie\n", fsc);

return 0;
if (fsc->fscache) {
memcpy(&ent->fsid, fsid, sizeof(*fsid));
if (uniq_len > 0) {
memcpy(&ent->uniquifier, fscache_uniq, uniq_len);
ent->uniq_len = uniq_len;
}
ent->fscache = fsc->fscache;
list_add_tail(&ent->list, &ceph_fscache_list);
} else {
kfree(ent);
pr_err("unable to register fscache cookie for fsid %pU\n",
fsid);
/* all other fs ignore this error */
}
out_unlock:
mutex_unlock(&ceph_fscache_lock);
return err;
}

static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data,
Expand Down Expand Up @@ -349,7 +406,24 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)

void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
{
fscache_relinquish_cookie(fsc->fscache, 0);
if (fscache_cookie_valid(fsc->fscache)) {
struct ceph_fscache_entry *ent;
bool found = false;

mutex_lock(&ceph_fscache_lock);
list_for_each_entry(ent, &ceph_fscache_list, list) {
if (ent->fscache == fsc->fscache) {
list_del(&ent->list);
kfree(ent);
found = true;
break;
}
}
WARN_ON_ONCE(!found);
mutex_unlock(&ceph_fscache_lock);

__fscache_relinquish_cookie(fsc->fscache, 0);
}
fsc->fscache = NULL;
}

Expand Down
40 changes: 33 additions & 7 deletions fs/ceph/caps.c
Original file line number Diff line number Diff line change
Expand Up @@ -1653,6 +1653,21 @@ static int try_nonblocking_invalidate(struct inode *inode)
return -1;
}

/*
 * Decide whether the inode's current size warrants being reported.
 *
 * Returns true when i_size has reached i_max_size, or when it has passed
 * the midpoint between i_reported_size and i_max_size.  Always returns
 * false while CEPH_CAP_FILE_WR is set in i_flushing_caps.
 */
bool __ceph_should_report_size(struct ceph_inode_info *ci)
{
	loff_t cur_size = ci->vfs_inode.i_size;

	/*
	 * A FILE_WR flush is in flight; the mds will adjust max size
	 * according to the size reported with it.
	 */
	if (ci->i_flushing_caps & CEPH_CAP_FILE_WR)
		return false;

	/* already at (or beyond) the current limit */
	if (cur_size >= ci->i_max_size)
		return true;

	/* no headroom was granted past what we last reported */
	if (ci->i_max_size <= ci->i_reported_size)
		return false;

	/*
	 * 2 * size >= max + reported, i.e. at least half of the previous
	 * max_size increment has been used.
	 */
	return (cur_size << 1) >= ci->i_max_size + ci->i_reported_size;
}

/*
* Swiss army knife function to examine currently used and wanted
* versus held caps. Release, flush, ack revoked caps to mds as
Expand Down Expand Up @@ -1806,8 +1821,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
}

/* approaching file_max? */
if ((inode->i_size << 1) >= ci->i_max_size &&
(ci->i_reported_size << 1) < ci->i_max_size) {
if (__ceph_should_report_size(ci)) {
dout("i_size approaching max_size\n");
goto ack;
}
Expand Down Expand Up @@ -3027,8 +3041,10 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
le32_to_cpu(grant->truncate_seq),
le64_to_cpu(grant->truncate_size),
size);
/* max size increase? */
if (ci->i_auth_cap == cap && max_size != ci->i_max_size) {
}

if (ci->i_auth_cap == cap && (newcaps & CEPH_CAP_ANY_FILE_WR)) {
if (max_size != ci->i_max_size) {
dout("max_size %lld -> %llu\n",
ci->i_max_size, max_size);
ci->i_max_size = max_size;
Expand All @@ -3037,6 +3053,10 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
ci->i_requested_max_size = 0;
}
wake = true;
} else if (ci->i_wanted_max_size > ci->i_max_size &&
ci->i_wanted_max_size > ci->i_requested_max_size) {
/* CEPH_CAP_OP_IMPORT */
wake = true;
}
}

Expand Down Expand Up @@ -3554,7 +3574,6 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
}

/* make sure we re-request max_size, if necessary */
ci->i_wanted_max_size = 0;
ci->i_requested_max_size = 0;

*old_issued = issued;
Expand Down Expand Up @@ -3790,6 +3809,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
*/
void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
{
struct inode *inode;
struct ceph_inode_info *ci;
int flags = CHECK_CAPS_NODELAY;

Expand All @@ -3805,9 +3825,15 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
time_before(jiffies, ci->i_hold_caps_max))
break;
list_del_init(&ci->i_cap_delay_list);

inode = igrab(&ci->vfs_inode);
spin_unlock(&mdsc->cap_delay_lock);
dout("check_delayed_caps on %p\n", &ci->vfs_inode);
ceph_check_caps(ci, flags, NULL);

if (inode) {
dout("check_delayed_caps on %p\n", inode);
ceph_check_caps(ci, flags, NULL);
iput(inode);
}
}
spin_unlock(&mdsc->cap_delay_lock);
}
Expand Down
2 changes: 1 addition & 1 deletion fs/ceph/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -1040,8 +1040,8 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
int num_pages;
int written = 0;
int flags;
int check_caps = 0;
int ret;
bool check_caps = false;
struct timespec mtime = current_time(inode);
size_t count = iov_iter_count(from);

Expand Down
18 changes: 10 additions & 8 deletions fs/ceph/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -1016,6 +1016,7 @@ static void update_dentry_lease(struct dentry *dentry,
long unsigned ttl = from_time + (duration * HZ) / 1000;
long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000;
struct inode *dir;
struct ceph_mds_session *old_lease_session = NULL;

/*
* Make sure dentry's inode matches tgt_vino. NULL tgt_vino means that
Expand Down Expand Up @@ -1051,8 +1052,10 @@ static void update_dentry_lease(struct dentry *dentry,
time_before(ttl, di->time))
goto out_unlock; /* we already have a newer lease. */

if (di->lease_session && di->lease_session != session)
goto out_unlock;
if (di->lease_session && di->lease_session != session) {
old_lease_session = di->lease_session;
di->lease_session = NULL;
}

ceph_dentry_lru_touch(dentry);

Expand All @@ -1065,6 +1068,8 @@ static void update_dentry_lease(struct dentry *dentry,
di->time = ttl;
out_unlock:
spin_unlock(&dentry->d_lock);
if (old_lease_session)
ceph_put_mds_session(old_lease_session);
return;
}

Expand Down Expand Up @@ -1653,20 +1658,17 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
return err;
}

int ceph_inode_set_size(struct inode *inode, loff_t size)
bool ceph_inode_set_size(struct inode *inode, loff_t size)
{
struct ceph_inode_info *ci = ceph_inode(inode);
int ret = 0;
bool ret;

spin_lock(&ci->i_ceph_lock);
dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
i_size_write(inode, size);
inode->i_blocks = calc_inode_blocks(size);

/* tell the MDS if we are approaching max_size */
if ((size << 1) >= ci->i_max_size &&
(ci->i_reported_size << 1) < ci->i_max_size)
ret = 1;
ret = __ceph_should_report_size(ci);

spin_unlock(&ci->i_ceph_lock);
return ret;
Expand Down
25 changes: 24 additions & 1 deletion fs/ceph/locks.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,29 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
dout("ceph_lock_wait_for_completion: request %llu was interrupted\n",
req->r_tid);

mutex_lock(&mdsc->mutex);
if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
err = 0;
} else {
/*
* ensure we aren't running concurrently with
* ceph_fill_trace or ceph_readdir_prepopulate, which
* rely on locks (dir mutex) held by our caller.
*/
mutex_lock(&req->r_fill_mutex);
req->r_err = err;
set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
mutex_unlock(&req->r_fill_mutex);

if (!req->r_session) {
// haven't sent the request
err = 0;
}
}
mutex_unlock(&mdsc->mutex);
if (!err)
return 0;

intr_req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETFILELOCK,
USE_AUTH_MDS);
if (IS_ERR(intr_req))
Expand All @@ -146,7 +169,7 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
if (err && err != -ERESTARTSYS)
return err;

wait_for_completion(&req->r_completion);
wait_for_completion_killable(&req->r_safe_completion);
return 0;
}

Expand Down
Loading

0 comments on commit 3bf7878

Please sign in to comment.