Skip to content

Commit

Permalink
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Browse files Browse the repository at this point in the history

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
  ceph: use separate class for ceph sockets' sk_lock
  ceph: reserve one more caps space when doing readdir
  ceph: queue_cap_snap should always queue dirty context
  ceph: fix dentry reference leak in dcache readdir
  ceph: decode v5 of osdmap (pool names) [protocol change]
  ceph: fix ack counter reset on connection reset
  ceph: fix leaked inode ref due to snap metadata writeback race
  ceph: fix snap context reference leaks
  ceph: allow writeback of snapped pages older than 'oldest' snapc
  ceph: fix dentry rehashing on virtual .snap dir
  • Loading branch information
Linus Torvalds committed Apr 15, 2010
2 parents f5c07a2 + a6a5349 commit 96e35b4
Show file tree
Hide file tree
Showing 10 changed files with 212 additions and 134 deletions.
62 changes: 30 additions & 32 deletions fs/ceph/addr.c
Original file line number Diff line number Diff line change
Expand Up @@ -337,16 +337,15 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
/*
* Get ref for the oldest snapc for an inode with dirty data... that is, the
* only snap context we are allowed to write back.
*
* Caller holds i_lock.
*/
static struct ceph_snap_context *__get_oldest_context(struct inode *inode,
u64 *snap_size)
static struct ceph_snap_context *get_oldest_context(struct inode *inode,
u64 *snap_size)
{
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_snap_context *snapc = NULL;
struct ceph_cap_snap *capsnap = NULL;

spin_lock(&inode->i_lock);
list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
capsnap->context, capsnap->dirty_pages);
Expand All @@ -357,21 +356,11 @@ static struct ceph_snap_context *__get_oldest_context(struct inode *inode,
break;
}
}
if (!snapc && ci->i_snap_realm) {
snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context);
if (!snapc && ci->i_head_snapc) {
snapc = ceph_get_snap_context(ci->i_head_snapc);
dout(" head snapc %p has %d dirty pages\n",
snapc, ci->i_wrbuffer_ref_head);
}
return snapc;
}

static struct ceph_snap_context *get_oldest_context(struct inode *inode,
u64 *snap_size)
{
struct ceph_snap_context *snapc = NULL;

spin_lock(&inode->i_lock);
snapc = __get_oldest_context(inode, snap_size);
spin_unlock(&inode->i_lock);
return snapc;
}
Expand All @@ -392,7 +381,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
int len = PAGE_CACHE_SIZE;
loff_t i_size;
int err = 0;
struct ceph_snap_context *snapc;
struct ceph_snap_context *snapc, *oldest;
u64 snap_size = 0;
long writeback_stat;

Expand All @@ -413,13 +402,16 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
dout("writepage %p page %p not dirty?\n", inode, page);
goto out;
}
if (snapc != get_oldest_context(inode, &snap_size)) {
oldest = get_oldest_context(inode, &snap_size);
if (snapc->seq > oldest->seq) {
dout("writepage %p page %p snapc %p not writeable - noop\n",
inode, page, (void *)page->private);
/* we should only noop if called by kswapd */
WARN_ON((current->flags & PF_MEMALLOC) == 0);
ceph_put_snap_context(oldest);
goto out;
}
ceph_put_snap_context(oldest);

/* is this a partial page at end of file? */
if (snap_size)
Expand Down Expand Up @@ -458,7 +450,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
ClearPagePrivate(page);
end_page_writeback(page);
ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
ceph_put_snap_context(snapc);
ceph_put_snap_context(snapc); /* page's reference */
out:
return err;
}
Expand Down Expand Up @@ -558,9 +550,9 @@ static void writepages_finish(struct ceph_osd_request *req,
dout("inode %p skipping page %p\n", inode, page);
wbc->pages_skipped++;
}
ceph_put_snap_context((void *)page->private);
page->private = 0;
ClearPagePrivate(page);
ceph_put_snap_context(snapc);
dout("unlocking %d %p\n", i, page);
end_page_writeback(page);

Expand Down Expand Up @@ -618,7 +610,7 @@ static int ceph_writepages_start(struct address_space *mapping,
int range_whole = 0;
int should_loop = 1;
pgoff_t max_pages = 0, max_pages_ever = 0;
struct ceph_snap_context *snapc = NULL, *last_snapc = NULL;
struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc;
struct pagevec pvec;
int done = 0;
int rc = 0;
Expand Down Expand Up @@ -770,9 +762,10 @@ static int ceph_writepages_start(struct address_space *mapping,
}

/* only if matching snap context */
if (snapc != (void *)page->private) {
dout("page snapc %p != oldest %p\n",
(void *)page->private, snapc);
pgsnapc = (void *)page->private;
if (pgsnapc->seq > snapc->seq) {
dout("page snapc %p %lld > oldest %p %lld\n",
pgsnapc, pgsnapc->seq, snapc, snapc->seq);
unlock_page(page);
if (!locked_pages)
continue; /* keep looking for snap */
Expand Down Expand Up @@ -914,7 +907,10 @@ static int context_is_writeable_or_written(struct inode *inode,
struct ceph_snap_context *snapc)
{
struct ceph_snap_context *oldest = get_oldest_context(inode, NULL);
return !oldest || snapc->seq <= oldest->seq;
int ret = !oldest || snapc->seq <= oldest->seq;

ceph_put_snap_context(oldest);
return ret;
}

/*
Expand All @@ -936,8 +932,8 @@ static int ceph_update_writeable_page(struct file *file,
int pos_in_page = pos & ~PAGE_CACHE_MASK;
int end_in_page = pos_in_page + len;
loff_t i_size;
struct ceph_snap_context *snapc;
int r;
struct ceph_snap_context *snapc, *oldest;

retry_locked:
/* writepages currently holds page lock, but if we change that later, */
Expand All @@ -947,23 +943,24 @@ static int ceph_update_writeable_page(struct file *file,
BUG_ON(!ci->i_snap_realm);
down_read(&mdsc->snap_rwsem);
BUG_ON(!ci->i_snap_realm->cached_context);
if (page->private &&
(void *)page->private != ci->i_snap_realm->cached_context) {
snapc = (void *)page->private;
if (snapc && snapc != ci->i_head_snapc) {
/*
* this page is already dirty in another (older) snap
* context! is it writeable now?
*/
snapc = get_oldest_context(inode, NULL);
oldest = get_oldest_context(inode, NULL);
up_read(&mdsc->snap_rwsem);

if (snapc != (void *)page->private) {
if (snapc->seq > oldest->seq) {
ceph_put_snap_context(oldest);
dout(" page %p snapc %p not current or oldest\n",
page, (void *)page->private);
page, snapc);
/*
* queue for writeback, and wait for snapc to
* be writeable or written
*/
snapc = ceph_get_snap_context((void *)page->private);
snapc = ceph_get_snap_context(snapc);
unlock_page(page);
ceph_queue_writeback(inode);
r = wait_event_interruptible(ci->i_cap_wq,
Expand All @@ -973,6 +970,7 @@ static int ceph_update_writeable_page(struct file *file,
return r;
return -EAGAIN;
}
ceph_put_snap_context(oldest);

/* yay, writeable, do it now (without dropping page lock) */
dout(" page %p snapc %p not current, but oldest\n",
Expand Down
42 changes: 32 additions & 10 deletions fs/ceph/caps.c
Original file line number Diff line number Diff line change
Expand Up @@ -1205,6 +1205,12 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci,
if (capsnap->dirty_pages || capsnap->writing)
continue;

/*
* if cap writeback already occurred, we should have dropped
* the capsnap in ceph_put_wrbuffer_cap_refs.
*/
BUG_ON(capsnap->dirty == 0);

/* pick mds, take s_mutex */
mds = __ceph_get_cap_mds(ci, &mseq);
if (session && session->s_mds != mds) {
Expand Down Expand Up @@ -2118,8 +2124,8 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
}
spin_unlock(&inode->i_lock);

dout("put_cap_refs %p had %s %s\n", inode, ceph_cap_string(had),
last ? "last" : "");
dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
last ? " last" : "", put ? " put" : "");

if (last && !flushsnaps)
ceph_check_caps(ci, 0, NULL);
Expand All @@ -2143,7 +2149,8 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
{
struct inode *inode = &ci->vfs_inode;
int last = 0;
int last_snap = 0;
int complete_capsnap = 0;
int drop_capsnap = 0;
int found = 0;
struct ceph_cap_snap *capsnap = NULL;

Expand All @@ -2166,30 +2173,45 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
if (capsnap->context == snapc) {
found = 1;
capsnap->dirty_pages -= nr;
last_snap = !capsnap->dirty_pages;
break;
}
}
BUG_ON(!found);
capsnap->dirty_pages -= nr;
if (capsnap->dirty_pages == 0) {
complete_capsnap = 1;
if (capsnap->dirty == 0)
/* cap writeback completed before we created
* the cap_snap; no FLUSHSNAP is needed */
drop_capsnap = 1;
}
dout("put_wrbuffer_cap_refs on %p cap_snap %p "
" snap %lld %d/%d -> %d/%d %s%s\n",
" snap %lld %d/%d -> %d/%d %s%s%s\n",
inode, capsnap, capsnap->context->seq,
ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr,
ci->i_wrbuffer_ref, capsnap->dirty_pages,
last ? " (wrbuffer last)" : "",
last_snap ? " (capsnap last)" : "");
complete_capsnap ? " (complete capsnap)" : "",
drop_capsnap ? " (drop capsnap)" : "");
if (drop_capsnap) {
ceph_put_snap_context(capsnap->context);
list_del(&capsnap->ci_item);
list_del(&capsnap->flushing_item);
ceph_put_cap_snap(capsnap);
}
}

spin_unlock(&inode->i_lock);

if (last) {
ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
iput(inode);
} else if (last_snap) {
} else if (complete_capsnap) {
ceph_flush_snaps(ci);
wake_up(&ci->i_cap_wq);
}
if (drop_capsnap)
iput(inode);
}

/*
Expand Down Expand Up @@ -2465,8 +2487,8 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
break;
}
WARN_ON(capsnap->dirty_pages || capsnap->writing);
dout(" removing cap_snap %p follows %lld\n",
capsnap, follows);
dout(" removing %p cap_snap %p follows %lld\n",
inode, capsnap, follows);
ceph_put_snap_context(capsnap->context);
list_del(&capsnap->ci_item);
list_del(&capsnap->flushing_item);
Expand Down
7 changes: 4 additions & 3 deletions fs/ceph/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,11 @@ static int __dcache_readdir(struct file *filp,
spin_lock(&inode->i_lock);
spin_lock(&dcache_lock);

last = dentry;

if (err < 0)
goto out_unlock;

last = dentry;

p = p->prev;
filp->f_pos++;

Expand Down Expand Up @@ -312,7 +312,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
req->r_readdir_offset = fi->next_offset;
req->r_args.readdir.frag = cpu_to_le32(frag);
req->r_args.readdir.max_entries = cpu_to_le32(max_entries);
req->r_num_caps = max_entries;
req->r_num_caps = max_entries + 1;
err = ceph_mdsc_do_request(mdsc, NULL, req);
if (err < 0) {
ceph_mdsc_put_request(req);
Expand Down Expand Up @@ -489,6 +489,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
struct inode *inode = ceph_get_snapdir(parent);
dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n",
dentry, dentry->d_name.len, dentry->d_name.name, inode);
BUG_ON(!d_unhashed(dentry));
d_add(dentry, inode);
err = 0;
}
Expand Down
10 changes: 9 additions & 1 deletion fs/ceph/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -886,6 +886,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
struct inode *in = NULL;
struct ceph_mds_reply_inode *ininfo;
struct ceph_vino vino;
struct ceph_client *client = ceph_sb_to_client(sb);
int i = 0;
int err = 0;

Expand Down Expand Up @@ -949,7 +950,14 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
return err;
}

if (rinfo->head->is_dentry && !req->r_aborted) {
/*
* ignore null lease/binding on snapdir ENOENT, or else we
* will have trouble splicing in the virtual snapdir later
*/
if (rinfo->head->is_dentry && !req->r_aborted &&
(rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
client->mount_args->snapdir_name,
req->r_dentry->d_name.len))) {
/*
* lookup link rename : null -> possibly existing inode
* mknod symlink mkdir : null -> new inode
Expand Down
9 changes: 9 additions & 0 deletions fs/ceph/messenger.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ static char tag_msg = CEPH_MSGR_TAG_MSG;
static char tag_ack = CEPH_MSGR_TAG_ACK;
static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE;

#ifdef CONFIG_LOCKDEP
static struct lock_class_key socket_class;
#endif


static void queue_con(struct ceph_connection *con);
static void con_work(struct work_struct *);
Expand Down Expand Up @@ -228,6 +232,10 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con)
con->sock = sock;
sock->sk->sk_allocation = GFP_NOFS;

#ifdef CONFIG_LOCKDEP
lockdep_set_class(&sock->sk->sk_lock, &socket_class);
#endif

set_sock_callbacks(sock, con);

dout("connect %s\n", pr_addr(&con->peer_addr.in_addr));
Expand Down Expand Up @@ -333,6 +341,7 @@ static void reset_connection(struct ceph_connection *con)
con->out_msg = NULL;
}
con->in_seq = 0;
con->in_seq_acked = 0;
}

/*
Expand Down
Loading

0 comments on commit 96e35b4

Please sign in to comment.