Skip to content

Commit

Permalink
GFS2: remove transaction glock
Browse files Browse the repository at this point in the history
GFS2 has a transaction glock, which must be grabbed for every
transaction, whose purpose is to deal with freezing the filesystem.
Aside from this involving a large amount of locking, it is very easy to
make the current fsfreeze code hang on unfreezing.

This patch rewrites how gfs2 handles freezing the filesystem. The
transaction glock is removed. In it's place is a freeze glock, which is
cached (but not held) in a shared state by every node in the cluster
when the filesystem is mounted. This lock only needs to be grabbed on
freezing, and actions which need to be safe from freezing, like
recovery.

When a node wants to freeze the filesystem, it grabs this glock
exclusively.  When the freeze glock state changes on the nodes (either
from shared to unlocked, or shared to exclusive), the filesystem does a
special log flush.  gfs2_log_flush() does all the work for flushing out
the and shutting down the incore log, and then it tries to grab the
freeze glock in a shared state again.  Since the filesystem is stuck in
gfs2_log_flush, no new transaction can start, and nothing can be written
to disk. Unfreezing the filesytem simply involes dropping the freeze
glock, allowing gfs2_log_flush() to grab and then release the shared
lock, so it is cached for next time.

However, in order for the unfreezing ioctl to occur, gfs2 needs to get a
shared lock on the filesystem root directory inode to check permissions.
If that glock has already been grabbed exclusively, fsfreeze will be
unable to get the shared lock and unfreeze the filesystem.

In order to allow the unfreeze, this patch makes gfs2 grab a shared lock
on the filesystem root directory during the freeze, and hold it until it
unfreezes the filesystem.  The functions which need to grab a shared
lock in order to allow the unfreeze ioctl to be issued now use the lock
grabbed by the freeze code instead.

The freeze and unfreeze code take care to make sure that this shared
lock will not be dropped while another process is using it.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
  • Loading branch information
Benjamin Marzinski authored and Steven Whitehouse committed May 14, 2014
1 parent 5a7c669 commit 2497255
Show file tree
Hide file tree
Showing 16 changed files with 227 additions and 153 deletions.
2 changes: 1 addition & 1 deletion fs/gfs2/aops.c
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ static int gfs2_jdata_writepages(struct address_space *mapping,

ret = gfs2_write_cache_jdata(mapping, wbc);
if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) {
gfs2_log_flush(sdp, ip->i_gl);
gfs2_log_flush(sdp, ip->i_gl, NORMAL_FLUSH);
ret = gfs2_write_cache_jdata(mapping, wbc);
}
return ret;
Expand Down
2 changes: 1 addition & 1 deletion fs/gfs2/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
}
if ((flags ^ new_flags) & GFS2_DIF_JDATA) {
if (flags & GFS2_DIF_JDATA)
gfs2_log_flush(sdp, ip->i_gl);
gfs2_log_flush(sdp, ip->i_gl, NORMAL_FLUSH);
error = filemap_fdatawrite(inode->i_mapping);
if (error)
goto out;
Expand Down
51 changes: 32 additions & 19 deletions fs/gfs2/glops.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,18 +89,23 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
if (!tr.tr_revokes)
return;

/* A shortened, inline version of gfs2_trans_begin() */
/* A shortened, inline version of gfs2_trans_begin()
* tr->alloced is not set since the transaction structure is
* on the stack */
tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
tr.tr_ip = (unsigned long)__builtin_return_address(0);
sb_start_intwrite(sdp->sd_vfs);
gfs2_log_reserve(sdp, tr.tr_reserved);
if (gfs2_log_reserve(sdp, tr.tr_reserved) < 0) {
sb_end_intwrite(sdp->sd_vfs);
return;
}
WARN_ON_ONCE(current->journal_info);
current->journal_info = &tr;

__gfs2_ail_flush(gl, 0, tr.tr_revokes);

gfs2_trans_end(sdp);
gfs2_log_flush(sdp, NULL);
gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
}

void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
Expand All @@ -121,7 +126,7 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
return;
__gfs2_ail_flush(gl, fsync, max_revokes);
gfs2_trans_end(sdp);
gfs2_log_flush(sdp, NULL);
gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
}

/**
Expand All @@ -144,7 +149,7 @@ static void rgrp_go_sync(struct gfs2_glock *gl)
return;
GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE);

gfs2_log_flush(sdp, gl);
gfs2_log_flush(sdp, gl, NORMAL_FLUSH);
filemap_fdatawrite_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
error = filemap_fdatawait_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
mapping_set_error(mapping, error);
Expand Down Expand Up @@ -206,7 +211,7 @@ static void inode_go_sync(struct gfs2_glock *gl)

GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE);

gfs2_log_flush(gl->gl_sbd, gl);
gfs2_log_flush(gl->gl_sbd, gl, NORMAL_FLUSH);
filemap_fdatawrite(metamapping);
if (ip) {
struct address_space *mapping = ip->i_inode.i_mapping;
Expand Down Expand Up @@ -253,7 +258,7 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
}

if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) {
gfs2_log_flush(gl->gl_sbd, NULL);
gfs2_log_flush(gl->gl_sbd, NULL, NORMAL_FLUSH);
gl->gl_sbd->sd_rindex_uptodate = 0;
}
if (ip && S_ISREG(ip->i_inode.i_mode))
Expand Down Expand Up @@ -455,31 +460,39 @@ static void inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
}

/**
* trans_go_sync - promote/demote the transaction glock
* freeze_go_sync - promote/demote the freeze glock
* @gl: the glock
* @state: the requested state
* @flags:
*
*/

static void trans_go_sync(struct gfs2_glock *gl)
static void freeze_go_sync(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
DEFINE_WAIT(wait);

if (gl->gl_state != LM_ST_UNLOCKED &&
if (gl->gl_state == LM_ST_SHARED &&
test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
gfs2_meta_syncfs(sdp);
gfs2_log_shutdown(sdp);
atomic_set(&sdp->sd_log_freeze, 1);
wake_up(&sdp->sd_logd_waitq);
do {
prepare_to_wait(&sdp->sd_log_frozen_wait, &wait,
TASK_UNINTERRUPTIBLE);
if (atomic_read(&sdp->sd_log_freeze))
io_schedule();
} while(atomic_read(&sdp->sd_log_freeze));
finish_wait(&sdp->sd_log_frozen_wait, &wait);
}
}

/**
* trans_go_xmote_bh - After promoting/demoting the transaction glock
* freeze_go_xmote_bh - After promoting/demoting the freeze glock
* @gl: the glock
*
*/

static int trans_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh)
static int freeze_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
Expand Down Expand Up @@ -512,7 +525,7 @@ static int trans_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh)
* Always returns 0
*/

static int trans_go_demote_ok(const struct gfs2_glock *gl)
static int freeze_go_demote_ok(const struct gfs2_glock *gl)
{
return 0;
}
Expand Down Expand Up @@ -563,10 +576,10 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
.go_flags = GLOF_LVB,
};

const struct gfs2_glock_operations gfs2_trans_glops = {
.go_sync = trans_go_sync,
.go_xmote_bh = trans_go_xmote_bh,
.go_demote_ok = trans_go_demote_ok,
const struct gfs2_glock_operations gfs2_freeze_glops = {
.go_sync = freeze_go_sync,
.go_xmote_bh = freeze_go_xmote_bh,
.go_demote_ok = freeze_go_demote_ok,
.go_type = LM_TYPE_NONDISK,
};

Expand Down
2 changes: 1 addition & 1 deletion fs/gfs2/glops.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
extern const struct gfs2_glock_operations gfs2_meta_glops;
extern const struct gfs2_glock_operations gfs2_inode_glops;
extern const struct gfs2_glock_operations gfs2_rgrp_glops;
extern const struct gfs2_glock_operations gfs2_trans_glops;
extern const struct gfs2_glock_operations gfs2_freeze_glops;
extern const struct gfs2_glock_operations gfs2_iopen_glops;
extern const struct gfs2_glock_operations gfs2_flock_glops;
extern const struct gfs2_glock_operations gfs2_nondisk_glops;
Expand Down
12 changes: 8 additions & 4 deletions fs/gfs2/incore.h
Original file line number Diff line number Diff line change
Expand Up @@ -465,9 +465,7 @@ struct gfs2_trans {
unsigned int tr_reserved;
unsigned int tr_touched:1;
unsigned int tr_attached:1;

struct gfs2_holder tr_t_gh;

unsigned int tr_alloced:1;

unsigned int tr_num_buf_new;
unsigned int tr_num_databuf_new;
Expand Down Expand Up @@ -682,7 +680,7 @@ struct gfs2_sbd {
struct lm_lockstruct sd_lockstruct;
struct gfs2_holder sd_live_gh;
struct gfs2_glock *sd_rename_gl;
struct gfs2_glock *sd_trans_gl;
struct gfs2_glock *sd_freeze_gl;
wait_queue_head_t sd_glock_wait;
atomic_t sd_glock_disposal;
struct completion sd_locking_init;
Expand Down Expand Up @@ -794,6 +792,12 @@ struct gfs2_sbd {

/* For quiescing the filesystem */
struct gfs2_holder sd_freeze_gh;
struct gfs2_holder sd_freeze_root_gh;
struct gfs2_holder sd_thaw_gh;
atomic_t sd_log_freeze;
atomic_t sd_frozen_root;
wait_queue_head_t sd_frozen_root_wait;
wait_queue_head_t sd_log_frozen_wait;

char sd_fsname[GFS2_FSNAME_LEN];
char sd_table_name[GFS2_FSNAME_LEN];
Expand Down
40 changes: 30 additions & 10 deletions fs/gfs2/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -1613,18 +1613,26 @@ int gfs2_permission(struct inode *inode, int mask)
{
struct gfs2_inode *ip;
struct gfs2_holder i_gh;
struct gfs2_sbd *sdp = GFS2_SB(inode);
int error;
int unlock = 0;
int frozen_root = 0;


ip = GFS2_I(inode);
if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
if (mask & MAY_NOT_BLOCK)
return -ECHILD;
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
if (error)
return error;
unlock = 1;
if (unlikely(gfs2_glock_is_held_excl(sdp->sd_freeze_gl) &&
inode == sdp->sd_root_dir->d_inode &&
atomic_inc_not_zero(&sdp->sd_frozen_root)))
frozen_root = 1;
else {
if (mask & MAY_NOT_BLOCK)
return -ECHILD;
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
if (error)
return error;
unlock = 1;
}
}

if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
Expand All @@ -1633,6 +1641,8 @@ int gfs2_permission(struct inode *inode, int mask)
error = generic_permission(inode, mask);
if (unlock)
gfs2_glock_dq_uninit(&i_gh);
else if (frozen_root && atomic_dec_and_test(&sdp->sd_frozen_root))
wake_up(&sdp->sd_frozen_root_wait);

return error;
}
Expand Down Expand Up @@ -1805,19 +1815,29 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct inode *inode = dentry->d_inode;
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_holder gh;
struct gfs2_sbd *sdp = GFS2_SB(inode);
int error;
int unlock = 0;
int frozen_root = 0;

if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
if (error)
return error;
unlock = 1;
if (unlikely(gfs2_glock_is_held_excl(sdp->sd_freeze_gl) &&
inode == sdp->sd_root_dir->d_inode &&
atomic_inc_not_zero(&sdp->sd_frozen_root)))
frozen_root = 1;
else {
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
if (error)
return error;
unlock = 1;
}
}

generic_fillattr(inode, stat);
if (unlock)
gfs2_glock_dq_uninit(&gh);
else if (frozen_root && atomic_dec_and_test(&sdp->sd_frozen_root))
wake_up(&sdp->sd_frozen_root_wait);

return 0;
}
Expand Down
Loading

0 comments on commit 2497255

Please sign in to comment.