Skip to content

Commit

Permalink
ocfs2: Remove mount/unmount votes
Browse files Browse the repository at this point in the history
The node maps that are set/unset by these votes are no longer relevant, thus
we can remove the mount and umount votes. Since those are the last two
remaining votes, we can also remove the entire vote infrastructure.

The vote thread has been renamed to the downconvert thread, and the small
amount of functionality related to managing it has been moved into
fs/ocfs2/dlmglue.c. All references to votes have been removed or updated.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
  • Loading branch information
Mark Fasheh committed Jan 25, 2008
1 parent 6f7b056 commit 34d024f
Show file tree
Hide file tree
Showing 15 changed files with 179 additions and 967 deletions.
3 changes: 1 addition & 2 deletions fs/ocfs2/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ ocfs2-objs := \
symlink.o \
sysfile.o \
uptodate.o \
ver.o \
vote.o
ver.o

obj-$(CONFIG_OCFS2_FS) += cluster/
obj-$(CONFIG_OCFS2_FS) += dlm/
5 changes: 4 additions & 1 deletion fs/ocfs2/cluster/tcp_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@
* locking semantics of the file system using the protocol. It should
* be somewhere else, I'm sure, but right now it isn't.
*
* New in version 9:
* - All votes removed
*
* New in version 8:
* - Replace delete inode votes with a cluster lock
*
Expand All @@ -60,7 +63,7 @@
* - full 64 bit i_size in the metadata lock lvbs
* - introduction of "rw" lock and pushing meta/data locking down
*/
#define O2NET_PROTOCOL_VERSION 8ULL
#define O2NET_PROTOCOL_VERSION 9ULL
struct o2net_handshake {
__be64 protocol_version;
__be64 connector_id;
Expand Down
8 changes: 4 additions & 4 deletions fs/ocfs2/dcache.c
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,9 @@ static int ocfs2_match_dentry(struct dentry *dentry,
/*
* Walk the inode alias list, and find a dentry which has a given
* parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it
* is looking for a dentry_lock reference. The vote thread is looking
* to unhash aliases, so we allow it to skip any that already have
* that property.
* is looking for a dentry_lock reference. The downconvert thread is
* looking to unhash aliases, so we allow it to skip any that already
* have that property.
*/
struct dentry *ocfs2_find_local_alias(struct inode *inode,
u64 parent_blkno,
Expand Down Expand Up @@ -266,7 +266,7 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
dl->dl_count = 0;
/*
* Does this have to happen below, for all attaches, in case
* the struct inode gets blown away by votes?
* the struct inode gets blown away by the downconvert thread?
*/
dl->dl_inode = igrab(inode);
dl->dl_parent_blkno = parent_blkno;
Expand Down
164 changes: 128 additions & 36 deletions fs/ocfs2/dlmglue.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@
#include "slot_map.h"
#include "super.h"
#include "uptodate.h"
#include "vote.h"

#include "buffer_head_io.h"

Expand Down Expand Up @@ -153,10 +152,10 @@ struct ocfs2_lock_res_ops {
struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);

/*
* Optionally called in the downconvert (or "vote") thread
* after a successful downconvert. The lockres will not be
* referenced after this callback is called, so it is safe to
* free memory, etc.
* Optionally called in the downconvert thread after a
* successful downconvert. The lockres will not be referenced
* after this callback is called, so it is safe to free
* memory, etc.
*
* The exact semantics of when this is called are controlled
* by ->downconvert_worker()
Expand Down Expand Up @@ -310,8 +309,9 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
"resource %s: %s\n", dlm_errname(_stat), _func, \
_lockres->l_name, dlm_errmsg(_stat)); \
} while (0)
static void ocfs2_vote_on_unlock(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres);
static int ocfs2_downconvert_thread(void *arg);
static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres);
static int ocfs2_meta_lock_update(struct inode *inode,
struct buffer_head **bh);
static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);
Expand Down Expand Up @@ -732,7 +732,7 @@ static void ocfs2_blocking_ast(void *opaque, int level)

wake_up(&lockres->l_event);

ocfs2_kick_vote_thread(osb);
ocfs2_wake_downconvert_thread(osb);
}

static void ocfs2_locking_ast(void *opaque)
Expand Down Expand Up @@ -1089,7 +1089,7 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
mlog_entry_void();
spin_lock_irqsave(&lockres->l_lock, flags);
ocfs2_dec_holders(lockres, level);
ocfs2_vote_on_unlock(osb, lockres);
ocfs2_downconvert_on_unlock(osb, lockres);
spin_unlock_irqrestore(&lockres->l_lock, flags);
mlog_exit_void();
}
Expand Down Expand Up @@ -1372,15 +1372,15 @@ int ocfs2_data_lock_with_page(struct inode *inode,
return ret;
}

static void ocfs2_vote_on_unlock(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres)
static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres)
{
int kick = 0;

mlog_entry_void();

/* If we know that another node is waiting on our lock, kick
* the vote thread * pre-emptively when we reach a release
* the downconvert thread * pre-emptively when we reach a release
* condition. */
if (lockres->l_flags & OCFS2_LOCK_BLOCKED) {
switch(lockres->l_blocking) {
Expand All @@ -1398,7 +1398,7 @@ static void ocfs2_vote_on_unlock(struct ocfs2_super *osb,
}

if (kick)
ocfs2_kick_vote_thread(osb);
ocfs2_wake_downconvert_thread(osb);

mlog_exit_void();
}
Expand Down Expand Up @@ -1832,19 +1832,20 @@ int ocfs2_meta_lock_full(struct inode *inode,
}

/*
* This is working around a lock inversion between tasks acquiring DLM locks
* while holding a page lock and the vote thread which blocks dlm lock acquiry
* while acquiring page locks.
* This is working around a lock inversion between tasks acquiring DLM
* locks while holding a page lock and the downconvert thread which
* blocks dlm lock acquiry while acquiring page locks.
*
* ** These _with_page variantes are only intended to be called from aop
* methods that hold page locks and return a very specific *positive* error
* code that aop methods pass up to the VFS -- test for errors with != 0. **
*
* The DLM is called such that it returns -EAGAIN if it would have blocked
* waiting for the vote thread. In that case we unlock our page so the vote
* thread can make progress. Once we've done this we have to return
* AOP_TRUNCATED_PAGE so the aop method that called us can bubble that back up
* into the VFS who will then immediately retry the aop call.
* The DLM is called such that it returns -EAGAIN if it would have
* blocked waiting for the downconvert thread. In that case we unlock
* our page so the downconvert thread can make progress. Once we've
* done this we have to return AOP_TRUNCATED_PAGE so the aop method
* that called us can bubble that back up into the VFS who will then
* immediately retry the aop call.
*
* We do a blocking lock and immediate unlock before returning, though, so that
* the lock has a great chance of being cached on this node by the time the VFS
Expand Down Expand Up @@ -2320,11 +2321,11 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
goto bail;
}

/* launch vote thread */
osb->vote_task = kthread_run(ocfs2_vote_thread, osb, "ocfs2vote");
if (IS_ERR(osb->vote_task)) {
status = PTR_ERR(osb->vote_task);
osb->vote_task = NULL;
/* launch downconvert thread */
osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc");
if (IS_ERR(osb->dc_task)) {
status = PTR_ERR(osb->dc_task);
osb->dc_task = NULL;
mlog_errno(status);
goto bail;
}
Expand Down Expand Up @@ -2353,8 +2354,8 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
bail:
if (status < 0) {
ocfs2_dlm_shutdown_debug(osb);
if (osb->vote_task)
kthread_stop(osb->vote_task);
if (osb->dc_task)
kthread_stop(osb->dc_task);
}

mlog_exit(status);
Expand All @@ -2369,9 +2370,9 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb)

ocfs2_drop_osb_locks(osb);

if (osb->vote_task) {
kthread_stop(osb->vote_task);
osb->vote_task = NULL;
if (osb->dc_task) {
kthread_stop(osb->dc_task);
osb->dc_task = NULL;
}

ocfs2_lock_res_free(&osb->osb_super_lockres);
Expand Down Expand Up @@ -2527,7 +2528,7 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb,

/* Mark the lockres as being dropped. It will no longer be
* queued if blocking, but we still may have to wait on it
* being dequeued from the vote thread before we can consider
* being dequeued from the downconvert thread before we can consider
* it safe to drop.
*
* You can *not* attempt to call cluster_lock on this lockres anymore. */
Expand Down Expand Up @@ -2903,7 +2904,7 @@ static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)

/*
* Does the final reference drop on our dentry lock. Right now this
* happens in the vote thread, but we could choose to simplify the
* happens in the downconvert thread, but we could choose to simplify the
* dlmglue API and push these off to the ocfs2_wq in the future.
*/
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
Expand Down Expand Up @@ -3042,7 +3043,7 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
mlog(0, "lockres %s blocked.\n", lockres->l_name);

/* Detect whether a lock has been marked as going away while
* the vote thread was processing other things. A lock can
* the downconvert thread was processing other things. A lock can
* still be marked with OCFS2_LOCK_FREEING after this check,
* but short circuiting here will still save us some
* performance. */
Expand Down Expand Up @@ -3091,13 +3092,104 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,

lockres_or_flags(lockres, OCFS2_LOCK_QUEUED);

spin_lock(&osb->vote_task_lock);
spin_lock(&osb->dc_task_lock);
if (list_empty(&lockres->l_blocked_list)) {
list_add_tail(&lockres->l_blocked_list,
&osb->blocked_lock_list);
osb->blocked_lock_count++;
}
spin_unlock(&osb->vote_task_lock);
spin_unlock(&osb->dc_task_lock);

mlog_exit_void();
}

static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
{
unsigned long processed;
struct ocfs2_lock_res *lockres;

mlog_entry_void();

spin_lock(&osb->dc_task_lock);
/* grab this early so we know to try again if a state change and
* wake happens part-way through our work */
osb->dc_work_sequence = osb->dc_wake_sequence;

processed = osb->blocked_lock_count;
while (processed) {
BUG_ON(list_empty(&osb->blocked_lock_list));

lockres = list_entry(osb->blocked_lock_list.next,
struct ocfs2_lock_res, l_blocked_list);
list_del_init(&lockres->l_blocked_list);
osb->blocked_lock_count--;
spin_unlock(&osb->dc_task_lock);

BUG_ON(!processed);
processed--;

ocfs2_process_blocked_lock(osb, lockres);

spin_lock(&osb->dc_task_lock);
}
spin_unlock(&osb->dc_task_lock);

mlog_exit_void();
}

static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb)
{
int empty = 0;

spin_lock(&osb->dc_task_lock);
if (list_empty(&osb->blocked_lock_list))
empty = 1;

spin_unlock(&osb->dc_task_lock);
return empty;
}

static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb)
{
int should_wake = 0;

spin_lock(&osb->dc_task_lock);
if (osb->dc_work_sequence != osb->dc_wake_sequence)
should_wake = 1;
spin_unlock(&osb->dc_task_lock);

return should_wake;
}

int ocfs2_downconvert_thread(void *arg)
{
int status = 0;
struct ocfs2_super *osb = arg;

/* only quit once we've been asked to stop and there is no more
* work available */
while (!(kthread_should_stop() &&
ocfs2_downconvert_thread_lists_empty(osb))) {

wait_event_interruptible(osb->dc_event,
ocfs2_downconvert_thread_should_wake(osb) ||
kthread_should_stop());

mlog(0, "downconvert_thread: awoken\n");

ocfs2_downconvert_thread_do_work(osb);
}

osb->dc_task = NULL;
return status;
}

void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb)
{
spin_lock(&osb->dc_task_lock);
/* make sure the voting thread gets a swipe at whatever changes
* the caller may have made to the voting state */
osb->dc_wake_sequence++;
spin_unlock(&osb->dc_task_lock);
wake_up(&osb->dc_event);
}
5 changes: 3 additions & 2 deletions fs/ocfs2/dlmglue.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ struct ocfs2_meta_lvb {
#define OCFS2_META_LOCK_RECOVERY (0x01)
/* Instruct the dlm not to queue ourselves on the other node. */
#define OCFS2_META_LOCK_NOQUEUE (0x02)
/* don't block waiting for the vote thread, instead return -EAGAIN */
/* don't block waiting for the downconvert thread, instead return -EAGAIN */
#define OCFS2_LOCK_NONBLOCK (0x04)

int ocfs2_dlm_init(struct ocfs2_super *osb);
Expand Down Expand Up @@ -112,9 +112,10 @@ void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres);

/* for the vote thread */
/* for the downconvert thread */
void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres);
void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb);

struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void);
void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug);
Expand Down
7 changes: 0 additions & 7 deletions fs/ocfs2/heartbeat.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
#include "heartbeat.h"
#include "inode.h"
#include "journal.h"
#include "vote.h"

#include "buffer_head_io.h"

Expand All @@ -58,9 +57,7 @@ static void __ocfs2_node_map_set(struct ocfs2_node_map *target,
void ocfs2_init_node_maps(struct ocfs2_super *osb)
{
spin_lock_init(&osb->node_map_lock);
ocfs2_node_map_init(&osb->mounted_map);
ocfs2_node_map_init(&osb->recovery_map);
ocfs2_node_map_init(&osb->umount_map);
ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs);
}

Expand All @@ -82,8 +79,6 @@ static void ocfs2_do_node_down(int node_num,
}

ocfs2_recovery_thread(osb, node_num);

ocfs2_remove_node_from_vote_queues(osb, node_num);
}

/* Called from the dlm when it's about to evict a node. We may also
Expand Down Expand Up @@ -268,8 +263,6 @@ int ocfs2_recovery_map_set(struct ocfs2_super *osb,

spin_lock(&osb->node_map_lock);

__ocfs2_node_map_clear_bit(&osb->mounted_map, num);

if (!test_bit(num, osb->recovery_map.map)) {
__ocfs2_node_map_set_bit(&osb->recovery_map, num);
set = 1;
Expand Down
Loading

0 comments on commit 34d024f

Please sign in to comment.