Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 76936
b: refs/heads/master
c: 34d024f
h: refs/heads/master
v: v3
  • Loading branch information
Mark Fasheh committed Jan 25, 2008
1 parent ccb10ed commit 97023ab
Show file tree
Hide file tree
Showing 16 changed files with 180 additions and 968 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: 6f7b056ea9c6fa978c79ca626eff43549df94dbb
refs/heads/master: 34d024f84345807bf44163fac84e921513dde323
3 changes: 1 addition & 2 deletions trunk/fs/ocfs2/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ ocfs2-objs := \
symlink.o \
sysfile.o \
uptodate.o \
ver.o \
vote.o
ver.o

obj-$(CONFIG_OCFS2_FS) += cluster/
obj-$(CONFIG_OCFS2_FS) += dlm/
5 changes: 4 additions & 1 deletion trunk/fs/ocfs2/cluster/tcp_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@
* locking semantics of the file system using the protocol. It should
* be somewhere else, I'm sure, but right now it isn't.
*
* New in version 9:
* - All votes removed
*
* New in version 8:
* - Replace delete inode votes with a cluster lock
*
Expand All @@ -60,7 +63,7 @@
* - full 64 bit i_size in the metadata lock lvbs
* - introduction of "rw" lock and pushing meta/data locking down
*/
#define O2NET_PROTOCOL_VERSION 8ULL
#define O2NET_PROTOCOL_VERSION 9ULL
struct o2net_handshake {
__be64 protocol_version;
__be64 connector_id;
Expand Down
8 changes: 4 additions & 4 deletions trunk/fs/ocfs2/dcache.c
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,9 @@ static int ocfs2_match_dentry(struct dentry *dentry,
/*
* Walk the inode alias list, and find a dentry which has a given
* parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it
* is looking for a dentry_lock reference. The vote thread is looking
* to unhash aliases, so we allow it to skip any that already have
* that property.
* is looking for a dentry_lock reference. The downconvert thread is
* looking to unhash aliases, so we allow it to skip any that already
* have that property.
*/
struct dentry *ocfs2_find_local_alias(struct inode *inode,
u64 parent_blkno,
Expand Down Expand Up @@ -266,7 +266,7 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
dl->dl_count = 0;
/*
* Does this have to happen below, for all attaches, in case
* the struct inode gets blown away by votes?
* the struct inode gets blown away by the downconvert thread?
*/
dl->dl_inode = igrab(inode);
dl->dl_parent_blkno = parent_blkno;
Expand Down
164 changes: 128 additions & 36 deletions trunk/fs/ocfs2/dlmglue.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@
#include "slot_map.h"
#include "super.h"
#include "uptodate.h"
#include "vote.h"

#include "buffer_head_io.h"

Expand Down Expand Up @@ -153,10 +152,10 @@ struct ocfs2_lock_res_ops {
struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);

/*
* Optionally called in the downconvert (or "vote") thread
* after a successful downconvert. The lockres will not be
* referenced after this callback is called, so it is safe to
* free memory, etc.
* Optionally called in the downconvert thread after a
* successful downconvert. The lockres will not be referenced
* after this callback is called, so it is safe to free
* memory, etc.
*
* The exact semantics of when this is called are controlled
* by ->downconvert_worker()
Expand Down Expand Up @@ -310,8 +309,9 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
"resource %s: %s\n", dlm_errname(_stat), _func, \
_lockres->l_name, dlm_errmsg(_stat)); \
} while (0)
static void ocfs2_vote_on_unlock(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres);
static int ocfs2_downconvert_thread(void *arg);
static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres);
static int ocfs2_meta_lock_update(struct inode *inode,
struct buffer_head **bh);
static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);
Expand Down Expand Up @@ -732,7 +732,7 @@ static void ocfs2_blocking_ast(void *opaque, int level)

wake_up(&lockres->l_event);

ocfs2_kick_vote_thread(osb);
ocfs2_wake_downconvert_thread(osb);
}

static void ocfs2_locking_ast(void *opaque)
Expand Down Expand Up @@ -1089,7 +1089,7 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
mlog_entry_void();
spin_lock_irqsave(&lockres->l_lock, flags);
ocfs2_dec_holders(lockres, level);
ocfs2_vote_on_unlock(osb, lockres);
ocfs2_downconvert_on_unlock(osb, lockres);
spin_unlock_irqrestore(&lockres->l_lock, flags);
mlog_exit_void();
}
Expand Down Expand Up @@ -1372,15 +1372,15 @@ int ocfs2_data_lock_with_page(struct inode *inode,
return ret;
}

static void ocfs2_vote_on_unlock(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres)
static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres)
{
int kick = 0;

mlog_entry_void();

/* If we know that another node is waiting on our lock, kick
* the vote thread * pre-emptively when we reach a release
* the downconvert thread pre-emptively when we reach a release
* condition. */
if (lockres->l_flags & OCFS2_LOCK_BLOCKED) {
switch(lockres->l_blocking) {
Expand All @@ -1398,7 +1398,7 @@ static void ocfs2_vote_on_unlock(struct ocfs2_super *osb,
}

if (kick)
ocfs2_kick_vote_thread(osb);
ocfs2_wake_downconvert_thread(osb);

mlog_exit_void();
}
Expand Down Expand Up @@ -1832,19 +1832,20 @@ int ocfs2_meta_lock_full(struct inode *inode,
}

/*
* This is working around a lock inversion between tasks acquiring DLM locks
* while holding a page lock and the vote thread which blocks dlm lock acquiry
* while acquiring page locks.
* This is working around a lock inversion between tasks acquiring DLM
* locks while holding a page lock and the downconvert thread which
* blocks dlm lock acquisition while acquiring page locks.
*
* ** These _with_page variants are only intended to be called from aop
* methods that hold page locks and return a very specific *positive* error
* code that aop methods pass up to the VFS -- test for errors with != 0. **
*
* The DLM is called such that it returns -EAGAIN if it would have blocked
* waiting for the vote thread. In that case we unlock our page so the vote
* thread can make progress. Once we've done this we have to return
* AOP_TRUNCATED_PAGE so the aop method that called us can bubble that back up
* into the VFS who will then immediately retry the aop call.
* The DLM is called such that it returns -EAGAIN if it would have
* blocked waiting for the downconvert thread. In that case we unlock
* our page so the downconvert thread can make progress. Once we've
* done this we have to return AOP_TRUNCATED_PAGE so the aop method
* that called us can bubble that back up into the VFS who will then
* immediately retry the aop call.
*
* We do a blocking lock and immediate unlock before returning, though, so that
* the lock has a great chance of being cached on this node by the time the VFS
Expand Down Expand Up @@ -2320,11 +2321,11 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
goto bail;
}

/* launch vote thread */
osb->vote_task = kthread_run(ocfs2_vote_thread, osb, "ocfs2vote");
if (IS_ERR(osb->vote_task)) {
status = PTR_ERR(osb->vote_task);
osb->vote_task = NULL;
/* launch downconvert thread */
osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc");
if (IS_ERR(osb->dc_task)) {
status = PTR_ERR(osb->dc_task);
osb->dc_task = NULL;
mlog_errno(status);
goto bail;
}
Expand Down Expand Up @@ -2353,8 +2354,8 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
bail:
if (status < 0) {
ocfs2_dlm_shutdown_debug(osb);
if (osb->vote_task)
kthread_stop(osb->vote_task);
if (osb->dc_task)
kthread_stop(osb->dc_task);
}

mlog_exit(status);
Expand All @@ -2369,9 +2370,9 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb)

ocfs2_drop_osb_locks(osb);

if (osb->vote_task) {
kthread_stop(osb->vote_task);
osb->vote_task = NULL;
if (osb->dc_task) {
kthread_stop(osb->dc_task);
osb->dc_task = NULL;
}

ocfs2_lock_res_free(&osb->osb_super_lockres);
Expand Down Expand Up @@ -2527,7 +2528,7 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb,

/* Mark the lockres as being dropped. It will no longer be
* queued if blocking, but we still may have to wait on it
* being dequeued from the vote thread before we can consider
* being dequeued from the downconvert thread before we can consider
* it safe to drop.
*
* You can *not* attempt to call cluster_lock on this lockres anymore. */
Expand Down Expand Up @@ -2903,7 +2904,7 @@ static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)

/*
* Does the final reference drop on our dentry lock. Right now this
* happens in the vote thread, but we could choose to simplify the
* happens in the downconvert thread, but we could choose to simplify the
* dlmglue API and push these off to the ocfs2_wq in the future.
*/
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
Expand Down Expand Up @@ -3042,7 +3043,7 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
mlog(0, "lockres %s blocked.\n", lockres->l_name);

/* Detect whether a lock has been marked as going away while
* the vote thread was processing other things. A lock can
* the downconvert thread was processing other things. A lock can
* still be marked with OCFS2_LOCK_FREEING after this check,
* but short circuiting here will still save us some
* performance. */
Expand Down Expand Up @@ -3091,13 +3092,104 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,

lockres_or_flags(lockres, OCFS2_LOCK_QUEUED);

spin_lock(&osb->vote_task_lock);
spin_lock(&osb->dc_task_lock);
if (list_empty(&lockres->l_blocked_list)) {
list_add_tail(&lockres->l_blocked_list,
&osb->blocked_lock_list);
osb->blocked_lock_count++;
}
spin_unlock(&osb->vote_task_lock);
spin_unlock(&osb->dc_task_lock);

mlog_exit_void();
}

/*
 * Run one pass over osb->blocked_lock_list, handing each dequeued lock
 * resource to ocfs2_process_blocked_lock().
 *
 * The pass is bounded by the value of osb->blocked_lock_count sampled on
 * entry; it does not keep looping for as long as the list is non-empty.
 * osb->dc_task_lock is held while the list and counters are touched and
 * is dropped around each ocfs2_process_blocked_lock() call.
 */
static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
{
unsigned long processed;
struct ocfs2_lock_res *lockres;

mlog_entry_void();

spin_lock(&osb->dc_task_lock);
/* grab this early so we know to try again if a state change and
* wake happens part-way through our work */
osb->dc_work_sequence = osb->dc_wake_sequence;

/* Snapshot the queue length: this is how many entries this pass
 * will dequeue. */
processed = osb->blocked_lock_count;
while (processed) {
/* The count says there is work, so the list must agree. */
BUG_ON(list_empty(&osb->blocked_lock_list));

/* Detach the head entry and account for it while still locked. */
lockres = list_entry(osb->blocked_lock_list.next,
struct ocfs2_lock_res, l_blocked_list);
list_del_init(&lockres->l_blocked_list);
osb->blocked_lock_count--;
spin_unlock(&osb->dc_task_lock);

/* NOTE(review): 'processed' is a local already tested by the loop
 * condition, so this check cannot fire as written. */
BUG_ON(!processed);
processed--;

/* Called with dc_task_lock released. */
ocfs2_process_blocked_lock(osb, lockres);

/* Re-take the lock before re-testing the loop condition. */
spin_lock(&osb->dc_task_lock);
}
spin_unlock(&osb->dc_task_lock);

mlog_exit_void();
}

/*
 * Report whether osb->blocked_lock_list currently has no entries.
 *
 * The list is sampled while holding osb->dc_task_lock.
 *
 * Returns 1 if the list is empty, 0 otherwise.
 */
static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb)
{
	int is_empty;

	spin_lock(&osb->dc_task_lock);
	is_empty = list_empty(&osb->blocked_lock_list) ? 1 : 0;
	spin_unlock(&osb->dc_task_lock);

	return is_empty;
}

/*
 * Report whether a wake has been posted since the last work pass began.
 *
 * Compares osb->dc_work_sequence against osb->dc_wake_sequence while
 * holding osb->dc_task_lock.
 *
 * Returns 1 if the two sequence numbers differ, 0 if they match.
 */
static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb)
{
	int pending;

	spin_lock(&osb->dc_task_lock);
	pending = (osb->dc_work_sequence != osb->dc_wake_sequence);
	spin_unlock(&osb->dc_task_lock);

	return pending;
}

/*
 * Thread function for the downconvert thread.
 *
 * @arg: the struct ocfs2_super this thread services.
 *
 * Repeatedly waits on osb->dc_event until either a wake has been posted
 * (ocfs2_downconvert_thread_should_wake()) or a stop has been requested,
 * then runs one pass of ocfs2_downconvert_thread_do_work().  Clears
 * osb->dc_task before returning.
 *
 * Declared static to match the forward declaration earlier in this file.
 *
 * Returns 0.
 */
static int ocfs2_downconvert_thread(void *arg)
{
	struct ocfs2_super *osb = arg;

	/* only quit once we've been asked to stop and there is no more
	 * work available */
	while (!(kthread_should_stop() &&
		 ocfs2_downconvert_thread_lists_empty(osb))) {

		/* The return value is deliberately not inspected: whatever
		 * ended the wait, we fall through and run a work pass. */
		wait_event_interruptible(osb->dc_event,
					 ocfs2_downconvert_thread_should_wake(osb) ||
					 kthread_should_stop());

		mlog(0, "downconvert_thread: awoken\n");

		ocfs2_downconvert_thread_do_work(osb);
	}

	osb->dc_task = NULL;
	return 0;
}

/*
 * Post a wake to the downconvert thread.
 *
 * Increments osb->dc_wake_sequence under osb->dc_task_lock, then wakes
 * waiters on osb->dc_event.  The increment makes dc_wake_sequence differ
 * from dc_work_sequence, which is the condition
 * ocfs2_downconvert_thread_should_wake() tests.
 */
void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb)
{
spin_lock(&osb->dc_task_lock);
/* make sure the downconvert thread gets a swipe at whatever changes
* the caller may have made before waking it */
osb->dc_wake_sequence++;
spin_unlock(&osb->dc_task_lock);
wake_up(&osb->dc_event);
}
5 changes: 3 additions & 2 deletions trunk/fs/ocfs2/dlmglue.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ struct ocfs2_meta_lvb {
#define OCFS2_META_LOCK_RECOVERY (0x01)
/* Instruct the dlm not to queue ourselves on the other node. */
#define OCFS2_META_LOCK_NOQUEUE (0x02)
/* don't block waiting for the vote thread, instead return -EAGAIN */
/* don't block waiting for the downconvert thread, instead return -EAGAIN */
#define OCFS2_LOCK_NONBLOCK (0x04)

int ocfs2_dlm_init(struct ocfs2_super *osb);
Expand Down Expand Up @@ -112,9 +112,10 @@ void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres);

/* for the vote thread */
/* for the downconvert thread */
void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres);
void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb);

struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void);
void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug);
Expand Down
7 changes: 0 additions & 7 deletions trunk/fs/ocfs2/heartbeat.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
#include "heartbeat.h"
#include "inode.h"
#include "journal.h"
#include "vote.h"

#include "buffer_head_io.h"

Expand All @@ -58,9 +57,7 @@ static void __ocfs2_node_map_set(struct ocfs2_node_map *target,
void ocfs2_init_node_maps(struct ocfs2_super *osb)
{
spin_lock_init(&osb->node_map_lock);
ocfs2_node_map_init(&osb->mounted_map);
ocfs2_node_map_init(&osb->recovery_map);
ocfs2_node_map_init(&osb->umount_map);
ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs);
}

Expand All @@ -82,8 +79,6 @@ static void ocfs2_do_node_down(int node_num,
}

ocfs2_recovery_thread(osb, node_num);

ocfs2_remove_node_from_vote_queues(osb, node_num);
}

/* Called from the dlm when it's about to evict a node. We may also
Expand Down Expand Up @@ -268,8 +263,6 @@ int ocfs2_recovery_map_set(struct ocfs2_super *osb,

spin_lock(&osb->node_map_lock);

__ocfs2_node_map_clear_bit(&osb->mounted_map, num);

if (!test_bit(num, osb->recovery_map.map)) {
__ocfs2_node_map_set_bit(&osb->recovery_map, num);
set = 1;
Expand Down
Loading

0 comments on commit 97023ab

Please sign in to comment.