Skip to content

Commit

Permalink
[DLM] timeout fixes
Browse files Browse the repository at this point in the history
Various fixes related to the new timeout feature:
- add_timeout() missed setting TIMEWARN flag on lkb's when the
  TIMEOUT flag was already set
- clear_proc_locks should remove a dead process's locks from the
  timeout list
- the end-of-life calculation for user locks needs to consider that
  ETIMEDOUT is equivalent to -DLM_ECANCEL
- make initial default timewarn_cs config value visible in configfs
- change bit position of TIMEOUT_CANCEL flag so it's not copied to
  a remote master node
- set timestamp on remote lkb's so a lock dump will display the time
  they've been waiting

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
  • Loading branch information
David Teigland authored and Steven Whitehouse committed Jul 9, 2007
1 parent b3cab7b commit 84d8cd6
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 28 deletions.
1 change: 1 addition & 0 deletions fs/dlm/config.c
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,7 @@ static struct config_group *make_cluster(struct config_group *g,
cl->cl_toss_secs = dlm_config.ci_toss_secs;
cl->cl_scan_secs = dlm_config.ci_scan_secs;
cl->cl_log_debug = dlm_config.ci_log_debug;
cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;

space_list = &sps->ss_group;
comm_list = &cms->cs_group;
Expand Down
2 changes: 1 addition & 1 deletion fs/dlm/dlm_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,9 +215,9 @@ struct dlm_args {
#define DLM_IFL_OVERLAP_CANCEL 0x00100000
#define DLM_IFL_ENDOFLIFE 0x00200000
#define DLM_IFL_WATCH_TIMEWARN 0x00400000
#define DLM_IFL_TIMEOUT_CANCEL 0x00800000
#define DLM_IFL_USER 0x00000001
#define DLM_IFL_ORPHAN 0x00000002
#define DLM_IFL_TIMEOUT_CANCEL 0x00000004

struct dlm_lkb {
struct dlm_rsb *lkb_resource; /* the rsb */
Expand Down
13 changes: 7 additions & 6 deletions fs/dlm/lock.c
Original file line number Diff line number Diff line change
Expand Up @@ -1010,17 +1010,18 @@ static void add_timeout(struct dlm_lkb *lkb)
{
struct dlm_ls *ls = lkb->lkb_resource->res_ls;

if (is_master_copy(lkb))
if (is_master_copy(lkb)) {
lkb->lkb_timestamp = jiffies;
return;

if (lkb->lkb_exflags & DLM_LKF_TIMEOUT)
goto add_it;
}

if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) &&
!(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
lkb->lkb_flags |= DLM_IFL_WATCH_TIMEWARN;
goto add_it;
}
if (lkb->lkb_exflags & DLM_LKF_TIMEOUT)
goto add_it;
return;

add_it:
Expand Down Expand Up @@ -3510,8 +3511,7 @@ static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
case -DLM_ECANCEL:
receive_flags_reply(lkb, ms);
revert_lock_pc(r, lkb);
if (ms->m_result)
queue_cast(r, lkb, -DLM_ECANCEL);
queue_cast(r, lkb, -DLM_ECANCEL);
break;
case 0:
break;
Expand Down Expand Up @@ -4534,6 +4534,7 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
lkb = del_proc_lock(ls, proc);
if (!lkb)
break;
del_timeout(lkb);
if (lkb->lkb_exflags & DLM_LKF_PERSISTENT)
orphan_proc_lock(ls, lkb);
else
Expand Down
2 changes: 0 additions & 2 deletions fs/dlm/netlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,6 @@ void dlm_timeout_warn(struct dlm_lkb *lkb)
size_t size;
int rv;

log_debug(lkb->lkb_resource->res_ls, "timeout_warn %x", lkb->lkb_id);

size = nla_total_size(sizeof(struct dlm_lock_data)) +
nla_total_size(0); /* why this? */

Expand Down
49 changes: 30 additions & 19 deletions fs/dlm/user.c
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,35 @@ static void compat_output(struct dlm_lock_result *res,
}
#endif

/* Figure out if this lock is at the end of its life and no longer
available for the application to use. The lkb still exists until
the final ast is read. A lock becomes EOL in three situations:
1. a noqueue request fails with EAGAIN
2. an unlock completes with EUNLOCK
3. a cancel of a waiting request completes with ECANCEL/EDEADLK
An EOL lock needs to be removed from the process's list of locks.
And we can't allow any new operation on an EOL lock. This is
not related to the lifetime of the lkb struct which is managed
entirely by refcount. */

static int lkb_is_endoflife(struct dlm_lkb *lkb, int sb_status, int type)
{
switch (sb_status) {
case -DLM_EUNLOCK:
return 1;
case -DLM_ECANCEL:
case -ETIMEDOUT:
if (lkb->lkb_grmode == DLM_LOCK_IV)
return 1;
break;
case -EAGAIN:
if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV)
return 1;
break;
}
return 0;
}

/* we could possibly check if the cancel of an orphan has resulted in the lkb
being removed and then remove that lkb from the orphans list and free it */

Expand Down Expand Up @@ -184,25 +213,7 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
log_debug(ls, "ast overlap %x status %x %x",
lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags);

/* Figure out if this lock is at the end of its life and no longer
available for the application to use. The lkb still exists until
the final ast is read. A lock becomes EOL in three situations:
1. a noqueue request fails with EAGAIN
2. an unlock completes with EUNLOCK
3. a cancel of a waiting request completes with ECANCEL
An EOL lock needs to be removed from the process's list of locks.
And we can't allow any new operation on an EOL lock. This is
not related to the lifetime of the lkb struct which is managed
entirely by refcount. */

if (type == AST_COMP &&
lkb->lkb_grmode == DLM_LOCK_IV &&
ua->lksb.sb_status == -EAGAIN)
eol = 1;
else if (ua->lksb.sb_status == -DLM_EUNLOCK ||
(ua->lksb.sb_status == -DLM_ECANCEL &&
lkb->lkb_grmode == DLM_LOCK_IV))
eol = 1;
eol = lkb_is_endoflife(lkb, ua->lksb.sb_status, type);
if (eol) {
lkb->lkb_ast_type &= ~AST_BAST;
lkb->lkb_flags |= DLM_IFL_ENDOFLIFE;
Expand Down

0 comments on commit 84d8cd6

Please sign in to comment.