Skip to content
Navigation Menu
Toggle navigation
Sign in
In this repository
All GitHub Enterprise
↵
Jump to
↵
No suggested jump to results
In this repository
All GitHub Enterprise
↵
Jump to
↵
In this organization
All GitHub Enterprise
↵
Jump to
↵
In this repository
All GitHub Enterprise
↵
Jump to
↵
Sign in
Reseting focus
You signed in with another tab or window.
Reload
to refresh your session.
You signed out in another tab or window.
Reload
to refresh your session.
You switched accounts on another tab or window.
Reload
to refresh your session.
Dismiss alert
{{ message }}
git-mirror
/
linux
Public
Notifications
You must be signed in to change notification settings
Fork
0
Star
0
Code
Issues
0
Pull requests
0
Actions
Projects
0
Security
Insights
Additional navigation options
Code
Issues
Pull requests
Actions
Projects
Security
Insights
Files
master
Documentation
arch
block
certs
crypto
drivers
firmware
fs
9p
adfs
affs
afs
autofs4
befs
bfs
btrfs
cachefiles
ceph
cifs
coda
configfs
cramfs
crypto
debugfs
devpts
dlm
Kconfig
Makefile
ast.c
ast.h
config.c
config.h
debug_fs.c
dir.c
dir.h
dlm_internal.h
lock.c
lock.h
lockspace.c
lockspace.h
lowcomms.c
lowcomms.h
lvb_table.h
main.c
member.c
member.h
memory.c
memory.h
midcomms.c
midcomms.h
netlink.c
plock.c
rcom.c
rcom.h
recover.c
recover.h
recoverd.c
recoverd.h
requestqueue.c
requestqueue.h
user.c
user.h
util.c
util.h
ecryptfs
efivarfs
efs
exofs
exportfs
ext2
ext4
f2fs
fat
freevxfs
fscache
fuse
gfs2
hfs
hfsplus
hostfs
hpfs
hugetlbfs
isofs
jbd2
jffs2
jfs
kernfs
lockd
logfs
minix
ncpfs
nfs
nfs_common
nfsd
nilfs2
nls
notify
ntfs
ocfs2
omfs
openpromfs
orangefs
overlayfs
proc
pstore
qnx4
qnx6
quota
ramfs
reiserfs
romfs
squashfs
sysfs
sysv
tracefs
ubifs
udf
ufs
xfs
Kconfig
Kconfig.binfmt
Makefile
aio.c
anon_inodes.c
attr.c
bad_inode.c
binfmt_aout.c
binfmt_elf.c
binfmt_elf_fdpic.c
binfmt_em86.c
binfmt_flat.c
binfmt_misc.c
binfmt_script.c
block_dev.c
buffer.c
char_dev.c
compat.c
compat_binfmt_elf.c
compat_ioctl.c
coredump.c
dax.c
dcache.c
dcookies.c
direct-io.c
drop_caches.c
eventfd.c
eventpoll.c
exec.c
fcntl.c
fhandle.c
file.c
file_table.c
filesystems.c
fs-writeback.c
fs_pin.c
fs_struct.c
inode.c
internal.h
ioctl.c
libfs.c
locks.c
mbcache.c
mount.h
mpage.c
namei.c
namespace.c
no-block.c
nsfs.c
open.c
pipe.c
pnode.c
pnode.h
posix_acl.c
proc_namespace.c
read_write.c
readdir.c
select.c
seq_file.c
signalfd.c
splice.c
stack.c
stat.c
statfs.c
super.c
sync.c
timerfd.c
userfaultfd.c
utimes.c
xattr.c
include
init
ipc
kernel
lib
mm
net
samples
scripts
security
sound
tools
usr
virt
.get_maintainer.ignore
.gitignore
.mailmap
COPYING
CREDITS
Kbuild
Kconfig
MAINTAINERS
Makefile
README
REPORTING-BUGS
Breadcrumbs
linux
/
fs
/
dlm
/
recoverd.c
Blame
Blame
Latest commit
History
History
342 lines (267 loc) · 7.94 KB
Breadcrumbs
linux
/
fs
/
dlm
/
recoverd.c
Top
File metadata and controls
Code
Blame
342 lines (267 loc) · 7.94 KB
Raw
/******************************************************************************
*******************************************************************************
**
**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
**  Copyright (C) 2004-2011 Red Hat, Inc.  All rights reserved.
**
**  This copyrighted material is made available to anyone wishing to use,
**  modify, copy, or redistribute it subject to the terms and conditions
**  of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/

#include "dlm_internal.h"
#include "lockspace.h"
#include "member.h"
#include "dir.h"
#include "ast.h"
#include "recover.h"
#include "lowcomms.h"
#include "lock.h"
#include "requestqueue.h"
#include "recoverd.h"


/* If the start for which we're re-enabling locking (seq) has been superseded
   by a newer stop (ls_recover_seq), we need to leave locking disabled.

   We suspend dlm_recv threads here to avoid the race where dlm_recv a) sees
   locking stopped and b) adds a message to the requestqueue, but dlm_recoverd
   enables locking and clears the requestqueue between a and b.

   Returns 0 when locking was enabled, or -EINTR when ls_recover_seq no longer
   matches seq (in which case nothing is changed). */

static int enable_locking(struct dlm_ls *ls, uint64_t seq)
{
	int error = -EINTR;

	down_write(&ls->ls_recv_active);

	/* ls_recover_seq is compared and the flags are flipped under the
	   same spinlock so the decision and the state change are atomic
	   with respect to other ls_recover_lock holders. */
	spin_lock(&ls->ls_recover_lock);
	if (ls->ls_recover_seq == seq) {
		set_bit(LSFL_RUNNING, &ls->ls_flags);
		/* unblocks processes waiting to enter the dlm */
		up_write(&ls->ls_in_recovery);
		clear_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
		error = 0;
	}
	spin_unlock(&ls->ls_recover_lock);

	up_write(&ls->ls_recv_active);
	return error;
}

/* Run one complete recovery pass for the start described by rv.

   ls_recoverd_active is held for the whole pass and released on both the
   success and the failure path.  The steps are executed strictly in the
   order below; the first step that reports an error aborts the pass through
   the "fail" label, which releases the root list, logs the error and returns
   it.  Callbacks suspended at the top are resumed only on the path that
   reaches dlm_callback_resume(); an aborted pass leaves them suspended.

   Returns 0 on success (after calling dlm_lsop_recover_done()), otherwise
   the error returned by the failing step. */

static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
{
	unsigned long start;
	int error, neg = 0;

	log_rinfo(ls, "dlm_recover %llu", (unsigned long long)rv->seq);

	mutex_lock(&ls->ls_recoverd_active);

	dlm_callback_suspend(ls);

	dlm_clear_toss(ls);

	/*
	 * This list of root rsb's will be the basis of most of the recovery
	 * routines.
	 */

	dlm_create_root_list(ls);

	/*
	 * Add or remove nodes from the lockspace's ls_nodes list.
	 */

	error = dlm_recover_members(ls, rv, &neg);
	if (error) {
		log_rinfo(ls, "dlm_recover_members error %d", error);
		goto fail;
	}

	dlm_recover_dir_nodeid(ls);

	/* Reset the per-pass counters that are reported in the log below. */
	ls->ls_recover_dir_sent_res = 0;
	ls->ls_recover_dir_sent_msg = 0;
	ls->ls_recover_locks_in = 0;

	dlm_set_recover_status(ls, DLM_RS_NODES);

	error = dlm_recover_members_wait(ls);
	if (error) {
		log_rinfo(ls, "dlm_recover_members_wait error %d", error);
		goto fail;
	}

	/* The elapsed time in the final "done" message is measured from
	   here, i.e. it excludes the membership steps above. */
	start = jiffies;

	/*
	 * Rebuild our own share of the directory by collecting from all other
	 * nodes their master rsb names that hash to us.
	 */

	error = dlm_recover_directory(ls);
	if (error) {
		log_rinfo(ls, "dlm_recover_directory error %d", error);
		goto fail;
	}

	dlm_set_recover_status(ls, DLM_RS_DIR);

	error = dlm_recover_directory_wait(ls);
	if (error) {
		log_rinfo(ls, "dlm_recover_directory_wait error %d", error);
		goto fail;
	}

	log_rinfo(ls, "dlm_recover_directory %u out %u messages",
		  ls->ls_recover_dir_sent_res, ls->ls_recover_dir_sent_msg);

	/*
	 * We may have outstanding operations that are waiting for a reply from
	 * a failed node.  Mark these to be resent after recovery.  Unlock and
	 * cancel ops can just be completed.
	 */

	dlm_recover_waiters_pre(ls);

	error = dlm_recovery_stopped(ls);
	if (error)
		goto fail;

	if (neg || dlm_no_directory(ls)) {
		/*
		 * Clear lkb's for departed nodes.
		 */

		dlm_recover_purge(ls);

		/*
		 * Get new master nodeid's for rsb's that were mastered on
		 * departed nodes.
		 */

		error = dlm_recover_masters(ls);
		if (error) {
			log_rinfo(ls, "dlm_recover_masters error %d", error);
			goto fail;
		}

		/*
		 * Send our locks on remastered rsb's to the new masters.
		 */

		error = dlm_recover_locks(ls);
		if (error) {
			log_rinfo(ls, "dlm_recover_locks error %d", error);
			goto fail;
		}

		dlm_set_recover_status(ls, DLM_RS_LOCKS);

		error = dlm_recover_locks_wait(ls);
		if (error) {
			log_rinfo(ls, "dlm_recover_locks_wait error %d", error);
			goto fail;
		}

		log_rinfo(ls, "dlm_recover_locks %u in",
			  ls->ls_recover_locks_in);

		/*
		 * Finalize state in master rsb's now that all locks can be
		 * checked.  This includes conversion resolution and lvb
		 * settings.
		 */

		dlm_recover_rsbs(ls);
	} else {
		/*
		 * Other lockspace members may be going through the "neg" steps
		 * while also adding us to the lockspace, in which case they'll
		 * be doing the recover_locks (RS_LOCKS) barrier.
		 */
		dlm_set_recover_status(ls, DLM_RS_LOCKS);

		error = dlm_recover_locks_wait(ls);
		if (error) {
			log_rinfo(ls, "dlm_recover_locks_wait error %d", error);
			goto fail;
		}
	}

	dlm_release_root_list(ls);

	/*
	 * Purge directory-related requests that are saved in requestqueue.
	 * All dir requests from before recovery are invalid now due to the dir
	 * rebuild and will be resent by the requesting nodes.
	 */

	dlm_purge_requestqueue(ls);

	dlm_set_recover_status(ls, DLM_RS_DONE);

	error = dlm_recover_done_wait(ls);
	if (error) {
		log_rinfo(ls, "dlm_recover_done_wait error %d", error);
		goto fail;
	}

	dlm_clear_members_gone(ls);

	dlm_adjust_timeouts(ls);

	dlm_callback_resume(ls);

	error = enable_locking(ls, rv->seq);
	if (error) {
		log_rinfo(ls, "enable_locking error %d", error);
		goto fail;
	}

	error = dlm_process_requestqueue(ls);
	if (error) {
		log_rinfo(ls, "dlm_process_requestqueue error %d", error);
		goto fail;
	}

	error = dlm_recover_waiters_post(ls);
	if (error) {
		log_rinfo(ls, "dlm_recover_waiters_post error %d", error);
		goto fail;
	}

	dlm_recover_grant(ls);

	log_rinfo(ls, "dlm_recover %llu generation %u done: %u ms",
		  (unsigned long long)rv->seq, ls->ls_generation,
		  jiffies_to_msecs(jiffies - start));
	mutex_unlock(&ls->ls_recoverd_active);

	dlm_lsop_recover_done(ls);
	return 0;

 fail:
	/* Also reached after the root list was already released above;
	   NOTE(review): this relies on dlm_release_root_list() tolerating
	   a second call - confirm against its definition. */
	dlm_release_root_list(ls);
	log_rinfo(ls, "dlm_recover %llu error %d",
		  (unsigned long long)rv->seq, error);
	mutex_unlock(&ls->ls_recoverd_active);
	return error;
}

/* The dlm_ls_start() that created the rv we take here may already have been
   stopped via dlm_ls_stop(); in that case we need to leave the RECOVERY_STOP
   flag set.

   Takes ownership of ls->ls_recover_args (the pointer is cleared under
   ls_recover_lock), runs ls_recover() on it and then frees both rv->nodes
   and rv.  The return value of ls_recover() is intentionally not used here;
   the failure has already been logged inside ls_recover(). */

static void do_ls_recovery(struct dlm_ls *ls)
{
	struct dlm_recover *rv = NULL;

	spin_lock(&ls->ls_recover_lock);
	rv = ls->ls_recover_args;
	ls->ls_recover_args = NULL;
	if (rv && ls->ls_recover_seq == rv->seq)
		clear_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
	spin_unlock(&ls->ls_recover_lock);

	if (rv) {
		ls_recover(ls, rv);
		kfree(rv->nodes);
		kfree(rv);
	}
}

/* Thread function of the per-lockspace recovery thread.

   arg is handed to dlm_find_lockspace_local() to obtain the lockspace; the
   reference is dropped with dlm_put_lockspace() on exit.  The thread starts
   out holding ls_in_recovery for write (flagged by LSFL_RECOVER_LOCK) and
   then sleeps until one of two flags is set:

     LSFL_RECOVER_DOWN - take ls_in_recovery for write again, set
                         LSFL_RECOVER_LOCK and wake ls_recover_lock_wait
     LSFL_RECOVER_WORK - run do_ls_recovery()

   If LSFL_RECOVER_LOCK is still set when the thread is stopped, the write
   lock on ls_in_recovery is released before returning.

   Returns 0 on normal exit, -1 if the lockspace cannot be found. */

static int dlm_recoverd(void *arg)
{
	struct dlm_ls *ls;

	ls = dlm_find_lockspace_local(arg);
	if (!ls) {
		log_print("dlm_recoverd: no lockspace %p", arg);
		return -1;
	}

	down_write(&ls->ls_in_recovery);
	set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
	wake_up(&ls->ls_recover_lock_wait);

	for (;;) {
		/*
		 * kthread_should_stop() must be tested *after* the task state
		 * has been set to TASK_INTERRUPTIBLE.  Testing it first (as a
		 * plain while-condition) leaves a window in which
		 * kthread_stop() can set the stop flag and issue its wakeup
		 * before we mark ourselves sleeping; schedule() would then
		 * sleep with nobody left to wake us and kthread_stop() would
		 * never return.  With this ordering, a stop request that
		 * arrives before the check is seen by the check, and one that
		 * arrives after it finds us in TASK_INTERRUPTIBLE and makes
		 * us runnable again, so schedule() returns.
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		if (kthread_should_stop()) {
			set_current_state(TASK_RUNNING);
			break;
		}

		/* Only sleep when there is no pending request. */
		if (!test_bit(LSFL_RECOVER_WORK, &ls->ls_flags) &&
		    !test_bit(LSFL_RECOVER_DOWN, &ls->ls_flags))
			schedule();
		set_current_state(TASK_RUNNING);

		if (test_and_clear_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) {
			down_write(&ls->ls_in_recovery);
			set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
			wake_up(&ls->ls_recover_lock_wait);
		}

		if (test_and_clear_bit(LSFL_RECOVER_WORK, &ls->ls_flags))
			do_ls_recovery(ls);
	}

	/* Do not exit with ls_in_recovery still write-locked. */
	if (test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags))
		up_write(&ls->ls_in_recovery);

	dlm_put_lockspace(ls);

	return 0;
}

/* Create and start the recovery thread for ls.

   On success the task is stored in ls->ls_recoverd_task and 0 is returned;
   otherwise the error encoded in the kthread_run() result is returned and
   ls->ls_recoverd_task is left untouched. */

int dlm_recoverd_start(struct dlm_ls *ls)
{
	struct task_struct *p;
	int error = 0;

	p = kthread_run(dlm_recoverd, ls, "dlm_recoverd");
	if (IS_ERR(p))
		error = PTR_ERR(p);
	else
		ls->ls_recoverd_task = p;
	return error;
}

/* Stop the recovery thread stored in ls->ls_recoverd_task. */

void dlm_recoverd_stop(struct dlm_ls *ls)
{
	kthread_stop(ls->ls_recoverd_task);
}

/* Keep a recovery pass from running until dlm_recoverd_resume() is called.

   ls_recover() holds ls_recoverd_active for the duration of a pass, so
   taking the mutex here returns only when no pass is in progress.  The
   wake_up() on ls_wait_general is issued first; presumably this prompts a
   pass that is blocked in one of its wait steps to re-check its state -
   confirm against the wait helpers. */

void dlm_recoverd_suspend(struct dlm_ls *ls)
{
	wake_up(&ls->ls_wait_general);
	mutex_lock(&ls->ls_recoverd_active);
}

/* Release the mutex taken by dlm_recoverd_suspend(). */

void dlm_recoverd_resume(struct dlm_ls *ls)
{
	mutex_unlock(&ls->ls_recoverd_active);
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
You can’t perform that action at this time.