Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 276180
b: refs/heads/master
c: 6b27f62
h: refs/heads/master
v: v3
  • Loading branch information
Sunil Mushran committed Jul 24, 2011
1 parent f178db0 commit 69d99db
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 14 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: 3ba169ccec1c5ad0f678e04fd29b990197fdfe79
refs/heads/master: 6b27f62fc750d85bc6fc3718b3b38ec60edc2d74
7 changes: 0 additions & 7 deletions trunk/fs/ocfs2/dlm/dlmdomain.c
Original file line number Diff line number Diff line change
Expand Up @@ -2138,13 +2138,6 @@ struct dlm_ctxt * dlm_register_domain(const char *domain,
goto leave;
}

if (!o2hb_check_local_node_heartbeating()) {
mlog(ML_ERROR, "the local node has not been configured, or is "
"not heartbeating\n");
ret = -EPROTO;
goto leave;
}

mlog(0, "register called for domain \"%s\"\n", domain);

retry:
Expand Down
67 changes: 61 additions & 6 deletions trunk/fs/ocfs2/stack_o2cb.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "cluster/masklog.h"
#include "cluster/nodemanager.h"
#include "cluster/heartbeat.h"
#include "cluster/tcp.h"

#include "stackglue.h"

Expand Down Expand Up @@ -255,6 +256,61 @@ static void o2cb_dump_lksb(struct ocfs2_dlm_lksb *lksb)
dlm_print_one_lock(lksb->lksb_o2dlm.lockid);
}

/*
 * Check if this node is heartbeating and is connected to all other
 * heartbeating nodes.
 *
 * Returns 0 as soon as the heartbeat map and the o2net connection map
 * agree, -EINVAL if this node has not been configured or is missing from
 * the heartbeat map, and -ENOTCONN if the two maps still differ after
 * O2CB_MAP_STABILIZE_COUNT attempts (one second apart).
 */
static int o2cb_cluster_check(void)
{
	u8 node_num;
	int i;
	unsigned long hbmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
	unsigned long netmap[BITS_TO_LONGS(O2NM_MAX_NODES)];

	/* O2NM_MAX_NODES is the "no local node" sentinel checked for here */
	node_num = o2nm_this_node();
	if (node_num == O2NM_MAX_NODES) {
		printk(KERN_ERR "o2cb: This node has not been configured.\n");
		return -EINVAL;
	}

	/*
	 * o2dlm expects o2net sockets to be created. If not, then
	 * dlm_join_domain() fails with a stack of errors which are both cryptic
	 * and incomplete. The idea here is to detect upfront whether we have
	 * managed to connect to all nodes or not. If not, then list the nodes
	 * to allow the user to check the configuration (incorrect IP, firewall,
	 * etc.) Yes, this is racy. But it's not the end of the world.
	 */
#define O2CB_MAP_STABILIZE_COUNT 60
	for (i = 0; i < O2CB_MAP_STABILIZE_COUNT; ++i) {
		o2hb_fill_node_map(hbmap, sizeof(hbmap));
		if (!test_bit(node_num, hbmap)) {
			printk(KERN_ERR "o2cb: %s heartbeat has not been "
			       "started.\n", (o2hb_global_heartbeat_active() ?
					      "Global" : "Local"));
			return -EINVAL;
		}
		o2net_fill_node_map(netmap, sizeof(netmap));
		/* Force set the current node to allow easy compare */
		set_bit(node_num, netmap);
		if (!memcmp(hbmap, netmap, sizeof(hbmap)))
			return 0;
		/*
		 * Give the maps a second to settle before the next attempt,
		 * but do not sleep after the final one: nothing is re-checked
		 * afterwards, so that delay would only postpone the error.
		 */
		if (i < O2CB_MAP_STABILIZE_COUNT - 1)
			msleep(1000);
	}

	/*
	 * Report every node that is heartbeating (set in hbmap) but to which
	 * we have no connection (clear in netmap).
	 * NOTE(review): the continuation printks below carry no explicit
	 * KERN_CONT level - confirm whether this tree expects one.
	 */
	printk(KERN_ERR "o2cb: This node could not connect to nodes:");
	i = -1;
	while ((i = find_next_bit(hbmap, O2NM_MAX_NODES,
				  i + 1)) < O2NM_MAX_NODES) {
		if (!test_bit(i, netmap))
			printk(" %d", i);
	}
	printk(".\n");

	return -ENOTCONN;
}

/*
* Called from the dlm when it's about to evict a node. This is how the
* classic stack signals node death.
Expand All @@ -280,12 +336,11 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn)
BUG_ON(conn == NULL);
BUG_ON(conn->cc_proto == NULL);

/* for now we only have one cluster/node, make sure we see it
* in the heartbeat universe */
if (!o2hb_check_local_node_heartbeating()) {
if (o2hb_global_heartbeat_active())
mlog(ML_ERROR, "Global heartbeat not started\n");
rc = -EINVAL;
/* Ensure cluster stack is up and all nodes are connected */
rc = o2cb_cluster_check();
if (rc) {
printk(KERN_ERR "o2cb: Cluster check failed. Fix errors "
"before retrying.\n");
goto out;
}

Expand Down

0 comments on commit 69d99db

Please sign in to comment.