Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 212795
b: refs/heads/master
c: b1c5ebf
h: refs/heads/master
i:
  212793: 2009620
  212791: a721579
v: v3
  • Loading branch information
Sunil Mushran committed Oct 8, 2010
1 parent 9120d34 commit 87ad4f7
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 1 deletion.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: 43182d2a799865872041b6e4d8387131e9462f56
refs/heads/master: b1c5ebfbe398b3360614a4788c02061cd153e60a
41 changes: 41 additions & 0 deletions trunk/fs/ocfs2/cluster/heartbeat.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,12 @@ static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue);
* - o2hb_live_region_bitmap tracks live regions (seen steady iterations).
* - o2hb_quorum_region_bitmap tracks live regions that have seen all nodes
* heartbeat on them.
* - o2hb_failed_region_bitmap tracks the regions that have seen io timeouts.
*/
/*
 * Each bitmap below is sized to hold O2NM_MAX_REGIONS bits. The quorum and
 * failed bitmaps are indexed by a region's hr_region_num; presumably the
 * other two are indexed the same way (confirm against their users).
 */
static unsigned long o2hb_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)];
static unsigned long o2hb_live_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)];
static unsigned long o2hb_quorum_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)];
static unsigned long o2hb_failed_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)];

#define O2HB_DB_TYPE_LIVENODES 0
struct o2hb_debug_buf {
Expand Down Expand Up @@ -217,15 +219,48 @@ struct o2hb_bio_wait_ctxt {
int wc_error;
};

/*
 * Count the set bits in a bitmap.
 *
 * @map:   bitmap to scan
 * @count: number of bits of @map to examine
 *
 * Returns the number of bits set among the first @count bits.
 */
static int o2hb_pop_count(void *map, int count)
{
	int nr_set = 0;
	int bit;

	/* Hop from one set bit to the next until the scan runs off the end. */
	for (bit = find_next_bit(map, count, 0);
	     bit < count;
	     bit = find_next_bit(map, count, bit + 1))
		nr_set++;

	return nr_set;
}

/*
 * Delayed-work handler for a region's heartbeat write timeout.
 *
 * @work: the work item embedded in the region's hr_write_timeout_work
 *
 * Logs the timeout. When global heartbeat is active, the region is recorded
 * as failed (only if it is a quorum region) and the handler returns without
 * calling o2quo_disk_timeout() while fewer than half of the quorum regions
 * have failed. In every other case o2quo_disk_timeout() is called.
 */
static void o2hb_write_timeout(struct work_struct *work)
{
	struct o2hb_region *region;
	unsigned long irq_flags;
	int nr_failed, nr_quorum;

	region = container_of(work, struct o2hb_region,
			      hr_write_timeout_work.work);

	mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u "
	     "milliseconds\n", region->hr_dev_name,
	     jiffies_to_msecs(jiffies - region->hr_last_timeout_start));

	if (o2hb_global_heartbeat_active()) {
		/* Update and sample both bitmaps under the same lock hold. */
		spin_lock_irqsave(&o2hb_live_lock, irq_flags);

		if (test_bit(region->hr_region_num, o2hb_quorum_region_bitmap))
			set_bit(region->hr_region_num,
				o2hb_failed_region_bitmap);

		nr_failed = o2hb_pop_count(&o2hb_failed_region_bitmap,
					   O2NM_MAX_REGIONS);
		nr_quorum = o2hb_pop_count(&o2hb_quorum_region_bitmap,
					   O2NM_MAX_REGIONS);

		spin_unlock_irqrestore(&o2hb_live_lock, irq_flags);

		mlog(ML_HEARTBEAT, "Number of regions %d, failed regions %d\n",
		     nr_quorum, nr_failed);

		/*
		 * Hold off fencing until at least half of the quorum
		 * regions have failed.
		 */
		if (nr_quorum > (nr_failed << 1))
			return;
	}

	o2quo_disk_timeout();
}

Expand All @@ -234,6 +269,11 @@ static void o2hb_arm_write_timeout(struct o2hb_region *reg)
mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n",
O2HB_MAX_WRITE_TIMEOUT_MS);

if (o2hb_global_heartbeat_active()) {
spin_lock(&o2hb_live_lock);
clear_bit(reg->hr_region_num, o2hb_failed_region_bitmap);
spin_unlock(&o2hb_live_lock);
}
cancel_delayed_work(&reg->hr_write_timeout_work);
reg->hr_last_timeout_start = jiffies;
schedule_delayed_work(&reg->hr_write_timeout_work,
Expand Down Expand Up @@ -1173,6 +1213,7 @@ int o2hb_init(void)
memset(o2hb_region_bitmap, 0, sizeof(o2hb_region_bitmap));
memset(o2hb_live_region_bitmap, 0, sizeof(o2hb_live_region_bitmap));
memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap));
memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap));

return o2hb_debug_init();
}
Expand Down

0 comments on commit 87ad4f7

Please sign in to comment.