Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 346190
b: refs/heads/master
c: 07be15b
h: refs/heads/master
v: v3
  • Loading branch information
Lars Ellenberg authored and Philipp Reisner committed Nov 8, 2012
1 parent 1e62763 commit bc0aab7
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 17 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: 3ea35df83fe5e081c793a65361f912ec0835213b
refs/heads/master: 07be15b12c41dbc5ceae130be2e0b655f7611691
2 changes: 2 additions & 0 deletions trunk/drivers/block/drbd/drbd_int.h
Original file line number Diff line number Diff line change
Expand Up @@ -859,6 +859,7 @@ struct drbd_tconn { /* is a resource from the config file */
unsigned int epochs;
enum write_ordering_e write_ordering;

unsigned long last_reconnect_jif;
struct drbd_thread receiver;
struct drbd_thread worker;
struct drbd_thread asender;
Expand All @@ -881,6 +882,7 @@ struct drbd_conf {
struct block_device *this_bdev;
struct gendisk *vdisk;

unsigned long last_reattach_jif;
struct drbd_work resync_work,
unplug_work,
go_diskless,
Expand Down
51 changes: 36 additions & 15 deletions trunk/drivers/block/drbd/drbd_req.c
Original file line number Diff line number Diff line change
Expand Up @@ -1171,45 +1171,66 @@ void request_timer_fn(unsigned long data)
struct list_head *le;
struct net_conf *nc;
unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
unsigned long now;

rcu_read_lock();
nc = rcu_dereference(tconn->net_conf);
ent = nc ? nc->timeout * HZ/10 * nc->ko_count : 0;
if (nc && mdev->state.conn >= C_WF_REPORT_PARAMS)
ent = nc->timeout * HZ/10 * nc->ko_count;

if (get_ldev(mdev)) {
if (get_ldev(mdev)) { /* implicit state.disk >= D_INCONSISTENT */
dt = rcu_dereference(mdev->ldev->disk_conf)->disk_timeout * HZ / 10;
put_ldev(mdev);
}
rcu_read_unlock();

et = min_not_zero(dt, ent);

if (!et || (mdev->state.conn < C_WF_REPORT_PARAMS && mdev->state.disk <= D_FAILED))
if (!et)
return; /* Recurring timer stopped */

now = jiffies;

spin_lock_irq(&tconn->req_lock);
le = &tconn->oldest_tle->requests;
if (list_empty(le)) {
spin_unlock_irq(&tconn->req_lock);
mod_timer(&mdev->request_timer, jiffies + et);
mod_timer(&mdev->request_timer, now + et);
return;
}

le = le->prev;
req = list_entry(le, struct drbd_request, tl_requests);
if (ent && req->rq_state & RQ_NET_PENDING) {
if (time_is_before_eq_jiffies(req->start_time + ent)) {
dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
_drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
}

/* The request is considered timed out, if
* - we have some effective timeout from the configuration,
* with above state restrictions applied,
* - the oldest request is waiting for a response from the network
* resp. the local disk,
* - the oldest request is in fact older than the effective timeout,
* - the connection was established (resp. disk was attached)
* for longer than the timeout already.
* Note that for 32bit jiffies and very stable connections/disks,
* we may have a wrap around, which is caught by
* !time_in_range(now, last_..._jif, last_..._jif + timeout).
*
* Side effect: once per 32bit wrap-around interval, which means every
* ~198 days with 250 HZ, we have a window where the timeout would need
* to expire twice (worst case) to become effective. Good enough.
*/
if (ent && req->rq_state & RQ_NET_PENDING &&
time_after(now, req->start_time + ent) &&
!time_in_range(now, tconn->last_reconnect_jif, tconn->last_reconnect_jif + ent)) {
dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
_drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
}
if (dt && req->rq_state & RQ_LOCAL_PENDING && req->w.mdev == mdev) {
if (time_is_before_eq_jiffies(req->start_time + dt)) {
dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
__drbd_chk_io_error(mdev, 1);
}
if (dt && req->rq_state & RQ_LOCAL_PENDING && req->w.mdev == mdev &&
time_after(now, req->start_time + dt) &&
!time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) {
dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
__drbd_chk_io_error(mdev, 1);
}
nt = (time_is_before_eq_jiffies(req->start_time + et) ? jiffies : req->start_time) + et;
nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et;
spin_unlock_irq(&tconn->req_lock);
mod_timer(&mdev->request_timer, nt);
}
16 changes: 15 additions & 1 deletion trunk/drivers/block/drbd/drbd_state.c
Original file line number Diff line number Diff line change
Expand Up @@ -1075,6 +1075,13 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
drbd_resume_al(mdev);

/* remember last attach time so request_timer_fn() won't
* kill newly established sessions while we are still trying to thaw
* previously frozen IO */
if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
ns.disk > D_NEGOTIATING)
mdev->last_reattach_jif = jiffies;

ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
if (ascw) {
ascw->os = os;
Expand Down Expand Up @@ -1609,8 +1616,15 @@ conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state
enum drbd_state_rv rv;
int vnr, number_of_volumes = 0;

if (mask.conn == C_MASK)
if (mask.conn == C_MASK) {
/* remember last connect time so request_timer_fn() won't
* kill newly established sessions while we are still trying to thaw
* previously frozen IO */
if (tconn->cstate != C_WF_REPORT_PARAMS && val.conn == C_WF_REPORT_PARAMS)
tconn->last_reconnect_jif = jiffies;

tconn->cstate = val.conn;
}

rcu_read_lock();
idr_for_each_entry(&tconn->volumes, mdev, vnr) {
Expand Down

0 comments on commit bc0aab7

Please sign in to comment.