drbd: fix resend/resubmit of frozen IO
DRBD can freeze IO, either due to the fencing policy (fencing resource-and-stonith), or because we lost access to data (on-no-data-accessible suspend-io). Resuming from there (re-connect, re-attach, or explicit admin intervention) should "just work". Unfortunately, since the commit "drbd: Implemented real timeout checking for request processing time", if a timeout is configured and the re-attach/re-connect did not happen within that timeout, request_timer_fn() would time out and detach/disconnect virtually immediately. This change tracks the most recent attach and connect, and does not time out within <configured timeout interval> after attach/connect. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
This commit is contained in:
parent
3ea35df83f
commit
07be15b12c
@ -859,6 +859,7 @@ struct drbd_tconn { /* is a resource from the config file */
|
|||||||
unsigned int epochs;
|
unsigned int epochs;
|
||||||
enum write_ordering_e write_ordering;
|
enum write_ordering_e write_ordering;
|
||||||
|
|
||||||
|
unsigned long last_reconnect_jif;
|
||||||
struct drbd_thread receiver;
|
struct drbd_thread receiver;
|
||||||
struct drbd_thread worker;
|
struct drbd_thread worker;
|
||||||
struct drbd_thread asender;
|
struct drbd_thread asender;
|
||||||
@ -881,6 +882,7 @@ struct drbd_conf {
|
|||||||
struct block_device *this_bdev;
|
struct block_device *this_bdev;
|
||||||
struct gendisk *vdisk;
|
struct gendisk *vdisk;
|
||||||
|
|
||||||
|
unsigned long last_reattach_jif;
|
||||||
struct drbd_work resync_work,
|
struct drbd_work resync_work,
|
||||||
unplug_work,
|
unplug_work,
|
||||||
go_diskless,
|
go_diskless,
|
||||||
|
@ -1171,12 +1171,14 @@ void request_timer_fn(unsigned long data)
|
|||||||
struct list_head *le;
|
struct list_head *le;
|
||||||
struct net_conf *nc;
|
struct net_conf *nc;
|
||||||
unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
|
unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
|
||||||
|
unsigned long now;
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
nc = rcu_dereference(tconn->net_conf);
|
nc = rcu_dereference(tconn->net_conf);
|
||||||
ent = nc ? nc->timeout * HZ/10 * nc->ko_count : 0;
|
if (nc && mdev->state.conn >= C_WF_REPORT_PARAMS)
|
||||||
|
ent = nc->timeout * HZ/10 * nc->ko_count;
|
||||||
|
|
||||||
if (get_ldev(mdev)) {
|
if (get_ldev(mdev)) { /* implicit state.disk >= D_INCONSISTENT */
|
||||||
dt = rcu_dereference(mdev->ldev->disk_conf)->disk_timeout * HZ / 10;
|
dt = rcu_dereference(mdev->ldev->disk_conf)->disk_timeout * HZ / 10;
|
||||||
put_ldev(mdev);
|
put_ldev(mdev);
|
||||||
}
|
}
|
||||||
@ -1184,32 +1186,51 @@ void request_timer_fn(unsigned long data)
|
|||||||
|
|
||||||
et = min_not_zero(dt, ent);
|
et = min_not_zero(dt, ent);
|
||||||
|
|
||||||
if (!et || (mdev->state.conn < C_WF_REPORT_PARAMS && mdev->state.disk <= D_FAILED))
|
if (!et)
|
||||||
return; /* Recurring timer stopped */
|
return; /* Recurring timer stopped */
|
||||||
|
|
||||||
|
now = jiffies;
|
||||||
|
|
||||||
spin_lock_irq(&tconn->req_lock);
|
spin_lock_irq(&tconn->req_lock);
|
||||||
le = &tconn->oldest_tle->requests;
|
le = &tconn->oldest_tle->requests;
|
||||||
if (list_empty(le)) {
|
if (list_empty(le)) {
|
||||||
spin_unlock_irq(&tconn->req_lock);
|
spin_unlock_irq(&tconn->req_lock);
|
||||||
mod_timer(&mdev->request_timer, jiffies + et);
|
mod_timer(&mdev->request_timer, now + et);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
le = le->prev;
|
le = le->prev;
|
||||||
req = list_entry(le, struct drbd_request, tl_requests);
|
req = list_entry(le, struct drbd_request, tl_requests);
|
||||||
if (ent && req->rq_state & RQ_NET_PENDING) {
|
|
||||||
if (time_is_before_eq_jiffies(req->start_time + ent)) {
|
/* The request is considered timed out, if
|
||||||
|
* - we have some effective timeout from the configuration,
|
||||||
|
* with above state restrictions applied,
|
||||||
|
* - the oldest request is waiting for a response from the network
|
||||||
|
* resp. the local disk,
|
||||||
|
* - the oldest request is in fact older than the effective timeout,
|
||||||
|
* - the connection was established (resp. disk was attached)
|
||||||
|
* for longer than the timeout already.
|
||||||
|
* Note that for 32bit jiffies and very stable connections/disks,
|
||||||
|
* we may have a wrap around, which is catched by
|
||||||
|
* !time_in_range(now, last_..._jif, last_..._jif + timeout).
|
||||||
|
*
|
||||||
|
* Side effect: once per 32bit wrap-around interval, which means every
|
||||||
|
* ~198 days with 250 HZ, we have a window where the timeout would need
|
||||||
|
* to expire twice (worst case) to become effective. Good enough.
|
||||||
|
*/
|
||||||
|
if (ent && req->rq_state & RQ_NET_PENDING &&
|
||||||
|
time_after(now, req->start_time + ent) &&
|
||||||
|
!time_in_range(now, tconn->last_reconnect_jif, tconn->last_reconnect_jif + ent)) {
|
||||||
dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
|
dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
|
||||||
_drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
|
_drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
|
||||||
}
|
}
|
||||||
}
|
if (dt && req->rq_state & RQ_LOCAL_PENDING && req->w.mdev == mdev &&
|
||||||
if (dt && req->rq_state & RQ_LOCAL_PENDING && req->w.mdev == mdev) {
|
time_after(now, req->start_time + dt) &&
|
||||||
if (time_is_before_eq_jiffies(req->start_time + dt)) {
|
!time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) {
|
||||||
dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
|
dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
|
||||||
__drbd_chk_io_error(mdev, 1);
|
__drbd_chk_io_error(mdev, 1);
|
||||||
}
|
}
|
||||||
}
|
nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et;
|
||||||
nt = (time_is_before_eq_jiffies(req->start_time + et) ? jiffies : req->start_time) + et;
|
|
||||||
spin_unlock_irq(&tconn->req_lock);
|
spin_unlock_irq(&tconn->req_lock);
|
||||||
mod_timer(&mdev->request_timer, nt);
|
mod_timer(&mdev->request_timer, nt);
|
||||||
}
|
}
|
||||||
|
@ -1075,6 +1075,13 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
|
|||||||
if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
|
if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
|
||||||
drbd_resume_al(mdev);
|
drbd_resume_al(mdev);
|
||||||
|
|
||||||
|
/* remember last attach time so request_timer_fn() won't
|
||||||
|
* kill newly established sessions while we are still trying to thaw
|
||||||
|
* previously frozen IO */
|
||||||
|
if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
|
||||||
|
ns.disk > D_NEGOTIATING)
|
||||||
|
mdev->last_reattach_jif = jiffies;
|
||||||
|
|
||||||
ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
|
ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
|
||||||
if (ascw) {
|
if (ascw) {
|
||||||
ascw->os = os;
|
ascw->os = os;
|
||||||
@ -1609,8 +1616,15 @@ conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state
|
|||||||
enum drbd_state_rv rv;
|
enum drbd_state_rv rv;
|
||||||
int vnr, number_of_volumes = 0;
|
int vnr, number_of_volumes = 0;
|
||||||
|
|
||||||
if (mask.conn == C_MASK)
|
if (mask.conn == C_MASK) {
|
||||||
|
/* remember last connect time so request_timer_fn() won't
|
||||||
|
* kill newly established sessions while we are still trying to thaw
|
||||||
|
* previously frozen IO */
|
||||||
|
if (tconn->cstate != C_WF_REPORT_PARAMS && val.conn == C_WF_REPORT_PARAMS)
|
||||||
|
tconn->last_reconnect_jif = jiffies;
|
||||||
|
|
||||||
tconn->cstate = val.conn;
|
tconn->cstate = val.conn;
|
||||||
|
}
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
idr_for_each_entry(&tconn->volumes, mdev, vnr) {
|
idr_for_each_entry(&tconn->volumes, mdev, vnr) {
|
||||||
|
Loading…
Reference in New Issue
Block a user