aoe: use high-resolution RTTs with fallback to low-res
These changes improve the accuracy of the decision about whether it's time to retransmit an AoE command by using the microsecond-resolution gettimeofday instead of jiffies. Because the system time can jump suddenly, the decision reverts to using jiffies if the high-resolution time difference is relatively large. Otherwise the AoE targets could be considered failed inappropriately. Signed-off-by: Ed Cashin <ecashin@coraid.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
0d555ecfa4
commit
5f0c9c48e7
@ -88,8 +88,7 @@ enum {
|
|||||||
TIMERTICK = HZ / 10,
|
TIMERTICK = HZ / 10,
|
||||||
RTTSCALE = 8,
|
RTTSCALE = 8,
|
||||||
RTTDSCALE = 3,
|
RTTDSCALE = 3,
|
||||||
MAXTIMER = HZ << 1,
|
RTTAVG_INIT = USEC_PER_SEC / 4 << RTTSCALE,
|
||||||
RTTAVG_INIT = HZ / 4 << RTTSCALE,
|
|
||||||
RTTDEV_INIT = RTTAVG_INIT / 4,
|
RTTDEV_INIT = RTTAVG_INIT / 4,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -106,6 +105,8 @@ struct buf {
|
|||||||
struct frame {
|
struct frame {
|
||||||
struct list_head head;
|
struct list_head head;
|
||||||
u32 tag;
|
u32 tag;
|
||||||
|
struct timeval sent; /* high-res time packet was sent */
|
||||||
|
u32 sent_jiffs; /* low-res jiffies-based sent time */
|
||||||
ulong waited;
|
ulong waited;
|
||||||
struct aoetgt *t; /* parent target I belong to */
|
struct aoetgt *t; /* parent target I belong to */
|
||||||
sector_t lba;
|
sector_t lba;
|
||||||
@ -143,11 +144,11 @@ struct aoedev {
|
|||||||
struct aoedev *next;
|
struct aoedev *next;
|
||||||
ulong sysminor;
|
ulong sysminor;
|
||||||
ulong aoemajor;
|
ulong aoemajor;
|
||||||
|
u32 rttavg; /* scaled AoE round trip time average */
|
||||||
|
u32 rttdev; /* scaled round trip time mean deviation */
|
||||||
u16 aoeminor;
|
u16 aoeminor;
|
||||||
u16 flags;
|
u16 flags;
|
||||||
u16 nopen; /* (bd_openers isn't available without sleeping) */
|
u16 nopen; /* (bd_openers isn't available without sleeping) */
|
||||||
u16 rttavg; /* scaled AoE round trip time average */
|
|
||||||
u16 rttdev; /* scaled round trip time mean deviation */
|
|
||||||
u16 fw_ver; /* version of blade's firmware */
|
u16 fw_ver; /* version of blade's firmware */
|
||||||
u16 lasttag; /* last tag sent */
|
u16 lasttag; /* last tag sent */
|
||||||
u16 useme;
|
u16 useme;
|
||||||
|
@ -387,6 +387,8 @@ aoecmd_ata_rw(struct aoedev *d)
|
|||||||
skb->dev = t->ifp->nd;
|
skb->dev = t->ifp->nd;
|
||||||
skb = skb_clone(skb, GFP_ATOMIC);
|
skb = skb_clone(skb, GFP_ATOMIC);
|
||||||
if (skb) {
|
if (skb) {
|
||||||
|
do_gettimeofday(&f->sent);
|
||||||
|
f->sent_jiffs = (u32) jiffies;
|
||||||
__skb_queue_head_init(&queue);
|
__skb_queue_head_init(&queue);
|
||||||
__skb_queue_tail(&queue, skb);
|
__skb_queue_tail(&queue, skb);
|
||||||
aoenet_xmit(&queue);
|
aoenet_xmit(&queue);
|
||||||
@ -475,11 +477,45 @@ resend(struct aoedev *d, struct frame *f)
|
|||||||
skb = skb_clone(skb, GFP_ATOMIC);
|
skb = skb_clone(skb, GFP_ATOMIC);
|
||||||
if (skb == NULL)
|
if (skb == NULL)
|
||||||
return;
|
return;
|
||||||
|
do_gettimeofday(&f->sent);
|
||||||
|
f->sent_jiffs = (u32) jiffies;
|
||||||
__skb_queue_head_init(&queue);
|
__skb_queue_head_init(&queue);
|
||||||
__skb_queue_tail(&queue, skb);
|
__skb_queue_tail(&queue, skb);
|
||||||
aoenet_xmit(&queue);
|
aoenet_xmit(&queue);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
tsince_hr(struct frame *f)
|
||||||
|
{
|
||||||
|
struct timeval now;
|
||||||
|
int n;
|
||||||
|
|
||||||
|
do_gettimeofday(&now);
|
||||||
|
n = now.tv_usec - f->sent.tv_usec;
|
||||||
|
n += (now.tv_sec - f->sent.tv_sec) * USEC_PER_SEC;
|
||||||
|
|
||||||
|
if (n < 0)
|
||||||
|
n = -n;
|
||||||
|
|
||||||
|
/* For relatively long periods, use jiffies to avoid
|
||||||
|
* discrepancies caused by updates to the system time.
|
||||||
|
*
|
||||||
|
* On system with HZ of 1000, 32-bits is over 49 days
|
||||||
|
* worth of jiffies, or over 71 minutes worth of usecs.
|
||||||
|
*
|
||||||
|
* Jiffies overflow is handled by subtraction of unsigned ints:
|
||||||
|
* (gdb) print (unsigned) 2 - (unsigned) 0xfffffffe
|
||||||
|
* $3 = 4
|
||||||
|
* (gdb)
|
||||||
|
*/
|
||||||
|
if (n > USEC_PER_SEC / 4) {
|
||||||
|
n = ((u32) jiffies) - f->sent_jiffs;
|
||||||
|
n *= USEC_PER_SEC / HZ;
|
||||||
|
}
|
||||||
|
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
tsince(u32 tag)
|
tsince(u32 tag)
|
||||||
{
|
{
|
||||||
@ -489,7 +525,7 @@ tsince(u32 tag)
|
|||||||
n -= tag & 0xffff;
|
n -= tag & 0xffff;
|
||||||
if (n < 0)
|
if (n < 0)
|
||||||
n += 1<<16;
|
n += 1<<16;
|
||||||
return n;
|
return jiffies_to_usecs(n + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct aoeif *
|
static struct aoeif *
|
||||||
@ -552,6 +588,7 @@ sthtith(struct aoedev *d)
|
|||||||
nf->bv = f->bv;
|
nf->bv = f->bv;
|
||||||
nf->bv_off = f->bv_off;
|
nf->bv_off = f->bv_off;
|
||||||
nf->waited = 0;
|
nf->waited = 0;
|
||||||
|
nf->sent_jiffs = f->sent_jiffs;
|
||||||
f->skb = skb;
|
f->skb = skb;
|
||||||
aoe_freetframe(f);
|
aoe_freetframe(f);
|
||||||
ht->nout--;
|
ht->nout--;
|
||||||
@ -621,7 +658,7 @@ rexmit_timer(ulong vp)
|
|||||||
head = &d->factive[i];
|
head = &d->factive[i];
|
||||||
list_for_each_safe(pos, nx, head) {
|
list_for_each_safe(pos, nx, head) {
|
||||||
f = list_entry(pos, struct frame, head);
|
f = list_entry(pos, struct frame, head);
|
||||||
if (tsince(f->tag) < timeout)
|
if (tsince_hr(f) < timeout)
|
||||||
break; /* end of expired frames */
|
break; /* end of expired frames */
|
||||||
/* move to flist for later processing */
|
/* move to flist for later processing */
|
||||||
list_move_tail(pos, &flist);
|
list_move_tail(pos, &flist);
|
||||||
@ -632,8 +669,8 @@ rexmit_timer(ulong vp)
|
|||||||
while (!list_empty(&flist)) {
|
while (!list_empty(&flist)) {
|
||||||
pos = flist.next;
|
pos = flist.next;
|
||||||
f = list_entry(pos, struct frame, head);
|
f = list_entry(pos, struct frame, head);
|
||||||
n = f->waited += tsince(f->tag);
|
n = f->waited += tsince_hr(f);
|
||||||
n /= HZ;
|
n /= USEC_PER_SEC;
|
||||||
if (n > aoe_deadsecs) {
|
if (n > aoe_deadsecs) {
|
||||||
/* Waited too long. Device failure.
|
/* Waited too long. Device failure.
|
||||||
* Hang all frames on first hash bucket for downdev
|
* Hang all frames on first hash bucket for downdev
|
||||||
@ -1193,12 +1230,12 @@ aoecmd_ata_rsp(struct sk_buff *skb)
|
|||||||
n = be32_to_cpu(get_unaligned(&h->tag));
|
n = be32_to_cpu(get_unaligned(&h->tag));
|
||||||
f = getframe(d, n);
|
f = getframe(d, n);
|
||||||
if (f) {
|
if (f) {
|
||||||
calc_rttavg(d, f->t, tsince(n));
|
calc_rttavg(d, f->t, tsince_hr(f));
|
||||||
f->t->nout--;
|
f->t->nout--;
|
||||||
} else {
|
} else {
|
||||||
f = getframe_deferred(d, n);
|
f = getframe_deferred(d, n);
|
||||||
if (f) {
|
if (f) {
|
||||||
calc_rttavg(d, NULL, tsince(n));
|
calc_rttavg(d, NULL, tsince_hr(f));
|
||||||
} else {
|
} else {
|
||||||
calc_rttavg(d, NULL, tsince(n));
|
calc_rttavg(d, NULL, tsince(n));
|
||||||
spin_unlock_irqrestore(&d->lock, flags);
|
spin_unlock_irqrestore(&d->lock, flags);
|
||||||
@ -1276,7 +1313,13 @@ aoecmd_ata_id(struct aoedev *d)
|
|||||||
d->rttdev = RTTDEV_INIT;
|
d->rttdev = RTTDEV_INIT;
|
||||||
d->timer.function = rexmit_timer;
|
d->timer.function = rexmit_timer;
|
||||||
|
|
||||||
return skb_clone(skb, GFP_ATOMIC);
|
skb = skb_clone(skb, GFP_ATOMIC);
|
||||||
|
if (skb) {
|
||||||
|
do_gettimeofday(&f->sent);
|
||||||
|
f->sent_jiffs = (u32) jiffies;
|
||||||
|
}
|
||||||
|
|
||||||
|
return skb;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct aoetgt *
|
static struct aoetgt *
|
||||||
|
Loading…
Reference in New Issue
Block a user