blk-mq: improve tag waiting setup for non-shared tags

If we run out of driver tags, we currently treat shared and non-shared
tags the same - both cases hook into the tag waitqueue. This is a bit
more costly than it needs to be on unshared tags, since we have to both
grab the hctx lock, and the waitqueue lock (and disable interrupts).
For the non-shared case, we can simply mark the queue as needing a
restart.

Split blk_mq_dispatch_wait_add() to account for both cases, and
rename it to blk_mq_mark_tag_wait() to better reflect what it
does now.

Without this patch, shared and non-shared performance is about the same
with 4 fio thread hammering on a single null_blk device (~410K, at 75%
sys). With the patch, the shared case is the same, but the non-shared
tags case runs at 431K at 71% sys.

Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Jens Axboe 2017-11-09 16:10:13 -07:00
parent 15f7b41f70
commit f906a6a0f4

View File

@ -1006,44 +1006,68 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
return 1;
}
static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx **hctx,
struct request *rq)
/*
* Mark us waiting for a tag. For shared tags, this involves hooking us into
* the tag wakeups. For non-shared tags, we can simply mark us nedeing a
* restart. For both caes, take care to check the condition again after
* marking us as waiting.
*/
static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx **hctx,
struct request *rq)
{
struct blk_mq_hw_ctx *this_hctx = *hctx;
wait_queue_entry_t *wait = &this_hctx->dispatch_wait;
bool shared_tags = (this_hctx->flags & BLK_MQ_F_TAG_SHARED) != 0;
struct sbq_wait_state *ws;
wait_queue_entry_t *wait;
bool ret;
if (!list_empty_careful(&wait->entry))
return false;
if (!shared_tags) {
if (!test_bit(BLK_MQ_S_SCHED_RESTART, &this_hctx->state))
set_bit(BLK_MQ_S_SCHED_RESTART, &this_hctx->state);
} else {
wait = &this_hctx->dispatch_wait;
if (!list_empty_careful(&wait->entry))
return false;
spin_lock(&this_hctx->lock);
if (!list_empty(&wait->entry)) {
spin_unlock(&this_hctx->lock);
return false;
spin_lock(&this_hctx->lock);
if (!list_empty(&wait->entry)) {
spin_unlock(&this_hctx->lock);
return false;
}
ws = bt_wait_ptr(&this_hctx->tags->bitmap_tags, this_hctx);
add_wait_queue(&ws->wait, wait);
}
ws = bt_wait_ptr(&this_hctx->tags->bitmap_tags, this_hctx);
add_wait_queue(&ws->wait, wait);
/*
* It's possible that a tag was freed in the window between the
* allocation failure and adding the hardware queue to the wait
* queue.
*/
if (!blk_mq_get_driver_tag(rq, hctx, false)) {
spin_unlock(&this_hctx->lock);
return false;
}
ret = blk_mq_get_driver_tag(rq, hctx, false);
/*
* We got a tag, remove ourselves from the wait queue to ensure
* someone else gets the wakeup.
*/
spin_lock_irq(&ws->wait.lock);
list_del_init(&wait->entry);
spin_unlock_irq(&ws->wait.lock);
spin_unlock(&this_hctx->lock);
return true;
if (!shared_tags) {
/*
* Don't clear RESTART here, someone else could have set it.
* At most this will cost an extra queue run.
*/
return ret;
} else {
if (!ret) {
spin_unlock(&this_hctx->lock);
return false;
}
/*
* We got a tag, remove ourselves from the wait queue to ensure
* someone else gets the wakeup.
*/
spin_lock_irq(&ws->wait.lock);
list_del_init(&wait->entry);
spin_unlock_irq(&ws->wait.lock);
spin_unlock(&this_hctx->lock);
return true;
}
}
bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
@ -1076,10 +1100,15 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
* before we add this entry back on the dispatch list,
* we'll re-run it below.
*/
if (!blk_mq_dispatch_wait_add(&hctx, rq)) {
if (!blk_mq_mark_tag_wait(&hctx, rq)) {
if (got_budget)
blk_mq_put_dispatch_budget(hctx);
no_tag = true;
/*
* For non-shared tags, the RESTART check
* will suffice.
*/
if (hctx->flags & BLK_MQ_F_TAG_SHARED)
no_tag = true;
break;
}
}