block, bfq: add/remove entity weights correctly

To keep I/O throughput high as often as possible, BFQ performs
I/O-dispatch plugging (aka device idling) only when beneficial exactly
for throughput, or when needed for service guarantees (low latency,
fairness). An important case where the latter condition holds is when
the scenario is 'asymmetric' in terms of weights: i.e., when some
bfq_queue or whole group of queues has a higher weight, and thus has
to receive more service, than other queues or groups. Without dispatch
plugging, lower-weight queues/groups may unjustly steal bandwidth to
higher-weight queues/groups.

To detect asymmetric scenarios, BFQ checks some sufficient
conditions. One of these conditions is that active groups have
different weights. BFQ controls this condition by maintaining a
special set of unique weights of active groups
(group_weights_tree). To this purpose, in the function
bfq_active_insert/bfq_active_extract BFQ adds/removes the weight of a
group to/from this set.

Unfortunately, the function bfq_active_extract may happen to be
invoked also for a group that is still active (to preserve the correct
update of the next queue to serve, see comments in function
bfq_no_longer_next_in_service() for details). In this case, removing
the weight of the group makes the set group_weights_tree
inconsistent. Service-guarantee violations follow.

This commit addresses this issue by moving group_weights_tree
insertions from their previous location (in bfq_active_insert) into
the function __bfq_activate_entity, and by moving group_weights_tree
extractions from bfq_active_extract to when the entity that represents
a group remains throughly idle, i.e., with no request either enqueued
or dispatched.

Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Paolo Valente 2018-06-25 21:55:34 +02:00 committed by Jens Axboe
parent 6a5ac98465
commit 0471559c2f
3 changed files with 59 additions and 17 deletions

View File

@ -742,8 +742,9 @@ inc_counter:
* See the comments to the function bfq_weights_tree_add() for considerations * See the comments to the function bfq_weights_tree_add() for considerations
* about overhead. * about overhead.
*/ */
void bfq_weights_tree_remove(struct bfq_data *bfqd, struct bfq_entity *entity, void __bfq_weights_tree_remove(struct bfq_data *bfqd,
struct rb_root *root) struct bfq_entity *entity,
struct rb_root *root)
{ {
if (!entity->weight_counter) if (!entity->weight_counter)
return; return;
@ -759,6 +760,43 @@ reset_entity_pointer:
entity->weight_counter = NULL; entity->weight_counter = NULL;
} }
/*
* Invoke __bfq_weights_tree_remove on bfqq and all its inactive
* parent entities.
*/
void bfq_weights_tree_remove(struct bfq_data *bfqd,
struct bfq_queue *bfqq)
{
struct bfq_entity *entity = bfqq->entity.parent;
__bfq_weights_tree_remove(bfqd, &bfqq->entity,
&bfqd->queue_weights_tree);
for_each_entity(entity) {
struct bfq_sched_data *sd = entity->my_sched_data;
if (sd->next_in_service || sd->in_service_entity) {
/*
* entity is still active, because either
* next_in_service or in_service_entity is not
* NULL (see the comments on the definition of
* next_in_service for details on why
* in_service_entity must be checked too).
*
* As a consequence, the weight of entity is
* not to be removed. In addition, if entity
* is active, then its parent entities are
* active as well, and thus their weights are
* not to be removed either. In the end, this
* loop must stop here.
*/
break;
}
__bfq_weights_tree_remove(bfqd, entity,
&bfqd->group_weights_tree);
}
}
/* /*
* Return expired entry, or NULL to just start from scratch in rbtree. * Return expired entry, or NULL to just start from scratch in rbtree.
*/ */
@ -4582,8 +4620,7 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
*/ */
bfqq->budget_timeout = jiffies; bfqq->budget_timeout = jiffies;
bfq_weights_tree_remove(bfqd, &bfqq->entity, bfq_weights_tree_remove(bfqd, bfqq);
&bfqd->queue_weights_tree);
} }
now_ns = ktime_get_ns(); now_ns = ktime_get_ns();

View File

@ -827,8 +827,11 @@ struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic);
void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq); void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq);
void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity, void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity,
struct rb_root *root); struct rb_root *root);
void bfq_weights_tree_remove(struct bfq_data *bfqd, struct bfq_entity *entity, void __bfq_weights_tree_remove(struct bfq_data *bfqd,
struct rb_root *root); struct bfq_entity *entity,
struct rb_root *root);
void bfq_weights_tree_remove(struct bfq_data *bfqd,
struct bfq_queue *bfqq);
void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq, void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bool compensate, enum bfqq_expiration reason); bool compensate, enum bfqq_expiration reason);
void bfq_put_queue(struct bfq_queue *bfqq); void bfq_put_queue(struct bfq_queue *bfqq);

View File

@ -499,9 +499,6 @@ static void bfq_active_insert(struct bfq_service_tree *st,
if (bfqq) if (bfqq)
list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list); list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list);
#ifdef CONFIG_BFQ_GROUP_IOSCHED #ifdef CONFIG_BFQ_GROUP_IOSCHED
else /* bfq_group */
bfq_weights_tree_add(bfqd, entity, &bfqd->group_weights_tree);
if (bfqg != bfqd->root_group) if (bfqg != bfqd->root_group)
bfqg->active_entities++; bfqg->active_entities++;
#endif #endif
@ -601,10 +598,6 @@ static void bfq_active_extract(struct bfq_service_tree *st,
if (bfqq) if (bfqq)
list_del(&bfqq->bfqq_list); list_del(&bfqq->bfqq_list);
#ifdef CONFIG_BFQ_GROUP_IOSCHED #ifdef CONFIG_BFQ_GROUP_IOSCHED
else /* bfq_group */
bfq_weights_tree_remove(bfqd, entity,
&bfqd->group_weights_tree);
if (bfqg != bfqd->root_group) if (bfqg != bfqd->root_group)
bfqg->active_entities--; bfqg->active_entities--;
#endif #endif
@ -799,7 +792,7 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
if (prev_weight != new_weight) { if (prev_weight != new_weight) {
root = bfqq ? &bfqd->queue_weights_tree : root = bfqq ? &bfqd->queue_weights_tree :
&bfqd->group_weights_tree; &bfqd->group_weights_tree;
bfq_weights_tree_remove(bfqd, entity, root); __bfq_weights_tree_remove(bfqd, entity, root);
} }
entity->weight = new_weight; entity->weight = new_weight;
/* /*
@ -971,7 +964,7 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity,
* one of its children receives a new request. * one of its children receives a new request.
* *
* Basically, this function updates the timestamps of entity and * Basically, this function updates the timestamps of entity and
* inserts entity into its active tree, ater possibly extracting it * inserts entity into its active tree, after possibly extracting it
* from its idle tree. * from its idle tree.
*/ */
static void __bfq_activate_entity(struct bfq_entity *entity, static void __bfq_activate_entity(struct bfq_entity *entity,
@ -1015,6 +1008,16 @@ static void __bfq_activate_entity(struct bfq_entity *entity,
entity->on_st = true; entity->on_st = true;
} }
#ifdef BFQ_GROUP_IOSCHED_ENABLED
if (!bfq_entity_to_bfqq(entity)) { /* bfq_group */
struct bfq_group *bfqg =
container_of(entity, struct bfq_group, entity);
bfq_weights_tree_add(bfqg->bfqd, entity,
&bfqd->group_weights_tree);
}
#endif
bfq_update_fin_time_enqueue(entity, st, backshifted); bfq_update_fin_time_enqueue(entity, st, backshifted);
} }
@ -1664,8 +1667,7 @@ void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfqd->busy_queues--; bfqd->busy_queues--;
if (!bfqq->dispatched) if (!bfqq->dispatched)
bfq_weights_tree_remove(bfqd, &bfqq->entity, bfq_weights_tree_remove(bfqd, bfqq);
&bfqd->queue_weights_tree);
if (bfqq->wr_coeff > 1) if (bfqq->wr_coeff > 1)
bfqd->wr_busy_queues--; bfqd->wr_busy_queues--;