mirror of
https://github.com/torvalds/linux.git
synced 2024-11-10 14:11:52 +00:00
block/mq-deadline: Stop using per-CPU counters
Calculating the sum over all CPUs of per-CPU counters frequently is inefficient. Hence switch from per-CPU to individual counters. Three counters are protected by the mq-deadline spinlock since these are only accessed from contexts that already hold that spinlock. The fourth counter is atomic because protecting it with the mq-deadline spinlock would trigger lock contention. Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com> Reviewed-by: Niklas Cassel <Niklas.Cassel@wdc.com> Cc: Hannes Reinecke <hare@suse.de> Signed-off-by: Bart Van Assche <bvanassche@acm.org> Link: https://lore.kernel.org/r/20210927220328.1410161-4-bvanassche@acm.org Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
32f64cad97
commit
bce0363ed8
@ -51,17 +51,16 @@ enum dd_prio {
|
||||
|
||||
enum { DD_PRIO_COUNT = 3 };
|
||||
|
||||
/* I/O statistics per I/O priority. */
|
||||
/*
|
||||
* I/O statistics per I/O priority. It is fine if these counters overflow.
|
||||
* What matters is that these counters are at least as wide as
|
||||
* log2(max_outstanding_requests).
|
||||
*/
|
||||
struct io_stats_per_prio {
|
||||
local_t inserted;
|
||||
local_t merged;
|
||||
local_t dispatched;
|
||||
local_t completed;
|
||||
};
|
||||
|
||||
/* I/O statistics for all I/O priorities (enum dd_prio). */
|
||||
struct io_stats {
|
||||
struct io_stats_per_prio stats[DD_PRIO_COUNT];
|
||||
uint32_t inserted;
|
||||
uint32_t merged;
|
||||
uint32_t dispatched;
|
||||
atomic_t completed;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -74,6 +73,7 @@ struct dd_per_prio {
|
||||
struct list_head fifo_list[DD_DIR_COUNT];
|
||||
/* Next request in FIFO order. Read, write or both are NULL. */
|
||||
struct request *next_rq[DD_DIR_COUNT];
|
||||
struct io_stats_per_prio stats;
|
||||
};
|
||||
|
||||
struct deadline_data {
|
||||
@ -88,8 +88,6 @@ struct deadline_data {
|
||||
unsigned int batching; /* number of sequential requests made */
|
||||
unsigned int starved; /* times reads have starved writes */
|
||||
|
||||
struct io_stats __percpu *stats;
|
||||
|
||||
/*
|
||||
* settings that change how the i/o scheduler behaves
|
||||
*/
|
||||
@ -103,33 +101,6 @@ struct deadline_data {
|
||||
spinlock_t zone_lock;
|
||||
};
|
||||
|
||||
/* Count one event of type 'event_type' and with I/O priority 'prio' */
|
||||
#define dd_count(dd, event_type, prio) do { \
|
||||
struct io_stats *io_stats = get_cpu_ptr((dd)->stats); \
|
||||
\
|
||||
BUILD_BUG_ON(!__same_type((dd), struct deadline_data *)); \
|
||||
BUILD_BUG_ON(!__same_type((prio), enum dd_prio)); \
|
||||
local_inc(&io_stats->stats[(prio)].event_type); \
|
||||
put_cpu_ptr(io_stats); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* Returns the total number of dd_count(dd, event_type, prio) calls across all
|
||||
* CPUs. No locking or barriers since it is fine if the returned sum is slightly
|
||||
* outdated.
|
||||
*/
|
||||
#define dd_sum(dd, event_type, prio) ({ \
|
||||
unsigned int cpu; \
|
||||
u32 sum = 0; \
|
||||
\
|
||||
BUILD_BUG_ON(!__same_type((dd), struct deadline_data *)); \
|
||||
BUILD_BUG_ON(!__same_type((prio), enum dd_prio)); \
|
||||
for_each_present_cpu(cpu) \
|
||||
sum += local_read(&per_cpu_ptr((dd)->stats, cpu)-> \
|
||||
stats[(prio)].event_type); \
|
||||
sum; \
|
||||
})
|
||||
|
||||
/* Maps an I/O priority class to a deadline scheduler priority. */
|
||||
static const enum dd_prio ioprio_class_to_prio[] = {
|
||||
[IOPRIO_CLASS_NONE] = DD_BE_PRIO,
|
||||
@ -233,7 +204,9 @@ static void dd_merged_requests(struct request_queue *q, struct request *req,
|
||||
const u8 ioprio_class = dd_rq_ioclass(next);
|
||||
const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
|
||||
|
||||
dd_count(dd, merged, prio);
|
||||
lockdep_assert_held(&dd->lock);
|
||||
|
||||
dd->per_prio[prio].stats.merged++;
|
||||
|
||||
/*
|
||||
* if next expires before rq, assign its expire time to rq
|
||||
@ -273,7 +246,11 @@ deadline_move_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
|
||||
/* Number of requests queued for a given priority level. */
|
||||
static u32 dd_queued(struct deadline_data *dd, enum dd_prio prio)
|
||||
{
|
||||
return dd_sum(dd, inserted, prio) - dd_sum(dd, completed, prio);
|
||||
const struct io_stats_per_prio *stats = &dd->per_prio[prio].stats;
|
||||
|
||||
lockdep_assert_held(&dd->lock);
|
||||
|
||||
return stats->inserted - atomic_read(&stats->completed);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -463,7 +440,7 @@ dispatch_request:
|
||||
done:
|
||||
ioprio_class = dd_rq_ioclass(rq);
|
||||
prio = ioprio_class_to_prio[ioprio_class];
|
||||
dd_count(dd, dispatched, prio);
|
||||
dd->per_prio[prio].stats.dispatched++;
|
||||
/*
|
||||
* If the request needs its target zone locked, do it.
|
||||
*/
|
||||
@ -542,18 +519,21 @@ static void dd_exit_sched(struct elevator_queue *e)
|
||||
|
||||
for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
|
||||
struct dd_per_prio *per_prio = &dd->per_prio[prio];
|
||||
const struct io_stats_per_prio *stats = &per_prio->stats;
|
||||
uint32_t queued;
|
||||
|
||||
WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_READ]));
|
||||
WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_WRITE]));
|
||||
WARN_ONCE(dd_queued(dd, prio) != 0,
|
||||
"statistics for priority %d: i %u m %u d %u c %u\n",
|
||||
prio, dd_sum(dd, inserted, prio),
|
||||
dd_sum(dd, merged, prio),
|
||||
dd_sum(dd, dispatched, prio),
|
||||
dd_sum(dd, completed, prio));
|
||||
}
|
||||
|
||||
free_percpu(dd->stats);
|
||||
spin_lock(&dd->lock);
|
||||
queued = dd_queued(dd, prio);
|
||||
spin_unlock(&dd->lock);
|
||||
|
||||
WARN_ONCE(queued != 0,
|
||||
"statistics for priority %d: i %u m %u d %u c %u\n",
|
||||
prio, stats->inserted, stats->merged,
|
||||
stats->dispatched, atomic_read(&stats->completed));
|
||||
}
|
||||
|
||||
kfree(dd);
|
||||
}
|
||||
@ -578,11 +558,6 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
|
||||
|
||||
eq->elevator_data = dd;
|
||||
|
||||
dd->stats = alloc_percpu_gfp(typeof(*dd->stats),
|
||||
GFP_KERNEL | __GFP_ZERO);
|
||||
if (!dd->stats)
|
||||
goto free_dd;
|
||||
|
||||
for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
|
||||
struct dd_per_prio *per_prio = &dd->per_prio[prio];
|
||||
|
||||
@ -604,9 +579,6 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
|
||||
q->elevator = eq;
|
||||
return 0;
|
||||
|
||||
free_dd:
|
||||
kfree(dd);
|
||||
|
||||
put_eq:
|
||||
kobject_put(&eq->kobj);
|
||||
return ret;
|
||||
@ -689,8 +661,9 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
blk_req_zone_write_unlock(rq);
|
||||
|
||||
prio = ioprio_class_to_prio[ioprio_class];
|
||||
per_prio = &dd->per_prio[prio];
|
||||
if (!rq->elv.priv[0]) {
|
||||
dd_count(dd, inserted, prio);
|
||||
per_prio->stats.inserted++;
|
||||
rq->elv.priv[0] = (void *)(uintptr_t)1;
|
||||
}
|
||||
|
||||
@ -701,7 +674,6 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
|
||||
trace_block_rq_insert(rq);
|
||||
|
||||
per_prio = &dd->per_prio[prio];
|
||||
if (at_head) {
|
||||
list_add(&rq->queuelist, &per_prio->dispatch);
|
||||
} else {
|
||||
@ -779,7 +751,7 @@ static void dd_finish_request(struct request *rq)
|
||||
if (!rq->elv.priv[0])
|
||||
return;
|
||||
|
||||
dd_count(dd, completed, prio);
|
||||
atomic_inc(&per_prio->stats.completed);
|
||||
|
||||
if (blk_queue_is_zoned(q)) {
|
||||
unsigned long flags;
|
||||
@ -966,28 +938,44 @@ static int dd_queued_show(void *data, struct seq_file *m)
|
||||
{
|
||||
struct request_queue *q = data;
|
||||
struct deadline_data *dd = q->elevator->elevator_data;
|
||||
u32 rt, be, idle;
|
||||
|
||||
spin_lock(&dd->lock);
|
||||
rt = dd_queued(dd, DD_RT_PRIO);
|
||||
be = dd_queued(dd, DD_BE_PRIO);
|
||||
idle = dd_queued(dd, DD_IDLE_PRIO);
|
||||
spin_unlock(&dd->lock);
|
||||
|
||||
seq_printf(m, "%u %u %u\n", rt, be, idle);
|
||||
|
||||
seq_printf(m, "%u %u %u\n", dd_queued(dd, DD_RT_PRIO),
|
||||
dd_queued(dd, DD_BE_PRIO),
|
||||
dd_queued(dd, DD_IDLE_PRIO));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Number of requests owned by the block driver for a given priority. */
|
||||
static u32 dd_owned_by_driver(struct deadline_data *dd, enum dd_prio prio)
|
||||
{
|
||||
return dd_sum(dd, dispatched, prio) + dd_sum(dd, merged, prio)
|
||||
- dd_sum(dd, completed, prio);
|
||||
const struct io_stats_per_prio *stats = &dd->per_prio[prio].stats;
|
||||
|
||||
lockdep_assert_held(&dd->lock);
|
||||
|
||||
return stats->dispatched + stats->merged -
|
||||
atomic_read(&stats->completed);
|
||||
}
|
||||
|
||||
static int dd_owned_by_driver_show(void *data, struct seq_file *m)
|
||||
{
|
||||
struct request_queue *q = data;
|
||||
struct deadline_data *dd = q->elevator->elevator_data;
|
||||
u32 rt, be, idle;
|
||||
|
||||
spin_lock(&dd->lock);
|
||||
rt = dd_owned_by_driver(dd, DD_RT_PRIO);
|
||||
be = dd_owned_by_driver(dd, DD_BE_PRIO);
|
||||
idle = dd_owned_by_driver(dd, DD_IDLE_PRIO);
|
||||
spin_unlock(&dd->lock);
|
||||
|
||||
seq_printf(m, "%u %u %u\n", rt, be, idle);
|
||||
|
||||
seq_printf(m, "%u %u %u\n", dd_owned_by_driver(dd, DD_RT_PRIO),
|
||||
dd_owned_by_driver(dd, DD_BE_PRIO),
|
||||
dd_owned_by_driver(dd, DD_IDLE_PRIO));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user