forked from Minki/linux
io_uring: allow application controlled CQ ring size
We currently size the CQ ring as twice the SQ ring, to allow some flexibility in not overflowing the CQ ring. This is done because the SQE lifetime is different than that of the IO request itself; the SQE is consumed as soon as the kernel has seen the entry. Certain applications don't need a huge SQ ring size, since they just submit IO in batches. But they may have a lot of requests pending, and hence need a big CQ ring to hold them all. By allowing the application to control the CQ ring size multiplier, we can cater to those applications more efficiently. If an application wants to define its own CQ ring size, it must set IORING_SETUP_CQSIZE in the setup flags, and fill out io_uring_params->cq_entries. The value must be a power of two. Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
c3a31e6056
commit
33a107f0a1
@ -76,6 +76,7 @@
|
|||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
|
|
||||||
#define IORING_MAX_ENTRIES 32768
|
#define IORING_MAX_ENTRIES 32768
|
||||||
|
#define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES)
|
||||||
#define IORING_MAX_FIXED_FILES 1024
|
#define IORING_MAX_FIXED_FILES 1024
|
||||||
|
|
||||||
struct io_uring {
|
struct io_uring {
|
||||||
@ -4049,10 +4050,23 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
|
|||||||
* Use twice as many entries for the CQ ring. It's possible for the
|
* Use twice as many entries for the CQ ring. It's possible for the
|
||||||
* application to drive a higher depth than the size of the SQ ring,
|
* application to drive a higher depth than the size of the SQ ring,
|
||||||
* since the sqes are only used at submission time. This allows for
|
* since the sqes are only used at submission time. This allows for
|
||||||
* some flexibility in overcommitting a bit.
|
* some flexibility in overcommitting a bit. If the application has
|
||||||
|
* set IORING_SETUP_CQSIZE, it will have passed in the desired number
|
||||||
|
* of CQ ring entries manually.
|
||||||
*/
|
*/
|
||||||
p->sq_entries = roundup_pow_of_two(entries);
|
p->sq_entries = roundup_pow_of_two(entries);
|
||||||
p->cq_entries = 2 * p->sq_entries;
|
if (p->flags & IORING_SETUP_CQSIZE) {
|
||||||
|
/*
|
||||||
|
* If IORING_SETUP_CQSIZE is set, we do the same roundup
|
||||||
|
* to a power-of-two, if it isn't already. We do NOT impose
|
||||||
|
* any cq vs sq ring sizing.
|
||||||
|
*/
|
||||||
|
if (p->cq_entries < p->sq_entries || p->cq_entries > IORING_MAX_CQ_ENTRIES)
|
||||||
|
return -EINVAL;
|
||||||
|
p->cq_entries = roundup_pow_of_two(p->cq_entries);
|
||||||
|
} else {
|
||||||
|
p->cq_entries = 2 * p->sq_entries;
|
||||||
|
}
|
||||||
|
|
||||||
user = get_uid(current_user());
|
user = get_uid(current_user());
|
||||||
account_mem = !capable(CAP_IPC_LOCK);
|
account_mem = !capable(CAP_IPC_LOCK);
|
||||||
@ -4137,7 +4151,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
|
if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
|
||||||
IORING_SETUP_SQ_AFF))
|
IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
ret = io_uring_create(entries, &p);
|
ret = io_uring_create(entries, &p);
|
||||||
|
@ -50,6 +50,7 @@ struct io_uring_sqe {
|
|||||||
#define IORING_SETUP_IOPOLL (1U << 0) /* io_context is polled */
|
#define IORING_SETUP_IOPOLL (1U << 0) /* io_context is polled */
|
||||||
#define IORING_SETUP_SQPOLL (1U << 1) /* SQ poll thread */
|
#define IORING_SETUP_SQPOLL (1U << 1) /* SQ poll thread */
|
||||||
#define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */
|
#define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */
|
||||||
|
#define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */
|
||||||
|
|
||||||
#define IORING_OP_NOP 0
|
#define IORING_OP_NOP 0
|
||||||
#define IORING_OP_READV 1
|
#define IORING_OP_READV 1
|
||||||
|
Loading…
Reference in New Issue
Block a user