io_uring: switch to atomic_t for io_kiocb reference count

io_uring manipulates references twice for each request, and hence is very
sensitive to the performance of its reference counting. This commit borrows
a trick from:

commit f958d7b528
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Thu Apr 11 10:06:20 2019 -0700

    mm: make page ref count overflow check tighter and more explicit

and switches to atomic_t for references, while still retaining overflow
and underflow checks.

This is good for a 2-3% increase in peak IOPS on a single core. Before:

IOPS=2970879, IOS/call=31/31, inflight=128 (128)
IOPS=2952597, IOS/call=31/31, inflight=128 (128)
IOPS=2943904, IOS/call=31/31, inflight=128 (128)
IOPS=2930006, IOS/call=31/31, inflight=96 (96)

and after:

IOPS=3054354, IOS/call=31/31, inflight=128 (128)
IOPS=3059038, IOS/call=31/31, inflight=128 (128)
IOPS=3060320, IOS/call=31/31, inflight=128 (128)
IOPS=3068256, IOS/call=31/31, inflight=96 (96)

Signed-off-by: Jens Axboe <axboe@kernel.dk>
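
To see what the borrowed check actually catches: the unsigned addition in
req_ref_zero_or_close_to_overflow() wraps, so the comparison is true exactly
when the count is zero (a get or put against an already-freed request) or
within 127 of wrapping around the 32-bit range (signed -127..-1, i.e. about
to overflow back to zero). Below is a minimal userspace sketch of the same
test, using C11 stdatomic in place of the kernel's atomic_t (the function
name is my own, not from this commit):

	#include <assert.h>
	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	/* Userspace analogue of req_ref_zero_or_close_to_overflow(). */
	static bool zero_or_close_to_overflow(atomic_uint *refs)
	{
		return (unsigned int) atomic_load(refs) + 127u <= 127u;
	}

	int main(void)
	{
		atomic_uint refs;

		atomic_init(&refs, 2);	/* the normal starting count */
		assert(!zero_or_close_to_overflow(&refs));

		atomic_store(&refs, 0);	/* count already dropped to zero */
		assert(zero_or_close_to_overflow(&refs));

		/* count incremented until it is about to wrap back to zero */
		atomic_store(&refs, (unsigned int) -1);
		assert(zero_or_close_to_overflow(&refs));

		printf("only zero and wrapped counts are flagged\n");
		return 0;
	}

The WARN_ON_ONCE() calls in the diff below fire once on the first operation
against such a poisoned count, instead of letting a plain atomic silently
keep counting.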

fs/io_uring.c
@@ -801,7 +801,7 @@ struct io_kiocb {
 	struct io_ring_ctx	*ctx;
 	unsigned int		flags;
-	refcount_t		refs;
+	atomic_t		refs;
 	struct task_struct	*task;
 	u64			user_data;
@@ -1470,29 +1470,39 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
 	return ret;
 }
 
+/*
+ * Shamelessly stolen from the mm implementation of page reference checking,
+ * see commit f958d7b528b1 for details.
+ */
+#define req_ref_zero_or_close_to_overflow(req)	\
+	((unsigned int) atomic_read(&(req->refs)) + 127u <= 127u)
+
 static inline bool req_ref_inc_not_zero(struct io_kiocb *req)
 {
-	return refcount_inc_not_zero(&req->refs);
+	return atomic_inc_not_zero(&req->refs);
 }
 
 static inline bool req_ref_sub_and_test(struct io_kiocb *req, int refs)
 {
-	return refcount_sub_and_test(refs, &req->refs);
+	WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
+	return atomic_sub_and_test(refs, &req->refs);
 }
 
 static inline bool req_ref_put_and_test(struct io_kiocb *req)
 {
-	return refcount_dec_and_test(&req->refs);
+	WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
+	return atomic_dec_and_test(&req->refs);
 }
 
 static inline void req_ref_put(struct io_kiocb *req)
 {
-	refcount_dec(&req->refs);
+	WARN_ON_ONCE(req_ref_put_and_test(req));
 }
 
 static inline void req_ref_get(struct io_kiocb *req)
 {
-	refcount_inc(&req->refs);
+	WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
+	atomic_inc(&req->refs);
 }
 
 static void __io_cqring_fill_event(struct io_kiocb *req, long res,
@@ -6383,7 +6393,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	req->link = NULL;
 	req->fixed_rsrc_refs = NULL;
 	/* one is dropped after submission, the other at completion */
-	refcount_set(&req->refs, 2);
+	atomic_set(&req->refs, 2);
 	req->task = current;
 	req->result = 0;
 	req->work.list.next = NULL;
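
The comment kept in the last hunk summarizes the lifecycle that makes this
count so hot: io_init_req() starts every request at 2, the submission path
drops one reference, the completion path drops the other, and whichever drop
reaches zero frees the request. A minimal userspace sketch of that pattern
(C11 stdatomic again; the struct and helper names are invented, not the
kernel's):

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>
	#include <stdlib.h>

	/* Toy stand-in for struct io_kiocb; only the refcount matters here. */
	struct request {
		atomic_int refs;
	};

	/* Like req_ref_put_and_test(): true when the last reference drops. */
	static bool put_and_test(struct request *req)
	{
		/* atomic_fetch_sub() returns the old value */
		return atomic_fetch_sub(&req->refs, 1) == 1;
	}

	int main(void)
	{
		struct request *req = malloc(sizeof(*req));

		if (!req)
			return 1;
		/* one is dropped after submission, the other at completion */
		atomic_init(&req->refs, 2);

		if (put_and_test(req))		/* submission side: 2 -> 1 */
			free(req);
		if (put_and_test(req)) {	/* completion side: 1 -> 0 */
			printf("last reference dropped, freeing request\n");
			free(req);
		}
		return 0;
	}

Holding that second reference is what lets the completion side safely touch
the request even after submission has finished, and it is why every request
pays for two atomic operations: exactly the cost the switch away from
refcount_t is trimming.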