Merge pull request #17734 from joadnacer/iouring-update

std.linux: Update io_uring structs and consts for kernel 6.3.8
This commit is contained in:
Andrew Kelley 2023-10-27 04:44:25 -04:00 committed by GitHub
commit 29b05897a5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 277 additions and 110 deletions

View File

@ -3740,35 +3740,6 @@ else
fields: siginfo_fields_union,
};
pub const io_uring_params = extern struct {
sq_entries: u32,
cq_entries: u32,
flags: u32,
sq_thread_cpu: u32,
sq_thread_idle: u32,
features: u32,
wq_fd: u32,
resv: [3]u32,
sq_off: io_sqring_offsets,
cq_off: io_cqring_offsets,
};
// io_uring_params.features flags
pub const IORING_FEAT_SINGLE_MMAP = 1 << 0;
pub const IORING_FEAT_NODROP = 1 << 1;
pub const IORING_FEAT_SUBMIT_STABLE = 1 << 2;
pub const IORING_FEAT_RW_CUR_POS = 1 << 3;
pub const IORING_FEAT_CUR_PERSONALITY = 1 << 4;
pub const IORING_FEAT_FAST_POLL = 1 << 5;
pub const IORING_FEAT_POLL_32BITS = 1 << 6;
pub const IORING_FEAT_SQPOLL_NONFIXED = 1 << 7;
pub const IORING_FEAT_EXT_ARG = 1 << 8;
pub const IORING_FEAT_NATIVE_WORKERS = 1 << 9;
pub const IORING_FEAT_RSRC_TAGS = 1 << 10;
pub const IORING_FEAT_CQE_SKIP = 1 << 11;
pub const IORING_FEAT_LINKED_FILE = 1 << 12;
// io_uring_params.flags
/// io_context is polled
@ -3812,53 +3783,15 @@ pub const IORING_SETUP_SQE128 = 1 << 10;
/// CQEs are 32 byte
pub const IORING_SETUP_CQE32 = 1 << 11;
pub const io_sqring_offsets = extern struct {
/// offset of ring head
head: u32,
/// Only one task is allowed to submit requests
pub const IORING_SETUP_SINGLE_ISSUER = 1 << 12;
/// offset of ring tail
tail: u32,
/// ring mask value
ring_mask: u32,
/// entries in ring
ring_entries: u32,
/// ring flags
flags: u32,
/// number of sqes not submitted
dropped: u32,
/// sqe index array
array: u32,
resv1: u32,
user_addr: u64,
};
// io_sqring_offsets.flags
/// needs io_uring_enter wakeup
pub const IORING_SQ_NEED_WAKEUP = 1 << 0;
/// kernel has cqes waiting beyond the cq ring
pub const IORING_SQ_CQ_OVERFLOW = 1 << 1;
/// task should enter the kernel
pub const IORING_SQ_TASKRUN = 1 << 2;
pub const io_cqring_offsets = extern struct {
head: u32,
tail: u32,
ring_mask: u32,
ring_entries: u32,
overflow: u32,
cqes: u32,
flags: u32,
resv: u32,
user_addr: u64,
};
/// Defer running task work to get events.
/// Rather than running bits of task work whenever the task transitions
/// try to do it just before it is needed.
pub const IORING_SETUP_DEFER_TASKRUN = 1 << 13;
/// IO submission data structure (Submission Queue Entry)
pub const io_uring_sqe = extern struct {
opcode: IORING_OP,
flags: u8,
@ -3872,9 +3805,18 @@ pub const io_uring_sqe = extern struct {
buf_index: u16,
personality: u16,
splice_fd_in: i32,
__pad2: [2]u64,
addr3: u64,
resv: u64,
};
/// If sqe->file_index is set to this for opcodes that instantiate a new
/// direct descriptor (like openat/openat2/accept), then io_uring will allocate
/// an available direct descriptor instead of having the application pass one
/// in. The picked direct descriptor will be returned in cqe->res, or -ENFILE
/// if the space is full.
/// Available since Linux 5.19
pub const IORING_FILE_INDEX_ALLOC = maxInt(u32);
pub const IOSQE_BIT = enum(u8) {
FIXED_FILE,
IO_DRAIN,
@ -3964,6 +3906,10 @@ pub const IORING_OP = enum(u8) {
_,
};
// io_uring_sqe.uring_cmd_flags (rw_flags in the Zig struct)
/// use registered buffer; pass this flag along with setting sqe->buf_index.
pub const IORING_URING_CMD_FIXED = 1 << 0;
// io_uring_sqe.fsync_flags (rw_flags in the Zig struct)
pub const IORING_FSYNC_DATASYNC = 1 << 0;
@ -3990,6 +3936,7 @@ pub const IORING_POLL_ADD_MULTI = 1 << 0;
/// Update existing poll request, matching sqe->addr as the old user_data field.
pub const IORING_POLL_UPDATE_EVENTS = 1 << 1;
pub const IORING_POLL_UPDATE_USER_DATA = 1 << 2;
pub const IORING_POLL_ADD_LEVEL = 1 << 3;
// ASYNC_CANCEL flags.
@ -3999,6 +3946,8 @@ pub const IORING_ASYNC_CANCEL_ALL = 1 << 0;
pub const IORING_ASYNC_CANCEL_FD = 1 << 1;
/// Match any request
pub const IORING_ASYNC_CANCEL_ANY = 1 << 2;
/// 'fd' passed in is a fixed descriptor. Available since Linux 6.0
pub const IORING_ASYNC_CANCEL_FD_FIXED = 1 << 3;
// send/sendmsg and recv/recvmsg flags (sqe->ioprio)
@ -4007,10 +3956,32 @@ pub const IORING_ASYNC_CANCEL_ANY = 1 << 2;
pub const IORING_RECVSEND_POLL_FIRST = 1 << 0;
/// Multishot recv. Sets IORING_CQE_F_MORE if the handler will continue to report CQEs on behalf of the same SQE.
pub const IORING_RECV_MULTISHOT = 1 << 1;
/// Use registered buffers, the index is stored in the buf_index field.
pub const IORING_RECVSEND_FIXED_BUF = 1 << 2;
/// If set, SEND[MSG]_ZC should report the zerocopy usage in cqe.res for the IORING_CQE_F_NOTIF cqe.
pub const IORING_SEND_ZC_REPORT_USAGE = 1 << 3;
/// cqe.res for IORING_CQE_F_NOTIF if IORING_SEND_ZC_REPORT_USAGE was requested
pub const IORING_NOTIF_USAGE_ZC_COPIED = 1 << 31;
/// accept flags stored in sqe->ioprio
/// accept flags stored in sqe->ioprio
pub const IORING_ACCEPT_MULTISHOT = 1 << 0;
/// IORING_OP_MSG_RING command types, stored in sqe->addr
pub const IORING_MSG_RING_COMMAND = enum(u8) {
/// pass sqe->len as 'res' and off as user_data
DATA,
/// send a registered fd to another ring
SEND_FD,
};
// io_uring_sqe.msg_ring_flags (rw_flags in the Zig struct)
/// Don't post a CQE to the target ring. Not applicable for IORING_MSG_DATA, obviously.
pub const IORING_MSG_RING_CQE_SKIP = 1 << 0;
/// Pass through the flags from sqe->file_index (splice_fd_in in the Zig struct) to cqe->flags
pub const IORING_MSG_RING_FLAGS_PASS = 1 << 1;
// IO completion data structure (Completion Queue Entry)
pub const io_uring_cqe = extern struct {
/// io_uring_sqe.data submission passed back
@ -4020,6 +3991,8 @@ pub const io_uring_cqe = extern struct {
res: i32,
flags: u32,
// Followed by 16 bytes of padding if initialized with IORING_SETUP_CQE32, doubling cqe size
pub fn err(self: io_uring_cqe) E {
if (self.res > -4096 and self.res < 0) {
return @as(E, @enumFromInt(-self.res));
@ -4040,11 +4013,66 @@ pub const IORING_CQE_F_SOCK_NONEMPTY = 1 << 2;
/// Set for notification CQEs. Can be used to distinguish them from sends.
pub const IORING_CQE_F_NOTIF = 1 << 3;
pub const IORING_CQE_BUFFER_SHIFT = 16;
/// Magic offsets for the application to mmap the data it needs
pub const IORING_OFF_SQ_RING = 0;
pub const IORING_OFF_CQ_RING = 0x8000000;
pub const IORING_OFF_SQES = 0x10000000;
/// Submission-queue ring offsets; embedded as `sq_off` in `io_uring_params`.
/// Filled with the offsets for mmap(2).
pub const io_sqring_offsets = extern struct {
/// offset of ring head
head: u32,
/// offset of ring tail
tail: u32,
/// ring mask value
ring_mask: u32,
/// entries in ring
ring_entries: u32,
/// ring flags (see the IORING_SQ_* constants)
flags: u32,
/// number of sqes not submitted
dropped: u32,
/// sqe index array
array: u32,
/// reserved
resv1: u32,
/// reserved
/// NOTE(review): the sibling `io_cqring_offsets` names its trailing u64 `user_addr`;
/// confirm which name matches the targeted kernel header.
resv2: u64,
};
// io_sqring_offsets.flags
/// needs io_uring_enter wakeup
pub const IORING_SQ_NEED_WAKEUP = 1 << 0;
/// kernel has cqes waiting beyond the cq ring
pub const IORING_SQ_CQ_OVERFLOW = 1 << 1;
/// task should enter the kernel
pub const IORING_SQ_TASKRUN = 1 << 2;
/// Completion-queue ring offsets; embedded as `cq_off` in `io_uring_params`.
/// NOTE(review): head/tail/ring_mask/ring_entries presumably mirror the
/// same-named fields of `io_sqring_offsets` — confirm against the kernel header.
pub const io_cqring_offsets = extern struct {
head: u32,
tail: u32,
ring_mask: u32,
ring_entries: u32,
overflow: u32,
cqes: u32,
/// ring flags (see IORING_CQ_EVENTFD_DISABLED)
flags: u32,
/// reserved
resv: u32,
// NOTE(review): `io_sqring_offsets` calls its trailing u64 `resv2`; confirm the intended name.
user_addr: u64,
};
// io_cqring_offsets.flags
/// disable eventfd notifications
pub const IORING_CQ_EVENTFD_DISABLED = 1 << 0;
// io_uring_enter flags
pub const IORING_ENTER_GETEVENTS = 1 << 0;
pub const IORING_ENTER_SQ_WAKEUP = 1 << 1;
@ -4052,8 +4080,37 @@ pub const IORING_ENTER_SQ_WAIT = 1 << 2;
pub const IORING_ENTER_EXT_ARG = 1 << 3;
pub const IORING_ENTER_REGISTERED_RING = 1 << 4;
/// io_uring parameter block.
/// NOTE(review): presumably the struct exchanged with io_uring_setup(2) — confirm.
pub const io_uring_params = extern struct {
sq_entries: u32,
cq_entries: u32,
/// IORING_SETUP_* bits
flags: u32,
sq_thread_cpu: u32,
sq_thread_idle: u32,
/// IORING_FEAT_* bits
features: u32,
wq_fd: u32,
/// reserved
resv: [3]u32,
/// submission-queue ring offsets
sq_off: io_sqring_offsets,
/// completion-queue ring offsets
cq_off: io_cqring_offsets,
};
// io_uring_params.features flags
pub const IORING_FEAT_SINGLE_MMAP = 1 << 0;
pub const IORING_FEAT_NODROP = 1 << 1;
pub const IORING_FEAT_SUBMIT_STABLE = 1 << 2;
pub const IORING_FEAT_RW_CUR_POS = 1 << 3;
pub const IORING_FEAT_CUR_PERSONALITY = 1 << 4;
pub const IORING_FEAT_FAST_POLL = 1 << 5;
pub const IORING_FEAT_POLL_32BITS = 1 << 6;
pub const IORING_FEAT_SQPOLL_NONFIXED = 1 << 7;
pub const IORING_FEAT_EXT_ARG = 1 << 8;
pub const IORING_FEAT_NATIVE_WORKERS = 1 << 9;
pub const IORING_FEAT_RSRC_TAGS = 1 << 10;
pub const IORING_FEAT_CQE_SKIP = 1 << 11;
pub const IORING_FEAT_LINKED_FILE = 1 << 12;
// io_uring_register opcodes and arguments
pub const IORING_REGISTER = enum(u8) {
pub const IORING_REGISTER = enum(u32) {
REGISTER_BUFFERS,
UNREGISTER_BUFFERS,
REGISTER_FILES,
@ -4069,41 +4126,93 @@ pub const IORING_REGISTER = enum(u8) {
REGISTER_ENABLE_RINGS,
// extended with tagging
IORING_REGISTER_FILES2,
IORING_REGISTER_FILES_UPDATE2,
IORING_REGISTER_BUFFERS2,
IORING_REGISTER_BUFFERS_UPDATE,
REGISTER_FILES2,
REGISTER_FILES_UPDATE2,
REGISTER_BUFFERS2,
REGISTER_BUFFERS_UPDATE,
// set/clear io-wq thread affinities
IORING_REGISTER_IOWQ_AFF,
IORING_UNREGISTER_IOWQ_AFF,
REGISTER_IOWQ_AFF,
UNREGISTER_IOWQ_AFF,
// set/get max number of io-wq workers
IORING_REGISTER_IOWQ_MAX_WORKERS,
REGISTER_IOWQ_MAX_WORKERS,
// register/unregister io_uring fd with the ring
IORING_REGISTER_RING_FDS,
IORING_UNREGISTER_RING_FDS,
REGISTER_RING_FDS,
UNREGISTER_RING_FDS,
// register ring based provide buffer group
IORING_REGISTER_PBUF_RING,
IORING_UNREGISTER_PBUF_RING,
REGISTER_PBUF_RING,
UNREGISTER_PBUF_RING,
// sync cancelation API
IORING_REGISTER_SYNC_CANCEL,
REGISTER_SYNC_CANCEL,
// register a range of fixed file slots for automatic slot allocation
IORING_REGISTER_FILE_ALLOC_RANGE,
REGISTER_FILE_ALLOC_RANGE,
// flag added to the opcode to use a registered ring fd
IORING_REGISTER_USE_REGISTERED_RING = 1 << 31,
_,
};
/// io-wq worker categories (bound / unbound).
/// NOTE(review): presumably the index meaning for REGISTER_IOWQ_MAX_WORKERS — confirm.
pub const IOWQ_CATEGORIES = enum(u8) {
BOUND,
UNBOUND,
};
/// deprecated, see struct io_uring_rsrc_update
/// Field layout matches `io_uring_rsrc_update`, with `fds` in place of `data`.
pub const io_uring_files_update = extern struct {
offset: u32,
/// reserved
resv: u32,
// NOTE(review): presumably a user-space pointer to an fd array, stored as u64 — confirm.
fds: u64,
};
/// Register a fully sparse file space, rather than pass in an array of all -1 file descriptors.
pub const IORING_RSRC_REGISTER_SPARSE = 1 << 0;
/// Resource registration argument.
/// NOTE(review): presumably used with the tagged REGISTER_FILES2 / REGISTER_BUFFERS2 opcodes — confirm.
pub const io_uring_rsrc_register = extern struct {
nr: u32,
// NOTE(review): presumably accepts IORING_RSRC_REGISTER_SPARSE — confirm.
flags: u32,
/// reserved
resv2: u64,
data: u64,
tags: u64,
};
/// Resource update argument; same field layout as the deprecated
/// `io_uring_files_update` (`data` takes the place of `fds`).
pub const io_uring_rsrc_update = extern struct {
offset: u32,
/// reserved
resv: u32,
data: u64,
};
/// Extended resource update argument: starts with the same three fields as
/// `io_uring_rsrc_update`, followed by `tags`, `nr` and a reserved word.
pub const io_uring_rsrc_update2 = extern struct {
offset: u32,
/// reserved
resv: u32,
data: u64,
tags: u64,
nr: u32,
/// reserved
resv2: u32,
};
pub const io_uring_notification_slot = extern struct {
tag: u64,
resv: [3]u64,
};
pub const io_uring_notification_register = extern struct {
nr_slots: u32,
resv: u32,
resv2: u64,
data: u64,
resv3: u64,
};
/// Skip updating fd indexes set to this value in the fd table
pub const IORING_REGISTER_FILES_SKIP = -2;
pub const IO_URING_OP_SUPPORTED = 1 << 0;
pub const io_uring_probe_op = extern struct {
@ -4131,7 +4240,7 @@ pub const io_uring_probe = extern struct {
};
pub const io_uring_restriction = extern struct {
opcode: u16,
opcode: IORING_RESTRICTION,
arg: extern union {
/// IORING_RESTRICTION_REGISTER_OP
register_op: IORING_REGISTER,
@ -4147,7 +4256,7 @@ pub const io_uring_restriction = extern struct {
};
/// io_uring_restriction->opcode values
pub const IORING_RESTRICTION = enum(u8) {
pub const IORING_RESTRICTION = enum(u16) {
/// Allow an io_uring_register(2) opcode
REGISTER_OP = 0,
@ -4163,6 +4272,56 @@ pub const IORING_RESTRICTION = enum(u8) {
_,
};
/// One buffer entry; the buffer ring is an array of these (see the note following this struct).
pub const io_uring_buf = extern struct {
addr: u64,
len: u32,
bid: u16,
/// In the first entry of a buffer ring this slot doubles as the ring "tail" field.
resv: u16,
};
// io_uring_buf_ring struct omitted
// it's an io_uring_buf array with the resv of the first item used as a "tail" field.
/// argument for IORING_(UN)REGISTER_PBUF_RING
pub const io_uring_buf_reg = extern struct {
ring_addr: u64,
ring_entries: u32,
bgid: u16,
pad: u16,
resv: [3]u64,
};
/// NOTE(review): presumably the extended argument passed to io_uring_enter
/// when IORING_ENTER_EXT_ARG is set — confirm.
pub const io_uring_getevents_arg = extern struct {
// NOTE(review): `sigmask` and `ts` look like user-space addresses stored as u64 — confirm.
sigmask: u64,
sigmask_sz: u32,
/// padding
pad: u32,
ts: u64,
};
/// Argument for IORING_REGISTER_SYNC_CANCEL
/// (the `REGISTER_SYNC_CANCEL` member of the `IORING_REGISTER` enum).
pub const io_uring_sync_cancel_reg = extern struct {
addr: u64,
fd: i32,
// NOTE(review): presumably IORING_ASYNC_CANCEL_* bits — confirm.
flags: u32,
timeout: kernel_timespec,
/// padding
pad: [4]u64,
};
/// Argument for IORING_REGISTER_FILE_ALLOC_RANGE
/// The range is specified as [off, off + len)
pub const io_uring_file_index_range = extern struct {
off: u32,
len: u32,
resv: u64,
};
pub const io_uring_recvmsg_out = extern struct {
namelen: u32,
controllen: u32,
payloadlen: u32,
flags: u32,
};
pub const utsname = extern struct {
sysname: [64:0]u8,
nodename: [64:0]u8,

View File

@ -415,10 +415,10 @@ pub const IO_Uring = struct {
/// Queues (but does not submit) an SQE to perform a `splice(2)`
/// Either `fd_in` or `fd_out` must be a pipe.
/// If `fd_in` refers to a pipe, `off_in` is ignored and must be set to -1.
/// If `fd_in` does not refer to a pipe and `off_in` is -1, then `len` are read
/// If `fd_in` refers to a pipe, `off_in` is ignored and must be set to std.math.maxInt(u64).
/// If `fd_in` does not refer to a pipe and `off_in` is maxInt(u64), then `len` bytes are read
/// from `fd_in` starting from the file offset, which is incremented by the number of bytes read.
/// If `fd_in` does not refer to a pipe and `off_in` is not -1, then the starting offset of `fd_in` will be `off_in`.
/// If `fd_in` does not refer to a pipe and `off_in` is not maxInt(u64), then the starting offset of `fd_in` will be `off_in`.
/// This splice operation can be used to implement sendfile by splicing to an intermediate pipe first,
/// then splice to the final destination. In fact, the implementation of sendfile in kernel uses splice internally.
///
@ -427,7 +427,7 @@ pub const IO_Uring = struct {
/// See https://github.com/axboe/liburing/issues/291
///
/// Returns a pointer to the SQE so that you can further modify the SQE for advanced use cases.
pub fn splice(self: *IO_Uring, user_data: u64, fd_in: os.fd_t, off_in: i64, fd_out: os.fd_t, off_out: i64, len: usize) !*linux.io_uring_sqe {
pub fn splice(self: *IO_Uring, user_data: u64, fd_in: os.fd_t, off_in: u64, fd_out: os.fd_t, off_out: u64, len: usize) !*linux.io_uring_sqe {
const sqe = try self.get_sqe();
io_uring_prep_splice(sqe, fd_in, off_in, fd_out, off_out, len);
sqe.user_data = user_data;
@ -1210,7 +1210,8 @@ pub fn io_uring_prep_nop(sqe: *linux.io_uring_sqe) void {
.buf_index = 0,
.personality = 0,
.splice_fd_in = 0,
.__pad2 = [2]u64{ 0, 0 },
.addr3 = 0,
.resv = 0,
};
}
@ -1228,7 +1229,8 @@ pub fn io_uring_prep_fsync(sqe: *linux.io_uring_sqe, fd: os.fd_t, flags: u32) vo
.buf_index = 0,
.personality = 0,
.splice_fd_in = 0,
.__pad2 = [2]u64{ 0, 0 },
.addr3 = 0,
.resv = 0,
};
}
@ -1253,7 +1255,8 @@ pub fn io_uring_prep_rw(
.buf_index = 0,
.personality = 0,
.splice_fd_in = 0,
.__pad2 = [2]u64{ 0, 0 },
.addr3 = 0,
.resv = 0,
};
}
@ -1265,9 +1268,9 @@ pub fn io_uring_prep_write(sqe: *linux.io_uring_sqe, fd: os.fd_t, buffer: []cons
io_uring_prep_rw(.WRITE, sqe, fd, @intFromPtr(buffer.ptr), buffer.len, offset);
}
pub fn io_uring_prep_splice(sqe: *linux.io_uring_sqe, fd_in: os.fd_t, off_in: i64, fd_out: os.fd_t, off_out: i64, len: usize) void {
io_uring_prep_rw(.SPLICE, sqe, fd_out, undefined, len, @bitCast(off_out));
sqe.addr = @bitCast(off_in);
/// Fills `sqe` for a SPLICE operation moving `len` bytes from `fd_in` to `fd_out`.
/// `off_out` goes through `io_uring_prep_rw` as the offset argument, `off_in` is stored
/// in `sqe.addr`, and `fd_in` in `sqe.splice_fd_in`.
pub fn io_uring_prep_splice(sqe: *linux.io_uring_sqe, fd_in: os.fd_t, off_in: u64, fd_out: os.fd_t, off_out: u64, len: usize) void {
// The address argument is passed as `undefined` because `sqe.addr` is assigned on the next line.
io_uring_prep_rw(.SPLICE, sqe, fd_out, undefined, len, off_out);
sqe.addr = off_in;
sqe.splice_fd_in = fd_in;
}
@ -1397,7 +1400,8 @@ pub fn io_uring_prep_close(sqe: *linux.io_uring_sqe, fd: os.fd_t) void {
.buf_index = 0,
.personality = 0,
.splice_fd_in = 0,
.__pad2 = [2]u64{ 0, 0 },
.addr3 = 0,
.resv = 0,
};
}
@ -1425,7 +1429,8 @@ pub fn io_uring_prep_timeout_remove(sqe: *linux.io_uring_sqe, timeout_user_data:
.buf_index = 0,
.personality = 0,
.splice_fd_in = 0,
.__pad2 = [2]u64{ 0, 0 },
.addr3 = 0,
.resv = 0,
};
}
@ -1485,7 +1490,8 @@ pub fn io_uring_prep_fallocate(
.buf_index = 0,
.personality = 0,
.splice_fd_in = 0,
.__pad2 = [2]u64{ 0, 0 },
.addr3 = 0,
.resv = 0,
};
}
@ -1657,7 +1663,8 @@ test "nop" {
.buf_index = 0,
.personality = 0,
.splice_fd_in = 0,
.__pad2 = [2]u64{ 0, 0 },
.addr3 = 0,
.resv = 0,
}, sqe.*);
try testing.expectEqual(@as(u32, 0), ring.sq.sqe_head);
@ -1881,17 +1888,17 @@ test "splice/read" {
_ = try file_src.write(&buffer_write);
var fds = try os.pipe();
const pipe_offset: i64 = -1;
const pipe_offset: u64 = std.math.maxInt(u64);
const sqe_splice_to_pipe = try ring.splice(0x11111111, fd_src, 0, fds[1], pipe_offset, buffer_write.len);
try testing.expectEqual(linux.IORING_OP.SPLICE, sqe_splice_to_pipe.opcode);
try testing.expectEqual(@as(u64, 0), sqe_splice_to_pipe.addr);
try testing.expectEqual(@as(u64, @bitCast((pipe_offset))), sqe_splice_to_pipe.off);
try testing.expectEqual(pipe_offset, sqe_splice_to_pipe.off);
sqe_splice_to_pipe.flags |= linux.IOSQE_IO_LINK;
const sqe_splice_from_pipe = try ring.splice(0x22222222, fds[0], pipe_offset, fd_dst, 10, buffer_write.len);
try testing.expectEqual(linux.IORING_OP.SPLICE, sqe_splice_from_pipe.opcode);
try testing.expectEqual(@as(u64, @bitCast(pipe_offset)), sqe_splice_from_pipe.addr);
try testing.expectEqual(pipe_offset, sqe_splice_from_pipe.addr);
try testing.expectEqual(@as(u64, 10), sqe_splice_from_pipe.off);
sqe_splice_from_pipe.flags |= linux.IOSQE_IO_LINK;
@ -2028,7 +2035,8 @@ test "openat" {
.buf_index = 0,
.personality = 0,
.splice_fd_in = 0,
.__pad2 = [2]u64{ 0, 0 },
.addr3 = 0,
.resv = 0,
}, sqe_openat.*);
try testing.expectEqual(@as(u32, 1), try ring.submit());