linux.io_uring: Implement splice functions

This commit is contained in:
joadnacer 2023-10-20 01:35:12 +01:00 committed by Veikka Tuominen
parent 0d4d8dfc15
commit 89aad1b89c

View File

@ -413,6 +413,27 @@ pub const IO_Uring = struct {
return sqe;
}
/// Queues (but does not submit) an SQE to perform a `splice(2)`
/// Either `fd_in` or `fd_out` must be a pipe.
/// If `fd_in` refers to a pipe, `off_in` is ignored and must be set to -1.
/// If `fd_in` does not refer to a pipe and `off_in` is -1, then `len` are read
/// from `fd_in` starting from the file offset, which is incremented by the number of bytes read.
/// If `fd_in` does not refer to a pipe and `off_in` is not -1, then the starting offset of `fd_in` will be `off_in`.
/// This splice operation can be used to implement sendfile by splicing to an intermediate pipe first,
/// then splice to the final destination. In fact, the implementation of sendfile in kernel uses splice internally.
///
/// NOTE that even if fd_in or fd_out refers to a pipe, the splice operation can still fail with EINVAL if one of the
/// fd doesn't explicitly support splice peration, e.g. reading from terminal is unsupported from kernel 5.7 to 5.11.
/// See https://github.com/axboe/liburing/issues/291
///
/// Returns a pointer to the SQE so that you can further modify the SQE for advanced use cases.
pub fn splice(self: *IO_Uring, user_data: u64, fd_in: os.fd_t, off_in: i64, fd_out: os.fd_t, off_out: i64, len: usize) !*linux.io_uring_sqe {
const sqe = try self.get_sqe();
io_uring_prep_splice(sqe, fd_in, off_in, fd_out, off_out, len);
sqe.user_data = user_data;
return sqe;
}
/// Queues (but does not submit) an SQE to perform a IORING_OP_READ_FIXED.
/// The `buffer` provided must be registered with the kernel by calling `register_buffers` first.
/// The `buffer_index` must be the same as its index in the array provided to `register_buffers`.
@ -1244,6 +1265,12 @@ pub fn io_uring_prep_write(sqe: *linux.io_uring_sqe, fd: os.fd_t, buffer: []cons
io_uring_prep_rw(.WRITE, sqe, fd, @intFromPtr(buffer.ptr), buffer.len, offset);
}
pub fn io_uring_prep_splice(sqe: *linux.io_uring_sqe, fd_in: os.fd_t, off_in: i64, fd_out: os.fd_t, off_out: i64, len: usize) void {
io_uring_prep_rw(.SPLICE, sqe, fd_out, undefined, len, @bitCast(off_out));
sqe.addr = @bitCast(off_in);
sqe.splice_fd_in = fd_in;
}
pub fn io_uring_prep_readv(
sqe: *linux.io_uring_sqe,
fd: os.fd_t,
@ -1828,6 +1855,77 @@ test "write/read" {
try testing.expectEqualSlices(u8, buffer_write[0..], buffer_read[0..]);
}
test "splice/read" {
if (builtin.os.tag != .linux) return error.SkipZigTest;
var ring = IO_Uring.init(4, 0) catch |err| switch (err) {
error.SystemOutdated => return error.SkipZigTest,
error.PermissionDenied => return error.SkipZigTest,
else => return err,
};
defer ring.deinit();
var tmp = std.testing.tmpDir(.{});
const path_src = "test_io_uring_splice_src";
const file_src = try tmp.dir.createFile(path_src, .{ .read = true, .truncate = true });
defer file_src.close();
const fd_src = file_src.handle;
const path_dst = "test_io_uring_splice_dst";
const file_dst = try tmp.dir.createFile(path_dst, .{ .read = true, .truncate = true });
defer file_dst.close();
const fd_dst = file_dst.handle;
const buffer_write = [_]u8{97} ** 20;
var buffer_read = [_]u8{98} ** 20;
_ = try file_src.write(&buffer_write);
var fds = try os.pipe();
const pipe_offset: i64 = -1;
const sqe_splice_to_pipe = try ring.splice(0x11111111, fd_src, 0, fds[1], pipe_offset, buffer_write.len);
try testing.expectEqual(linux.IORING_OP.SPLICE, sqe_splice_to_pipe.opcode);
try testing.expectEqual(@as(u64, 0), sqe_splice_to_pipe.addr);
try testing.expectEqual(@as(u64, @bitCast((pipe_offset))), sqe_splice_to_pipe.off);
sqe_splice_to_pipe.flags |= linux.IOSQE_IO_LINK;
const sqe_splice_from_pipe = try ring.splice(0x22222222, fds[0], pipe_offset, fd_dst, 10, buffer_write.len);
try testing.expectEqual(linux.IORING_OP.SPLICE, sqe_splice_from_pipe.opcode);
try testing.expectEqual(@as(u64, @bitCast(pipe_offset)), sqe_splice_from_pipe.addr);
try testing.expectEqual(@as(u64, 10), sqe_splice_from_pipe.off);
sqe_splice_from_pipe.flags |= linux.IOSQE_IO_LINK;
const sqe_read = try ring.read(0x33333333, fd_dst, .{ .buffer = buffer_read[0..] }, 10);
try testing.expectEqual(linux.IORING_OP.READ, sqe_read.opcode);
try testing.expectEqual(@as(u64, 10), sqe_read.off);
try testing.expectEqual(@as(u32, 3), try ring.submit());
const cqe_splice_to_pipe = try ring.copy_cqe();
const cqe_splice_from_pipe = try ring.copy_cqe();
const cqe_read = try ring.copy_cqe();
// Prior to Linux Kernel 5.6 this is the only way to test for splice/read support:
// https://lwn.net/Articles/809820/
if (cqe_splice_to_pipe.err() == .INVAL) return error.SkipZigTest;
if (cqe_splice_from_pipe.err() == .INVAL) return error.SkipZigTest;
if (cqe_read.err() == .INVAL) return error.SkipZigTest;
try testing.expectEqual(linux.io_uring_cqe{
.user_data = 0x11111111,
.res = buffer_write.len,
.flags = 0,
}, cqe_splice_to_pipe);
try testing.expectEqual(linux.io_uring_cqe{
.user_data = 0x22222222,
.res = buffer_write.len,
.flags = 0,
}, cqe_splice_from_pipe);
try testing.expectEqual(linux.io_uring_cqe{
.user_data = 0x33333333,
.res = buffer_read.len,
.flags = 0,
}, cqe_read);
try testing.expectEqualSlices(u8, buffer_write[0..], buffer_read[0..]);
}
test "write_fixed/read_fixed" {
if (builtin.os.tag != .linux) return error.SkipZigTest;