mirror of
https://github.com/torvalds/linux.git
synced 2024-11-11 06:31:49 +00:00
for-5.19/io_uring-socket-2022-05-22
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmKKorgQHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgpm0eEACdTzhm7h5cXn9KjIvWLkdocAb/NOL8GYPn Q1mY1SqKQFZvs/fyKHkkZEiIBPxhvN6snVFXMpb4LDmPYeeH4GTUlNomrGTIjvf/ j6SnZN4lCs9A2NlE+iDVWnFQOPQFALza2Y9BhC5xzay326qnKlO+0fQv3C1vXXrc /PNLqxQr7+GmO0a0PJnS6mGWGj6qF7nLqilB9apnKsTK6BKbJEec6ciKreqxU6ME WHaux11uIAbcf8rc6C/2myEK0k6jCOAue3vZ0lizygf+8klUCl2vMqV5BLwCBlXG /e7hBsUUrGr0CG0fryqhQQTUxsZLshioBbQH1vttSeZCli46mmWWAhPNy3/jb1ZU 72bazA84Fe9ney9uVZvZoMoBsG+6t6UOatqND13MeRFAXnkRr0jZRuau2iBxgqAr OINJW+IVPU7IrCD+S4lV1/LCdhLhYcob8/zfKmIrdHMQnWG/gLonVpYJIBCyLDAv 2jvHFIPJuSMUSGVjRKCb16LLNV6u7YG6VOWbKuippxfJxDdwA3TOtOhvTJIpYq0u TotPgpZ7bfcr4xDsGgD9mZS8E7jwsL/G0/MwsnixELykEXuhd++sgoTbr+RyUYdV 45Hm6DsxlytjzOb/5uQrqhwrso05eVt14K74XApPa3fWKL8aWCh1jGSdo3CSbIyW iHwss919Ag== =nb5i -----END PGP SIGNATURE----- Merge tag 'for-5.19/io_uring-socket-2022-05-22' of git://git.kernel.dk/linux-block Pull io_uring socket() support from Jens Axboe: "This adds support for socket(2) for io_uring. This is handy when using direct / registered file descriptors with io_uring. Outside of those two patches, a small series from Dylan on top that improves the tracing by providing a text representation of the opcode rather than needing to decode this by reading the header file every time. That sits in this branch as it was the last opcode added (until it wasn't...)" * tag 'for-5.19/io_uring-socket-2022-05-22' of git://git.kernel.dk/linux-block: io_uring: use the text representation of ops in trace io_uring: rename op -> opcode io_uring: add io_uring_get_opcode io_uring: add type to op enum io_uring: add socket(2) support net: add __sys_socket_file()
This commit is contained in:
commit
368da430d0
177
fs/io_uring.c
177
fs/io_uring.c
@ -600,6 +600,16 @@ struct io_accept {
|
||||
unsigned long nofile;
|
||||
};
|
||||
|
||||
struct io_socket {
|
||||
struct file *file;
|
||||
int domain;
|
||||
int type;
|
||||
int protocol;
|
||||
int flags;
|
||||
u32 file_slot;
|
||||
unsigned long nofile;
|
||||
};
|
||||
|
||||
struct io_sync {
|
||||
struct file *file;
|
||||
loff_t len;
|
||||
@ -981,6 +991,7 @@ struct io_kiocb {
|
||||
struct io_hardlink hardlink;
|
||||
struct io_msg msg;
|
||||
struct io_xattr xattr;
|
||||
struct io_socket sock;
|
||||
};
|
||||
|
||||
u8 opcode;
|
||||
@ -1297,6 +1308,9 @@ static const struct io_op_def io_op_defs[] = {
|
||||
.needs_file = 1
|
||||
},
|
||||
[IORING_OP_GETXATTR] = {},
|
||||
[IORING_OP_SOCKET] = {
|
||||
.audit_skip = 1,
|
||||
},
|
||||
};
|
||||
|
||||
/* requests with any of those set should undergo io_disarm_next() */
|
||||
@ -1341,6 +1355,107 @@ static struct kmem_cache *req_cachep;
|
||||
|
||||
static const struct file_operations io_uring_fops;
|
||||
|
||||
/**
 * io_uring_get_opcode - text representation of an io_uring opcode
 * @opcode: raw opcode value
 *
 * Returns the opcode name without its IORING_OP_ prefix. IORING_OP_LAST,
 * values beyond it, and any opcode without a table entry all yield "INVALID".
 */
const char *io_uring_get_opcode(u8 opcode)
{
	static const char * const opcode_names[IORING_OP_LAST] = {
		[IORING_OP_NOP]			= "NOP",
		[IORING_OP_READV]		= "READV",
		[IORING_OP_WRITEV]		= "WRITEV",
		[IORING_OP_FSYNC]		= "FSYNC",
		[IORING_OP_READ_FIXED]		= "READ_FIXED",
		[IORING_OP_WRITE_FIXED]		= "WRITE_FIXED",
		[IORING_OP_POLL_ADD]		= "POLL_ADD",
		[IORING_OP_POLL_REMOVE]		= "POLL_REMOVE",
		[IORING_OP_SYNC_FILE_RANGE]	= "SYNC_FILE_RANGE",
		[IORING_OP_SENDMSG]		= "SENDMSG",
		[IORING_OP_RECVMSG]		= "RECVMSG",
		[IORING_OP_TIMEOUT]		= "TIMEOUT",
		[IORING_OP_TIMEOUT_REMOVE]	= "TIMEOUT_REMOVE",
		[IORING_OP_ACCEPT]		= "ACCEPT",
		[IORING_OP_ASYNC_CANCEL]	= "ASYNC_CANCEL",
		[IORING_OP_LINK_TIMEOUT]	= "LINK_TIMEOUT",
		[IORING_OP_CONNECT]		= "CONNECT",
		[IORING_OP_FALLOCATE]		= "FALLOCATE",
		[IORING_OP_OPENAT]		= "OPENAT",
		[IORING_OP_CLOSE]		= "CLOSE",
		[IORING_OP_FILES_UPDATE]	= "FILES_UPDATE",
		[IORING_OP_STATX]		= "STATX",
		[IORING_OP_READ]		= "READ",
		[IORING_OP_WRITE]		= "WRITE",
		[IORING_OP_FADVISE]		= "FADVISE",
		[IORING_OP_MADVISE]		= "MADVISE",
		[IORING_OP_SEND]		= "SEND",
		[IORING_OP_RECV]		= "RECV",
		[IORING_OP_OPENAT2]		= "OPENAT2",
		[IORING_OP_EPOLL_CTL]		= "EPOLL_CTL",
		[IORING_OP_SPLICE]		= "SPLICE",
		[IORING_OP_PROVIDE_BUFFERS]	= "PROVIDE_BUFFERS",
		[IORING_OP_REMOVE_BUFFERS]	= "REMOVE_BUFFERS",
		[IORING_OP_TEE]			= "TEE",
		[IORING_OP_SHUTDOWN]		= "SHUTDOWN",
		[IORING_OP_RENAMEAT]		= "RENAMEAT",
		[IORING_OP_UNLINKAT]		= "UNLINKAT",
		[IORING_OP_MKDIRAT]		= "MKDIRAT",
		[IORING_OP_SYMLINKAT]		= "SYMLINKAT",
		[IORING_OP_LINKAT]		= "LINKAT",
		[IORING_OP_MSG_RING]		= "MSG_RING",
		[IORING_OP_FSETXATTR]		= "FSETXATTR",
		[IORING_OP_SETXATTR]		= "SETXATTR",
		[IORING_OP_FGETXATTR]		= "FGETXATTR",
		[IORING_OP_GETXATTR]		= "GETXATTR",
		[IORING_OP_SOCKET]		= "SOCKET",
	};
	const char *name = NULL;

	/* Only index the table for values strictly below IORING_OP_LAST. */
	if (opcode < IORING_OP_LAST)
		name = opcode_names[opcode];

	/* A hole in the table reads as NULL and is reported like an unknown opcode. */
	return name ? name : "INVALID";
}
|
||||
|
||||
struct sock *io_uring_get_socket(struct file *file)
|
||||
{
|
||||
#if defined(CONFIG_UNIX)
|
||||
@ -6237,6 +6352,62 @@ retry:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
{
|
||||
struct io_socket *sock = &req->sock;
|
||||
|
||||
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
|
||||
return -EINVAL;
|
||||
if (sqe->ioprio || sqe->addr || sqe->rw_flags || sqe->buf_index)
|
||||
return -EINVAL;
|
||||
|
||||
sock->domain = READ_ONCE(sqe->fd);
|
||||
sock->type = READ_ONCE(sqe->off);
|
||||
sock->protocol = READ_ONCE(sqe->len);
|
||||
sock->file_slot = READ_ONCE(sqe->file_index);
|
||||
sock->nofile = rlimit(RLIMIT_NOFILE);
|
||||
|
||||
sock->flags = sock->type & ~SOCK_TYPE_MASK;
|
||||
if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
|
||||
return -EINVAL;
|
||||
if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int io_socket(struct io_kiocb *req, unsigned int issue_flags)
|
||||
{
|
||||
struct io_socket *sock = &req->sock;
|
||||
bool fixed = !!sock->file_slot;
|
||||
struct file *file;
|
||||
int ret, fd;
|
||||
|
||||
if (!fixed) {
|
||||
fd = __get_unused_fd_flags(sock->flags, sock->nofile);
|
||||
if (unlikely(fd < 0))
|
||||
return fd;
|
||||
}
|
||||
file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
|
||||
if (IS_ERR(file)) {
|
||||
if (!fixed)
|
||||
put_unused_fd(fd);
|
||||
ret = PTR_ERR(file);
|
||||
if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
|
||||
return -EAGAIN;
|
||||
if (ret == -ERESTARTSYS)
|
||||
ret = -EINTR;
|
||||
req_set_fail(req);
|
||||
} else if (!fixed) {
|
||||
fd_install(fd, file);
|
||||
ret = fd;
|
||||
} else {
|
||||
ret = io_install_fixed_file(req, file, issue_flags,
|
||||
sock->file_slot - 1);
|
||||
}
|
||||
__io_req_complete(req, issue_flags, ret, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int io_connect_prep_async(struct io_kiocb *req)
|
||||
{
|
||||
struct io_async_connect *io = req->async_data;
|
||||
@ -6322,6 +6493,7 @@ IO_NETOP_PREP_ASYNC(sendmsg);
|
||||
IO_NETOP_PREP_ASYNC(recvmsg);
|
||||
IO_NETOP_PREP_ASYNC(connect);
|
||||
IO_NETOP_PREP(accept);
|
||||
IO_NETOP_PREP(socket);
|
||||
IO_NETOP_FN(send);
|
||||
IO_NETOP_FN(recv);
|
||||
#endif /* CONFIG_NET */
|
||||
@ -7651,6 +7823,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
return io_fgetxattr_prep(req, sqe);
|
||||
case IORING_OP_GETXATTR:
|
||||
return io_getxattr_prep(req, sqe);
|
||||
case IORING_OP_SOCKET:
|
||||
return io_socket_prep(req, sqe);
|
||||
}
|
||||
|
||||
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
|
||||
@ -7974,6 +8148,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
|
||||
case IORING_OP_GETXATTR:
|
||||
ret = io_getxattr(req, issue_flags);
|
||||
break;
|
||||
case IORING_OP_SOCKET:
|
||||
ret = io_socket(req, issue_flags);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
|
@ -10,6 +10,7 @@ struct sock *io_uring_get_socket(struct file *file);
|
||||
void __io_uring_cancel(bool cancel_all);
|
||||
void __io_uring_free(struct task_struct *tsk);
|
||||
void io_uring_unreg_ringfd(void);
|
||||
const char *io_uring_get_opcode(u8 opcode);
|
||||
|
||||
static inline void io_uring_files_cancel(void)
|
||||
{
|
||||
@ -42,6 +43,10 @@ static inline void io_uring_files_cancel(void)
|
||||
static inline void io_uring_free(struct task_struct *tsk)
|
||||
{
|
||||
}
|
||||
/* Stub variant of io_uring_get_opcode(): every opcode maps to an empty name. */
static inline const char *io_uring_get_opcode(u8 opcode)
{
	return "";
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -434,6 +434,7 @@ extern struct file *do_accept(struct file *file, unsigned file_flags,
|
||||
extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
|
||||
int __user *upeer_addrlen, int flags);
|
||||
extern int __sys_socket(int family, int type, int protocol);
|
||||
extern struct file *__sys_socket_file(int family, int type, int protocol);
|
||||
extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen);
|
||||
extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr,
|
||||
int addrlen, int file_flags);
|
||||
|
@ -7,6 +7,7 @@
|
||||
|
||||
#include <linux/tracepoint.h>
|
||||
#include <uapi/linux/io_uring.h>
|
||||
#include <linux/io_uring.h>
|
||||
|
||||
struct io_wq_work;
|
||||
|
||||
@ -169,8 +170,9 @@ TRACE_EVENT(io_uring_queue_async_work,
|
||||
__entry->rw = rw;
|
||||
),
|
||||
|
||||
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, flags 0x%x, %s queue, work %p",
|
||||
__entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
|
||||
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, flags 0x%x, %s queue, work %p",
|
||||
__entry->ctx, __entry->req, __entry->user_data,
|
||||
io_uring_get_opcode(__entry->opcode),
|
||||
__entry->flags, __entry->rw ? "hashed" : "normal", __entry->work)
|
||||
);
|
||||
|
||||
@ -205,8 +207,9 @@ TRACE_EVENT(io_uring_defer,
|
||||
__entry->opcode = opcode;
|
||||
),
|
||||
|
||||
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d",
|
||||
__entry->ctx, __entry->req, __entry->data, __entry->opcode)
|
||||
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s",
|
||||
__entry->ctx, __entry->req, __entry->data,
|
||||
io_uring_get_opcode(__entry->opcode))
|
||||
);
|
||||
|
||||
/**
|
||||
@ -305,9 +308,9 @@ TRACE_EVENT(io_uring_fail_link,
|
||||
__entry->link = link;
|
||||
),
|
||||
|
||||
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, link %p",
|
||||
__entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
|
||||
__entry->link)
|
||||
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, link %p",
|
||||
__entry->ctx, __entry->req, __entry->user_data,
|
||||
io_uring_get_opcode(__entry->opcode), __entry->link)
|
||||
);
|
||||
|
||||
/**
|
||||
@ -389,9 +392,9 @@ TRACE_EVENT(io_uring_submit_sqe,
|
||||
__entry->sq_thread = sq_thread;
|
||||
),
|
||||
|
||||
TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, flags 0x%x, "
|
||||
TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, flags 0x%x, "
|
||||
"non block %d, sq_thread %d", __entry->ctx, __entry->req,
|
||||
__entry->user_data, __entry->opcode,
|
||||
__entry->user_data, io_uring_get_opcode(__entry->opcode),
|
||||
__entry->flags, __entry->force_nonblock, __entry->sq_thread)
|
||||
);
|
||||
|
||||
@ -433,8 +436,9 @@ TRACE_EVENT(io_uring_poll_arm,
|
||||
__entry->events = events;
|
||||
),
|
||||
|
||||
TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask 0x%x, events 0x%x",
|
||||
__entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
|
||||
TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask 0x%x, events 0x%x",
|
||||
__entry->ctx, __entry->req, __entry->user_data,
|
||||
io_uring_get_opcode(__entry->opcode),
|
||||
__entry->mask, __entry->events)
|
||||
);
|
||||
|
||||
@ -470,8 +474,9 @@ TRACE_EVENT(io_uring_task_add,
|
||||
__entry->mask = mask;
|
||||
),
|
||||
|
||||
TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask %x",
|
||||
__entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
|
||||
TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask %x",
|
||||
__entry->ctx, __entry->req, __entry->user_data,
|
||||
io_uring_get_opcode(__entry->opcode),
|
||||
__entry->mask)
|
||||
);
|
||||
|
||||
@ -530,12 +535,13 @@ TRACE_EVENT(io_uring_req_failed,
|
||||
),
|
||||
|
||||
TP_printk("ring %p, req %p, user_data 0x%llx, "
|
||||
"op %d, flags 0x%x, prio=%d, off=%llu, addr=%llu, "
|
||||
"opcode %s, flags 0x%x, prio=%d, off=%llu, addr=%llu, "
|
||||
"len=%u, rw_flags=0x%x, buf_index=%d, "
|
||||
"personality=%d, file_index=%d, pad=0x%llx, addr3=%llx, "
|
||||
"error=%d",
|
||||
__entry->ctx, __entry->req, __entry->user_data,
|
||||
__entry->opcode, __entry->flags, __entry->ioprio,
|
||||
io_uring_get_opcode(__entry->opcode),
|
||||
__entry->flags, __entry->ioprio,
|
||||
(unsigned long long)__entry->off,
|
||||
(unsigned long long) __entry->addr, __entry->len,
|
||||
__entry->op_flags,
|
||||
|
@ -128,7 +128,7 @@ enum {
|
||||
*/
|
||||
#define IORING_SETUP_TASKRUN_FLAG (1U << 9)
|
||||
|
||||
enum {
|
||||
enum io_uring_op {
|
||||
IORING_OP_NOP,
|
||||
IORING_OP_READV,
|
||||
IORING_OP_WRITEV,
|
||||
@ -174,6 +174,7 @@ enum {
|
||||
IORING_OP_SETXATTR,
|
||||
IORING_OP_FGETXATTR,
|
||||
IORING_OP_GETXATTR,
|
||||
IORING_OP_SOCKET,
|
||||
|
||||
/* this goes last, obviously */
|
||||
IORING_OP_LAST,
|
||||
|
54
net/socket.c
54
net/socket.c
@ -504,7 +504,7 @@ static int sock_map_fd(struct socket *sock, int flags)
|
||||
struct socket *sock_from_file(struct file *file)
|
||||
{
|
||||
if (file->f_op == &socket_file_ops)
|
||||
return file->private_data; /* set in sock_map_fd */
|
||||
return file->private_data; /* set in sock_alloc_file */
|
||||
|
||||
return NULL;
|
||||
}
|
||||
@ -1538,11 +1538,10 @@ int sock_create_kern(struct net *net, int family, int type, int protocol, struct
|
||||
}
|
||||
EXPORT_SYMBOL(sock_create_kern);
|
||||
|
||||
int __sys_socket(int family, int type, int protocol)
|
||||
static struct socket *__sys_socket_create(int family, int type, int protocol)
|
||||
{
|
||||
int retval;
|
||||
struct socket *sock;
|
||||
int flags;
|
||||
int retval;
|
||||
|
||||
/* Check the SOCK_* constants for consistency. */
|
||||
BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
|
||||
@ -1550,17 +1549,50 @@ int __sys_socket(int family, int type, int protocol)
|
||||
BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
|
||||
BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
|
||||
|
||||
flags = type & ~SOCK_TYPE_MASK;
|
||||
if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
|
||||
return -EINVAL;
|
||||
if ((type & ~SOCK_TYPE_MASK) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
|
||||
return ERR_PTR(-EINVAL);
|
||||
type &= SOCK_TYPE_MASK;
|
||||
|
||||
if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
|
||||
flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
|
||||
|
||||
retval = sock_create(family, type, protocol, &sock);
|
||||
if (retval < 0)
|
||||
return retval;
|
||||
return ERR_PTR(retval);
|
||||
|
||||
return sock;
|
||||
}
|
||||
|
||||
struct file *__sys_socket_file(int family, int type, int protocol)
|
||||
{
|
||||
struct socket *sock;
|
||||
struct file *file;
|
||||
int flags;
|
||||
|
||||
sock = __sys_socket_create(family, type, protocol);
|
||||
if (IS_ERR(sock))
|
||||
return ERR_CAST(sock);
|
||||
|
||||
flags = type & ~SOCK_TYPE_MASK;
|
||||
if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
|
||||
flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
|
||||
|
||||
file = sock_alloc_file(sock, flags, NULL);
|
||||
if (IS_ERR(file))
|
||||
sock_release(sock);
|
||||
|
||||
return file;
|
||||
}
|
||||
|
||||
int __sys_socket(int family, int type, int protocol)
|
||||
{
|
||||
struct socket *sock;
|
||||
int flags;
|
||||
|
||||
sock = __sys_socket_create(family, type, protocol);
|
||||
if (IS_ERR(sock))
|
||||
return PTR_ERR(sock);
|
||||
|
||||
flags = type & ~SOCK_TYPE_MASK;
|
||||
if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
|
||||
flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
|
||||
|
||||
return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user