From 9de5be06d0a89ca97b5ab902694d42dfd2bb77d2 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 24 Apr 2019 17:05:06 +0200 Subject: [PATCH 01/13] fuse: fix writepages on 32bit Writepage requests were cropped to i_size & 0xffffffff, which meant that mmaped writes to any file larger than 4G might be silently discarded. Fix by storing the file size in a properly sized variable (loff_t instead of size_t). Reported-by: Antonio SJ Musumeci Fixes: 6eaf4782eb09 ("fuse: writepages: crop secondary requests") Cc: # v3.13 Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 06096b60f1df..5428c81879b2 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1586,7 +1586,7 @@ __acquires(fi->lock) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); - size_t crop = i_size_read(inode); + loff_t crop = i_size_read(inode); struct fuse_req *req; while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) { From 0cbade024ba501313da3b7e5dd2a188a6bc491b5 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 18 Apr 2019 04:04:41 +0800 Subject: [PATCH 02/13] fuse: honor RLIMIT_FSIZE in fuse_file_fallocate fstests generic/228 reported this failure that fuse fallocate does not honor what 'ulimit -f' has set. This adds the necessary inode_newsize_ok() check. Signed-off-by: Liu Bo Fixes: 05ba1f082300 ("fuse: add FALLOCATE operation") Cc: # v3.5 Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 5428c81879b2..f811af4f6507 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -3044,6 +3044,13 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, } } + if (!(mode & FALLOC_FL_KEEP_SIZE) && + offset + length > i_size_read(inode)) { + err = inode_newsize_ok(inode, offset + length); + if (err) + return err; + } + if (!(mode & FALLOC_FL_KEEP_SIZE)) set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); From f2294482ff65dd9c9c3c6ae1447f908c6aa60f52 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Wed, 27 Mar 2019 09:15:17 +0000 Subject: [PATCH 03/13] fuse: convert printk -> pr_* Functions, like pr_err, are a more modern variant of printing compared to printk. They could be used to denoise sources by using needed level in the print function name, and by automatically inserting per-driver / function / ... print prefix as defined by pr_fmt macro. pr_* are also said to be used in Documentation/process/coding-style.rst and more recent code - for example overlayfs - uses them instead of printk. Convert CUSE and FUSE to use the new pr_* functions. CUSE output stays completely unchanged, while FUSE output is amended a bit for "trying to steal weird page" warning - the second line now comes also with "fuse:" prefix. I hope it is ok. Suggested-by: Kirill Tkhai Signed-off-by: Kirill Smelkov Reviewed-by: Kirill Tkhai Signed-off-by: Miklos Szeredi --- fs/fuse/cuse.c | 13 +++++++------ fs/fuse/dev.c | 4 ++-- fs/fuse/fuse_i.h | 4 ++++ fs/fuse/inode.c | 6 +++--- 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 55a26f351467..4b41df1d4642 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -33,6 +33,8 @@ * closed. */ +#define pr_fmt(fmt) "CUSE: " fmt + #include #include #include @@ -225,7 +227,7 @@ static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp) return 0; if (end[-1] != '\0') { - printk(KERN_ERR "CUSE: info not properly terminated\n"); + pr_err("info not properly terminated\n"); return -EINVAL; } @@ -242,7 +244,7 @@ static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp) key = strstrip(key); if (!strlen(key)) { - printk(KERN_ERR "CUSE: zero length info key specified\n"); + pr_err("zero length info key specified\n"); return -EINVAL; } @@ -282,12 +284,11 @@ static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo) if (strcmp(key, "DEVNAME") == 0) devinfo->name = val; else - printk(KERN_WARNING "CUSE: unknown device info \"%s\"\n", - key); + pr_warn("unknown device info \"%s\"\n", key); } if (!devinfo->name || !strlen(devinfo->name)) { - printk(KERN_ERR "CUSE: DEVNAME unspecified\n"); + pr_err("DEVNAME unspecified\n"); return -EINVAL; } @@ -341,7 +342,7 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req) else rc = register_chrdev_region(devt, 1, devinfo.name); if (rc) { - printk(KERN_ERR "CUSE: failed to register chrdev region\n"); + pr_err("failed to register chrdev region\n"); goto err; } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 9971a35cf1ef..2d65a00740ad 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -906,8 +906,8 @@ static int fuse_check_page(struct page *page) 1 << PG_lru | 1 << PG_active | 1 << PG_reclaim))) { - printk(KERN_WARNING "fuse: trying to steal weird page\n"); - printk(KERN_WARNING " page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping); + pr_warn("trying to steal weird page\n"); + pr_warn(" page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping); return 1; } return 0; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 0920c0c032a0..e6195bc8f836 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -9,6 +9,10 @@ #ifndef _FS_FUSE_I_H #define _FS_FUSE_I_H +#ifndef pr_fmt +# define pr_fmt(fmt) "fuse: " fmt +#endif + #include #include #include diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index ec5d9953dfb6..36981ea7eac0 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1397,8 +1397,8 @@ static int __init fuse_init(void) { int res; - printk(KERN_INFO "fuse init (API version %i.%i)\n", - FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); + pr_info("init (API version %i.%i)\n", + FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); INIT_LIST_HEAD(&fuse_conn_list); res = fuse_fs_init(); @@ -1434,7 +1434,7 @@ static int __init fuse_init(void) static void __exit fuse_exit(void) { - printk(KERN_DEBUG "fuse exit\n"); + pr_debug("exit\n"); fuse_ctl_cleanup(); fuse_sysfs_cleanup(); From ad2ba64dd489805e7ddf5fecf166cae1e09fc5c0 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Wed, 27 Mar 2019 11:14:15 +0000 Subject: [PATCH 04/13] fuse: allow filesystems to have precise control over data cache On networked filesystems file data can be changed externally. FUSE provides notification messages for filesystem to inform kernel that metadata or data region of a file needs to be invalidated in local page cache. That provides the basis for filesystem implementations to invalidate kernel cache explicitly based on observed filesystem-specific events. FUSE has also "automatic" invalidation mode(*) when the kernel automatically invalidates data cache of a file if it sees mtime change. It also automatically invalidates whole data cache of a file if it sees file size being changed. The automatic mode has corresponding capability - FUSE_AUTO_INVAL_DATA. However, due to probably historical reason, that capability controls only whether mtime change should be resulting in automatic invalidation or not. A change in file size always results in invalidating whole data cache of a file irregardless of whether FUSE_AUTO_INVAL_DATA was negotiated(+). The filesystem I write[1] represents data arrays stored in networked database as local files suitable for mmap. It is read-only filesystem - changes to data are committed externally via database interfaces and the filesystem only glues data into contiguous file streams suitable for mmap and traditional array processing. The files are big - starting from hundreds gigabytes and more. The files change regularly, and frequently by data being appended to their end. The size of files thus changes frequently. If a file was accessed locally and some part of its data got into page cache, we want that data to stay cached unless there is memory pressure, or unless corresponding part of the file was actually changed. However current FUSE behaviour - when it sees file size change - is to invalidate the whole file. The data cache of the file is thus completely lost even on small size change, and despite that the filesystem server is careful to accurately translate database changes into FUSE invalidation messages to kernel. Let's fix it: if a filesystem, through new FUSE_EXPLICIT_INVAL_DATA capability, indicates to kernel that it is fully responsible for data cache invalidation, then the kernel won't invalidate files data cache on size change and only truncate that cache to new size in case the size decreased. (*) see 72d0d248ca "fuse: add FUSE_AUTO_INVAL_DATA init flag", eed2179efe "fuse: invalidate inode mapping if mtime changes" (+) in writeback mode the kernel does not invalidate data cache on file size change, but neither it allows the filesystem to set the size due to external event (see 8373200b12 "fuse: Trust kernel i_size only") [1] https://lab.nexedi.com/kirr/wendelin.core/blob/a50f1d9f/wcfs/wcfs.go#L20 Signed-off-by: Kirill Smelkov Signed-off-by: Miklos Szeredi --- fs/fuse/fuse_i.h | 3 +++ fs/fuse/inode.c | 7 +++++-- include/uapi/linux/fuse.h | 7 ++++++- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index e6195bc8f836..24dbca777775 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -694,6 +694,9 @@ struct fuse_conn { /** Use enhanced/automatic page cache invalidation. */ unsigned auto_inval_data:1; + /** Filesystem is fully reponsible for page cache invalidation. */ + unsigned explicit_inval_data:1; + /** Does the filesystem support readdirplus? */ unsigned do_readdirplus:1; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 36981ea7eac0..c67b39e88fd5 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -237,7 +237,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, if (oldsize != attr->size) { truncate_pagecache(inode, attr->size); - inval = true; + if (!fc->explicit_inval_data) + inval = true; } else if (fc->auto_inval_data) { struct timespec64 new_mtime = { .tv_sec = attr->mtime, @@ -912,6 +913,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->dont_mask = 1; if (arg->flags & FUSE_AUTO_INVAL_DATA) fc->auto_inval_data = 1; + else if (arg->flags & FUSE_EXPLICIT_INVAL_DATA) + fc->explicit_inval_data = 1; if (arg->flags & FUSE_DO_READDIRPLUS) { fc->do_readdirplus = 1; if (arg->flags & FUSE_READDIRPLUS_AUTO) @@ -973,7 +976,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT | FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL | FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS | - FUSE_NO_OPENDIR_SUPPORT; + FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 2ac598614a8f..36899cfcb654 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -125,6 +125,9 @@ * * 7.29 * - add FUSE_NO_OPENDIR_SUPPORT flag + * + * 7.30 + * - add FUSE_EXPLICIT_INVAL_DATA */ #ifndef _LINUX_FUSE_H @@ -160,7 +163,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 29 +#define FUSE_KERNEL_MINOR_VERSION 30 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -263,6 +266,7 @@ struct fuse_file_lock { * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages * FUSE_CACHE_SYMLINKS: cache READLINK responses * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir + * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -289,6 +293,7 @@ struct fuse_file_lock { #define FUSE_MAX_PAGES (1 << 22) #define FUSE_CACHE_SYMLINKS (1 << 23) #define FUSE_NO_OPENDIR_SUPPORT (1 << 24) +#define FUSE_EXPLICIT_INVAL_DATA (1 << 25) /** * CUSE INIT request/reply flags From 7640682e67b33cab8628729afec8ca92b851394f Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Wed, 27 Mar 2019 10:15:19 +0000 Subject: [PATCH 05/13] fuse: retrieve: cap requested size to negotiated max_write FUSE filesystem server and kernel client negotiate during initialization phase, what should be the maximum write size the client will ever issue. Correspondingly the filesystem server then queues sys_read calls to read requests with buffer capacity large enough to carry request header + that max_write bytes. A filesystem server is free to set its max_write in anywhere in the range between [1*page, fc->max_pages*page]. In particular go-fuse[2] sets max_write by default as 64K, wheres default fc->max_pages corresponds to 128K. Libfuse also allows users to configure max_write, but by default presets it to possible maximum. If max_write is < fc->max_pages*page, and in NOTIFY_RETRIEVE handler we allow to retrieve more than max_write bytes, corresponding prepared NOTIFY_REPLY will be thrown away by fuse_dev_do_read, because the filesystem server, in full correspondence with server/client contract, will be only queuing sys_read with ~max_write buffer capacity, and fuse_dev_do_read throws away requests that cannot fit into server request buffer. In turn the filesystem server could get stuck waiting indefinitely for NOTIFY_REPLY since NOTIFY_RETRIEVE handler returned OK which is understood by clients as that NOTIFY_REPLY was queued and will be sent back. Cap requested size to negotiate max_write to avoid the problem. This aligns with the way NOTIFY_RETRIEVE handler works, which already unconditionally caps requested retrieve size to fuse_conn->max_pages. This way it should not hurt NOTIFY_RETRIEVE semantic if we return less data than was originally requested. Please see [1] for context where the problem of stuck filesystem was hit for real, how the situation was traced and for more involving patch that did not make it into the tree. [1] https://marc.info/?l=linux-fsdevel&m=155057023600853&w=2 [2] https://github.com/hanwen/go-fuse Signed-off-by: Kirill Smelkov Cc: Han-Wen Nienhuys Cc: Jakob Unterwurzacher Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 2d65a00740ad..ea8237513dfa 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1749,7 +1749,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, offset = outarg->offset & ~PAGE_MASK; file_size = i_size_read(inode); - num = outarg->size; + num = min(outarg->size, fc->max_write); if (outarg->offset > file_size) num = 0; else if (outarg->offset + num > file_size) From d4b13963f217dd947da5c0cabd1569e914d21699 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Wed, 27 Mar 2019 10:15:15 +0000 Subject: [PATCH 06/13] fuse: require /dev/fuse reads to have enough buffer capacity A FUSE filesystem server queues /dev/fuse sys_read calls to get filesystem requests to handle. It does not know in advance what would be that request as it can be anything that client issues - LOOKUP, READ, WRITE, ... Many requests are short and retrieve data from the filesystem. However WRITE and NOTIFY_REPLY write data into filesystem. Before getting into operation phase, FUSE filesystem server and kernel client negotiate what should be the maximum write size the client will ever issue. After negotiation the contract in between server/client is that the filesystem server then should queue /dev/fuse sys_read calls with enough buffer capacity to receive any client request - WRITE in particular, while FUSE client should not, in particular, send WRITE requests with > negotiated max_write payload. FUSE client in kernel and libfuse historically reserve 4K for request header. This way the contract is that filesystem server should queue sys_reads with 4K+max_write buffer. If the filesystem server does not follow this contract, what can happen is that fuse_dev_do_read will see that request size is > buffer size, and then it will return EIO to client who issued the request but won't indicate in any way that there is a problem to filesystem server. This can be hard to diagnose because for some requests, e.g. for NOTIFY_REPLY which mimics WRITE, there is no client thread that is waiting for request completion and that EIO goes nowhere, while on filesystem server side things look like the kernel is not replying back after successful NOTIFY_RETRIEVE request made by the server. We can make the problem easy to diagnose if we indicate via error return to filesystem server when it is violating the contract. This should not practically cause problems because if a filesystem server is using shorter buffer, writes to it were already very likely to cause EIO, and if the filesystem is read-only it should be too following FUSE_MIN_READ_BUFFER minimum buffer size. Please see [1] for context where the problem of stuck filesystem was hit for real (because kernel client was incorrectly sending more than max_write data with NOTIFY_REPLY; see also previous patch), how the situation was traced and for more involving patch that did not make it into the tree. [1] https://marc.info/?l=linux-fsdevel&m=155057023600853&w=2 Signed-off-by: Kirill Smelkov Cc: Han-Wen Nienhuys Cc: Jakob Unterwurzacher Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index ea8237513dfa..24ea19cfe07e 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1317,6 +1317,16 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, unsigned reqsize; unsigned int hash; + /* + * Require sane minimum read buffer - that has capacity for fixed part + * of any request header + negotated max_write room for data. If the + * requirement is not satisfied return EINVAL to the filesystem server + * to indicate that it is not following FUSE server/client contract. + * Don't dequeue / abort any request. + */ + if (nbytes < max_t(size_t, FUSE_MIN_READ_BUFFER, 4096 + fc->max_write)) + return -EINVAL; + restart: spin_lock(&fiq->waitq.lock); err = -EAGAIN; From bbd84f33652f852ce5992d65db4d020aba21f882 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Wed, 24 Apr 2019 07:13:57 +0000 Subject: [PATCH 07/13] fuse: Add FOPEN_STREAM to use stream_open() Starting from commit 9c225f2655e3 ("vfs: atomic f_pos accesses as per POSIX") files opened even via nonseekable_open gate read and write via lock and do not allow them to be run simultaneously. This can create read vs write deadlock if a filesystem is trying to implement a socket-like file which is intended to be simultaneously used for both read and write from filesystem client. See commit 10dce8af3422 ("fs: stream_open - opener for stream-like files so that read and write can run simultaneously without deadlock") for details and e.g. commit 581d21a2d02a ("xenbus: fix deadlock on writes to /proc/xen/xenbus") for a similar deadlock example on /proc/xen/xenbus. To avoid such deadlock it was tempting to adjust fuse_finish_open to use stream_open instead of nonseekable_open on just FOPEN_NONSEEKABLE flags, but grepping through Debian codesearch shows users of FOPEN_NONSEEKABLE, and in particular GVFS which actually uses offset in its read and write handlers https://codesearch.debian.net/search?q=-%3Enonseekable+%3D https://gitlab.gnome.org/GNOME/gvfs/blob/1.40.0-6-gcbc54396/client/gvfsfusedaemon.c#L1080 https://gitlab.gnome.org/GNOME/gvfs/blob/1.40.0-6-gcbc54396/client/gvfsfusedaemon.c#L1247-1346 https://gitlab.gnome.org/GNOME/gvfs/blob/1.40.0-6-gcbc54396/client/gvfsfusedaemon.c#L1399-1481 so if we would do such a change it will break a real user. Add another flag (FOPEN_STREAM) for filesystem servers to indicate that the opened handler is having stream-like semantics; does not use file position and thus the kernel is free to issue simultaneous read and write request on opened file handle. This patch together with stream_open() should be added to stable kernels starting from v3.14+. This will allow to patch OSSPD and other FUSE filesystems that provide stream-like files to return FOPEN_STREAM | FOPEN_NONSEEKABLE in open handler and this way avoid the deadlock on all kernel versions. This should work because fuse_finish_open ignores unknown open flags returned from a filesystem and so passing FOPEN_STREAM to a kernel that is not aware of this flag cannot hurt. In turn the kernel that is not aware of FOPEN_STREAM will be < v3.14 where just FOPEN_NONSEEKABLE is sufficient to implement streams without read vs write deadlock. Cc: stable@vger.kernel.org # v3.14+ Signed-off-by: Kirill Smelkov Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 4 +++- include/uapi/linux/fuse.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f811af4f6507..92ee15dda4c7 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -178,7 +178,9 @@ void fuse_finish_open(struct inode *inode, struct file *file) if (!(ff->open_flags & FOPEN_KEEP_CACHE)) invalidate_inode_pages2(inode->i_mapping); - if (ff->open_flags & FOPEN_NONSEEKABLE) + if (ff->open_flags & FOPEN_STREAM) + stream_open(inode, file); + else if (ff->open_flags & FOPEN_NONSEEKABLE) nonseekable_open(inode, file); if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) { struct fuse_inode *fi = get_fuse_inode(inode); diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 36899cfcb654..17afe2dd8d1c 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -232,11 +232,13 @@ struct fuse_file_lock { * FOPEN_KEEP_CACHE: don't invalidate the data cache on open * FOPEN_NONSEEKABLE: the file is not seekable * FOPEN_CACHE_DIR: allow caching this directory + * FOPEN_STREAM: the file is stream-like (no file position at all) */ #define FOPEN_DIRECT_IO (1 << 0) #define FOPEN_KEEP_CACHE (1 << 1) #define FOPEN_NONSEEKABLE (1 << 2) #define FOPEN_CACHE_DIR (1 << 3) +#define FOPEN_STREAM (1 << 4) /** * INIT request/reply flags From 154603fe3ec4620a4c229a127ddbccf5c69f9463 Mon Sep 17 00:00:00 2001 From: Alan Somers Date: Fri, 19 Apr 2019 15:42:44 -0600 Subject: [PATCH 08/13] fuse: document fuse_fsync_in.fsync_flags The FUSE_FSYNC_DATASYNC flag was introduced by commit b6aeadeda22a ("[PATCH] FUSE - file operations") as a magic number. No new values have been added to fsync_flags since. Signed-off-by: Alan Somers Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 2 +- include/uapi/linux/fuse.h | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 92ee15dda4c7..1f9da7a5ad0d 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -464,7 +464,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end, memset(&inarg, 0, sizeof(inarg)); inarg.fh = ff->fh; - inarg.fsync_flags = datasync ? 1 : 0; + inarg.fsync_flags = datasync ? FUSE_FSYNC_FDATASYNC : 0; args.in.h.opcode = opcode; args.in.h.nodeid = get_node_id(inode); args.in.numargs = 1; diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 17afe2dd8d1c..59d5048df41e 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -360,6 +360,13 @@ struct fuse_file_lock { */ #define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0) +/** + * Fsync flags + * + * FUSE_FSYNC_FDATASYNC: Sync data only, not metadata + */ +#define FUSE_FSYNC_FDATASYNC (1 << 0) + enum fuse_opcode { FUSE_LOOKUP = 1, FUSE_FORGET = 2, /* no reply */ From 68065b84155777335451cf8efdb2e68fddac71ca Mon Sep 17 00:00:00 2001 From: Alan Somers Date: Fri, 19 Apr 2019 15:42:45 -0600 Subject: [PATCH 09/13] fuse: fix changelog entry for protocol 7.12 This was a mistake in the comment in commit e0a43ddcc08c ("fuse: allow umask processing in userspace"). Signed-off-by: Alan Somers Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 59d5048df41e..b7aefea016ae 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -54,7 +54,7 @@ * - add POLL message and NOTIFY_POLL notification * * 7.12 - * - add umask flag to input argument of open, mknod and mkdir + * - add umask flag to input argument of create, mknod and mkdir * - add notification messages for invalidation of inodes and * directory entries * From 7142fd1be3275a64773a2252ed6b97e924677e9f Mon Sep 17 00:00:00 2001 From: Alan Somers Date: Fri, 19 Apr 2019 15:42:46 -0600 Subject: [PATCH 10/13] fuse: fix changelog entry for protocol 7.9 Retroactively add changelog entry for the atime and mtime "now" flags. This was an oversight in commit 17637cbaba59 ("fuse: improve utimes support"). Signed-off-by: Alan Somers Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index b7aefea016ae..c2bece466520 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -44,6 +44,7 @@ * - add lock_owner field to fuse_setattr_in, fuse_read_in and fuse_write_in * - add blksize field to fuse_attr * - add file flags field to fuse_read_in and fuse_write_in + * - Add ATIME_NOW and MTIME_NOW flags to fuse_setattr_in * * 7.10 * - add nonseekable open flag From 29cc02d949b19fdeba9de9f54b2641005f5865c6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 27 Mar 2019 23:44:19 +0000 Subject: [PATCH 11/13] fuse: Convert fusectl to use the new mount API Convert the fusectl filesystem to the new internal mount API as the old one will be obsoleted and removed. This allows greater flexibility in communication of mount parameters between userspace, the VFS and the filesystem. See Documentation/filesystems/mount_api.txt for more information. Signed-off-by: David Howells Signed-off-by: Miklos Szeredi --- fs/fuse/control.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/fs/fuse/control.c b/fs/fuse/control.c index fe80bea4ad89..14ce1e47f980 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -10,6 +10,7 @@ #include #include +#include #define FUSE_CTL_SUPER_MAGIC 0x65735543 @@ -317,7 +318,7 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc) drop_nlink(d_inode(fuse_control_sb->s_root)); } -static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent) +static int fuse_ctl_fill_super(struct super_block *sb, struct fs_context *fctx) { static const struct tree_descr empty_descr = {""}; struct fuse_conn *fc; @@ -343,10 +344,19 @@ static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static struct dentry *fuse_ctl_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) +static int fuse_ctl_get_tree(struct fs_context *fc) { - return mount_single(fs_type, flags, raw_data, fuse_ctl_fill_super); + return vfs_get_super(fc, vfs_get_single_super, fuse_ctl_fill_super); +} + +static const struct fs_context_operations fuse_ctl_context_ops = { + .get_tree = fuse_ctl_get_tree, +}; + +static int fuse_ctl_init_fs_context(struct fs_context *fc) +{ + fc->ops = &fuse_ctl_context_ops; + return 0; } static void fuse_ctl_kill_sb(struct super_block *sb) @@ -365,7 +375,7 @@ static void fuse_ctl_kill_sb(struct super_block *sb) static struct file_system_type fuse_ctl_fs_type = { .owner = THIS_MODULE, .name = "fusectl", - .mount = fuse_ctl_mount, + .init_fs_context = fuse_ctl_init_fs_context, .kill_sb = fuse_ctl_kill_sb, }; MODULE_ALIAS_FS("fusectl"); From 6407f44aaf2a39b5ccbb1cc1d342b906dcfa8a87 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 24 Apr 2019 15:14:11 +0100 Subject: [PATCH 12/13] fuse: Add ioctl flag for x32 compat ioctl Currently, a CUSE server running on a 64-bit kernel can tell when an ioctl request comes from a process running a 32-bit ABI, but cannot tell whether the requesting process is using legacy IA32 emulation or x32 ABI. In particular, the server does not know the size of the client process's `time_t` type. For 64-bit kernels, the `FUSE_IOCTL_COMPAT` and `FUSE_IOCTL_32BIT` flags are currently set in the ioctl input request (`struct fuse_ioctl_in` member `flags`) for a 32-bit requesting process. This patch defines a new flag `FUSE_IOCTL_COMPAT_X32` and sets it if the 32-bit requesting process is using the x32 ABI. This allows the server process to distinguish between requests coming from client processes using IA32 emulation or the x32 ABI and so infer the size of the client process's `time_t` type and any other IA32/x32 differences. Signed-off-by: Ian Abbott Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 7 ++++++- include/uapi/linux/fuse.h | 3 +++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 1f9da7a5ad0d..3959f08279e6 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2578,8 +2578,13 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, #if BITS_PER_LONG == 32 inarg.flags |= FUSE_IOCTL_32BIT; #else - if (flags & FUSE_IOCTL_COMPAT) + if (flags & FUSE_IOCTL_COMPAT) { inarg.flags |= FUSE_IOCTL_32BIT; +#ifdef CONFIG_X86_X32 + if (in_x32_syscall()) + inarg.flags |= FUSE_IOCTL_COMPAT_X32; +#endif + } #endif /* assume all the iovs returned by client always fits in a page */ diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index c2bece466520..19fb55e3c73e 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -129,6 +129,7 @@ * * 7.30 * - add FUSE_EXPLICIT_INVAL_DATA + * - add FUSE_IOCTL_COMPAT_X32 */ #ifndef _LINUX_FUSE_H @@ -343,6 +344,7 @@ struct fuse_file_lock { * FUSE_IOCTL_RETRY: retry with new iovecs * FUSE_IOCTL_32BIT: 32bit ioctl * FUSE_IOCTL_DIR: is a directory + * FUSE_IOCTL_COMPAT_X32: x32 compat ioctl on 64bit machine (64bit time_t) * * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs */ @@ -351,6 +353,7 @@ struct fuse_file_lock { #define FUSE_IOCTL_RETRY (1 << 2) #define FUSE_IOCTL_32BIT (1 << 3) #define FUSE_IOCTL_DIR (1 << 4) +#define FUSE_IOCTL_COMPAT_X32 (1 << 5) #define FUSE_IOCTL_MAX_IOV 256 From 9031a69cf9f024a3040c0ed8b8ab01aecd196388 Mon Sep 17 00:00:00 2001 From: zhangliguang Date: Mon, 6 May 2019 16:52:25 +0800 Subject: [PATCH 13/13] fuse: clean up fuse_alloc_inode This patch cleans up fuse_alloc_inode function, just simply the code, no logic change. Signed-off-by: zhangliguang Signed-off-by: Miklos Szeredi --- fs/fuse/inode.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index c67b39e88fd5..2dfa5dcb1575 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -81,14 +81,12 @@ struct fuse_forget_link *fuse_alloc_forget(void) static struct inode *fuse_alloc_inode(struct super_block *sb) { - struct inode *inode; struct fuse_inode *fi; - inode = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL); - if (!inode) + fi = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL); + if (!fi) return NULL; - fi = get_fuse_inode(inode); fi->i_time = 0; fi->inval_mask = 0; fi->nodeid = 0; @@ -100,11 +98,11 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) spin_lock_init(&fi->lock); fi->forget = fuse_alloc_forget(); if (!fi->forget) { - kmem_cache_free(fuse_inode_cachep, inode); + kmem_cache_free(fuse_inode_cachep, fi); return NULL; } - return inode; + return &fi->inode; } static void fuse_i_callback(struct rcu_head *head)