From b4c458b3a23d76936e76678f2074b1528f129f7a Mon Sep 17 00:00:00 2001 From: Csaba Henk Date: Mon, 29 Jun 2009 03:26:53 +0200 Subject: [PATCH 1/4] fuse: fix return value of fuse_dev_write() On 64 bit systems -- where sizeof(ssize_t) > sizeof(int) -- the following test exposes a bug due to a non-careful return of an int or unsigned value: implement a FUSE filesystem which sends an unsolicited notification to the kernel with invalid opcode. The respective write to /dev/fuse will return (1 << 32) - EINVAL with errno == 0 instead of -1 with errno == EINVAL. Signed-off-by: Miklos Szeredi CC: stable@kernel.org --- fs/fuse/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 8fed2ed12f38..8a11a8c67c42 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -910,7 +910,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { int err; - unsigned nbytes = iov_length(iov, nr_segs); + size_t nbytes = iov_length(iov, nr_segs); struct fuse_req *req; struct fuse_out_header oh; struct fuse_copy_state cs; From 201fa69a2849536ef2912e8e971ec0b01c04eff4 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 30 Jun 2009 20:06:24 +0200 Subject: [PATCH 2/4] fuse: fix bad return value in fuse_file_poll() Fix fuse_file_poll() which returned a -errno value instead of a poll mask. Signed-off-by: Miklos Szeredi CC: stable@kernel.org --- fs/fuse/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index fce6ce694fde..cbc464043b6f 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1922,7 +1922,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait) req = fuse_get_req(fc); if (IS_ERR(req)) - return PTR_ERR(req); + return POLLERR; req->in.h.opcode = FUSE_POLL; req->in.h.nodeid = ff->nodeid; From e0a43ddcc08c34dbd666d93600fd23914505f4aa Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 30 Jun 2009 20:12:23 +0200 Subject: [PATCH 3/4] fuse: allow umask processing in userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch lets filesystems handle masking the file mode on creation. This is needed if filesystem is using ACLs. - The CREATE, MKDIR and MKNOD requests are extended with a "umask" parameter. - A new FUSE_DONT_MASK flag is added to the INIT request/reply. With this the filesystem may request that the create mode is not masked. CC: Jean-Pierre André Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 20 +++++++++++++++++--- fs/fuse/fuse_i.h | 3 +++ fs/fuse/inode.c | 9 ++++++++- include/linux/fuse.h | 20 ++++++++++++++++++-- 4 files changed, 46 insertions(+), 6 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b3089a083d30..6b700734e519 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -375,7 +375,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, struct fuse_conn *fc = get_fuse_conn(dir); struct fuse_req *req; struct fuse_req *forget_req; - struct fuse_open_in inarg; + struct fuse_create_in inarg; struct fuse_open_out outopen; struct fuse_entry_out outentry; struct fuse_file *ff; @@ -399,15 +399,20 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, if (!ff) goto out_put_request; + if (!fc->dont_mask) + mode &= ~current_umask(); + flags &= ~O_NOCTTY; memset(&inarg, 0, sizeof(inarg)); memset(&outentry, 0, sizeof(outentry)); inarg.flags = flags; inarg.mode = mode; + inarg.umask = current_umask(); req->in.h.opcode = FUSE_CREATE; req->in.h.nodeid = get_node_id(dir); req->in.numargs = 2; - req->in.args[0].size = sizeof(inarg); + req->in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) : + sizeof(inarg); req->in.args[0].value = &inarg; req->in.args[1].size = entry->d_name.len + 1; req->in.args[1].value = entry->d_name.name; @@ -546,12 +551,17 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode, if (IS_ERR(req)) return PTR_ERR(req); + if (!fc->dont_mask) + mode &= ~current_umask(); + memset(&inarg, 0, sizeof(inarg)); inarg.mode = mode; inarg.rdev = new_encode_dev(rdev); + inarg.umask = current_umask(); req->in.h.opcode = FUSE_MKNOD; req->in.numargs = 2; - req->in.args[0].size = sizeof(inarg); + req->in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE : + sizeof(inarg); req->in.args[0].value = &inarg; req->in.args[1].size = entry->d_name.len + 1; req->in.args[1].value = entry->d_name.name; @@ -578,8 +588,12 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode) if (IS_ERR(req)) return PTR_ERR(req); + if (!fc->dont_mask) + mode &= ~current_umask(); + memset(&inarg, 0, sizeof(inarg)); inarg.mode = mode; + inarg.umask = current_umask(); req->in.h.opcode = FUSE_MKDIR; req->in.numargs = 2; req->in.args[0].size = sizeof(inarg); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index aaf2f9ff970e..ede4f77b2d6c 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -446,6 +446,9 @@ struct fuse_conn { /** Do multi-page cached writes */ unsigned big_writes:1; + /** Don't apply umask to creation modes */ + unsigned dont_mask:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index d8673ccf90b7..6cc501bd0187 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -725,6 +725,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) } if (arg->flags & FUSE_BIG_WRITES) fc->big_writes = 1; + if (arg->flags & FUSE_DONT_MASK) + fc->dont_mask = 1; } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; @@ -748,7 +750,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->minor = FUSE_KERNEL_MINOR_VERSION; arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | - FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES; + FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); @@ -864,6 +866,11 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (err) goto err_put_conn; + /* Handle umasking inside the fuse code */ + if (sb->s_flags & MS_POSIXACL) + fc->dont_mask = 1; + sb->s_flags |= MS_POSIXACL; + fc->release = fuse_free_conn; fc->flags = d.flags; fc->user_id = d.user_id; diff --git a/include/linux/fuse.h b/include/linux/fuse.h index d41ed593f79f..e2b816a62488 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -25,6 +25,9 @@ * - add IOCTL message * - add unsolicited notification support * - add POLL message and NOTIFY_POLL notification + * + * 7.12 + * - add umask flag to input argument of open, mknod and mkdir */ #ifndef _LINUX_FUSE_H @@ -36,7 +39,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 11 +#define FUSE_KERNEL_MINOR_VERSION 12 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -112,6 +115,7 @@ struct fuse_file_lock { * INIT request/reply flags * * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".." + * FUSE_DONT_MASK: don't apply umask to file mode on create operations */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -119,6 +123,7 @@ struct fuse_file_lock { #define FUSE_ATOMIC_O_TRUNC (1 << 3) #define FUSE_EXPORT_SUPPORT (1 << 4) #define FUSE_BIG_WRITES (1 << 5) +#define FUSE_DONT_MASK (1 << 6) /** * CUSE INIT request/reply flags @@ -262,14 +267,18 @@ struct fuse_attr_out { struct fuse_attr attr; }; +#define FUSE_COMPAT_MKNOD_IN_SIZE 8 + struct fuse_mknod_in { __u32 mode; __u32 rdev; + __u32 umask; + __u32 padding; }; struct fuse_mkdir_in { __u32 mode; - __u32 padding; + __u32 umask; }; struct fuse_rename_in { @@ -300,8 +309,15 @@ struct fuse_setattr_in { }; struct fuse_open_in { + __u32 flags; + __u32 unused; +}; + +struct fuse_create_in { __u32 flags; __u32 mode; + __u32 umask; + __u32 padding; }; struct fuse_open_out { From 3b463ae0c6264f70e5d4c0a9c46af20fed43c96e Mon Sep 17 00:00:00 2001 From: John Muir Date: Sun, 31 May 2009 11:13:57 -0400 Subject: [PATCH 4/4] fuse: invalidation reverse calls Add notification messages that allow the filesystem to invalidate VFS caches. Two notifications are added: 1) inode invalidation - invalidate cached attributes - invalidate a range of pages in the page cache (this is optional) 2) dentry invalidation - try to invalidate a subtree in the dentry cache Care must be taken while accessing the 'struct super_block' for the mount, as it can go away while an invalidation is in progress. To prevent this, introduce a rw-semaphore, that is taken for read during the invalidation and taken for write in the ->kill_sb callback. Cc: Csaba Henk Cc: Anand Avati Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 81 ++++++++++++++++++++++++++++++++++++++++++++ fs/fuse/dir.c | 37 ++++++++++++++++++++ fs/fuse/fuse_i.h | 24 +++++++++++++ fs/fuse/inode.c | 59 ++++++++++++++++++++++++++++++-- include/linux/fuse.h | 16 +++++++++ 5 files changed, 214 insertions(+), 3 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 8a11a8c67c42..f58ecbc416c8 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -849,6 +849,81 @@ err: return err; } +static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_inval_inode_out outarg; + int err = -EINVAL; + + if (size != sizeof(outarg)) + goto err; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + goto err; + fuse_copy_finish(cs); + + down_read(&fc->killsb); + err = -ENOENT; + if (!fc->sb) + goto err_unlock; + + err = fuse_reverse_inval_inode(fc->sb, outarg.ino, + outarg.off, outarg.len); + +err_unlock: + up_read(&fc->killsb); + return err; + +err: + fuse_copy_finish(cs); + return err; +} + +static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_inval_entry_out outarg; + int err = -EINVAL; + char buf[FUSE_NAME_MAX+1]; + struct qstr name; + + if (size < sizeof(outarg)) + goto err; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + goto err; + + err = -ENAMETOOLONG; + if (outarg.namelen > FUSE_NAME_MAX) + goto err; + + name.name = buf; + name.len = outarg.namelen; + err = fuse_copy_one(cs, buf, outarg.namelen + 1); + if (err) + goto err; + fuse_copy_finish(cs); + buf[outarg.namelen] = 0; + name.hash = full_name_hash(name.name, name.len); + + down_read(&fc->killsb); + err = -ENOENT; + if (!fc->sb) + goto err_unlock; + + err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name); + +err_unlock: + up_read(&fc->killsb); + return err; + +err: + fuse_copy_finish(cs); + return err; +} + static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, unsigned int size, struct fuse_copy_state *cs) { @@ -856,6 +931,12 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, case FUSE_NOTIFY_POLL: return fuse_notify_poll(fc, size, cs); + case FUSE_NOTIFY_INVAL_INODE: + return fuse_notify_inval_inode(fc, size, cs); + + case FUSE_NOTIFY_INVAL_ENTRY: + return fuse_notify_inval_entry(fc, size, cs); + default: fuse_copy_finish(cs); return -EINVAL; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 6b700734e519..e703654e7f40 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -859,6 +859,43 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat, return err; } +int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, + struct qstr *name) +{ + int err = -ENOTDIR; + struct inode *parent; + struct dentry *dir; + struct dentry *entry; + + parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid); + if (!parent) + return -ENOENT; + + mutex_lock(&parent->i_mutex); + if (!S_ISDIR(parent->i_mode)) + goto unlock; + + err = -ENOENT; + dir = d_find_alias(parent); + if (!dir) + goto unlock; + + entry = d_lookup(dir, name); + dput(dir); + if (!entry) + goto unlock; + + fuse_invalidate_attr(parent); + fuse_invalidate_entry(entry); + dput(entry); + err = 0; + + unlock: + mutex_unlock(&parent->i_mutex); + iput(parent); + return err; +} + /* * Calling into a user-controlled filesystem gives the filesystem * daemon ptrace-like capabilities over the requester process. This diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index ede4f77b2d6c..52b641fc0faf 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -484,6 +484,12 @@ struct fuse_conn { /** Called on final put */ void (*release)(struct fuse_conn *); + + /** Super block for this connection. */ + struct super_block *sb; + + /** Read/write semaphore to hold when accessing sb. */ + struct rw_semaphore killsb; }; static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) @@ -511,6 +517,11 @@ extern const struct file_operations fuse_dev_operations; extern const struct dentry_operations fuse_dentry_operations; +/** + * Inode to nodeid comparison. + */ +int fuse_inode_eq(struct inode *inode, void *_nodeidp); + /** * Get a filled in inode */ @@ -711,6 +722,19 @@ void fuse_release_nowrite(struct inode *inode); u64 fuse_get_attr_version(struct fuse_conn *fc); +/** + * File-system tells the kernel to invalidate cache for the given node id. + */ +int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, + loff_t offset, loff_t len); + +/** + * File-system tells the kernel to invalidate parent attributes and + * the dentry matching parent/name. + */ +int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, + struct qstr *name); + int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, bool isdir); ssize_t fuse_direct_io(struct file *file, const char __user *buf, diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 6cc501bd0187..f91ccc4a189d 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -206,7 +206,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) BUG(); } -static int fuse_inode_eq(struct inode *inode, void *_nodeidp) +int fuse_inode_eq(struct inode *inode, void *_nodeidp) { u64 nodeid = *(u64 *) _nodeidp; if (get_node_id(inode) == nodeid) @@ -257,6 +257,31 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, return inode; } +int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, + loff_t offset, loff_t len) +{ + struct inode *inode; + pgoff_t pg_start; + pgoff_t pg_end; + + inode = ilookup5(sb, nodeid, fuse_inode_eq, &nodeid); + if (!inode) + return -ENOENT; + + fuse_invalidate_attr(inode); + if (offset >= 0) { + pg_start = offset >> PAGE_CACHE_SHIFT; + if (len <= 0) + pg_end = -1; + else + pg_end = (offset + len - 1) >> PAGE_CACHE_SHIFT; + invalidate_inode_pages2_range(inode->i_mapping, + pg_start, pg_end); + } + iput(inode); + return 0; +} + static void fuse_umount_begin(struct super_block *sb) { fuse_abort_conn(get_fuse_conn_super(sb)); @@ -480,6 +505,7 @@ void fuse_conn_init(struct fuse_conn *fc) memset(fc, 0, sizeof(*fc)); spin_lock_init(&fc->lock); mutex_init(&fc->inst_mutex); + init_rwsem(&fc->killsb); atomic_set(&fc->count, 1); init_waitqueue_head(&fc->waitq); init_waitqueue_head(&fc->blocked_waitq); @@ -862,6 +888,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fuse_conn_init(fc); fc->dev = sb->s_dev; + fc->sb = sb; err = fuse_bdi_init(fc, sb); if (err) goto err_put_conn; @@ -948,12 +975,25 @@ static int fuse_get_sb(struct file_system_type *fs_type, return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt); } +static void fuse_kill_sb_anon(struct super_block *sb) +{ + struct fuse_conn *fc = get_fuse_conn_super(sb); + + if (fc) { + down_write(&fc->killsb); + fc->sb = NULL; + up_write(&fc->killsb); + } + + kill_anon_super(sb); +} + static struct file_system_type fuse_fs_type = { .owner = THIS_MODULE, .name = "fuse", .fs_flags = FS_HAS_SUBTYPE, .get_sb = fuse_get_sb, - .kill_sb = kill_anon_super, + .kill_sb = fuse_kill_sb_anon, }; #ifdef CONFIG_BLOCK @@ -965,11 +1005,24 @@ static int fuse_get_sb_blk(struct file_system_type *fs_type, mnt); } +static void fuse_kill_sb_blk(struct super_block *sb) +{ + struct fuse_conn *fc = get_fuse_conn_super(sb); + + if (fc) { + down_write(&fc->killsb); + fc->sb = NULL; + up_write(&fc->killsb); + } + + kill_block_super(sb); +} + static struct file_system_type fuseblk_fs_type = { .owner = THIS_MODULE, .name = "fuseblk", .get_sb = fuse_get_sb_blk, - .kill_sb = kill_block_super, + .kill_sb = fuse_kill_sb_blk, .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, }; diff --git a/include/linux/fuse.h b/include/linux/fuse.h index e2b816a62488..cf593bf9fd32 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -28,6 +28,8 @@ * * 7.12 * - add umask flag to input argument of open, mknod and mkdir + * - add notification messages for invalidation of inodes and + * directory entries */ #ifndef _LINUX_FUSE_H @@ -229,6 +231,8 @@ enum fuse_opcode { enum fuse_notify_code { FUSE_NOTIFY_POLL = 1, + FUSE_NOTIFY_INVAL_INODE = 2, + FUSE_NOTIFY_INVAL_ENTRY = 3, FUSE_NOTIFY_CODE_MAX, }; @@ -524,4 +528,16 @@ struct fuse_dirent { #define FUSE_DIRENT_SIZE(d) \ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) +struct fuse_notify_inval_inode_out { + __u64 ino; + __s64 off; + __s64 len; +}; + +struct fuse_notify_inval_entry_out { + __u64 parent; + __u32 namelen; + __u32 padding; +}; + #endif /* _LINUX_FUSE_H */