diff --git a/fs/mount.h b/fs/mount.h index 0a78f85cf737..185fc56afc13 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -154,3 +154,16 @@ static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list) } bool has_locked_children(struct mount *mnt, struct dentry *dentry); +struct mnt_namespace *__lookup_next_mnt_ns(struct mnt_namespace *mnt_ns, bool previous); +static inline struct mnt_namespace *lookup_next_mnt_ns(struct mnt_namespace *mntns) +{ + return __lookup_next_mnt_ns(mntns, false); +} +static inline struct mnt_namespace *lookup_prev_mnt_ns(struct mnt_namespace *mntns) +{ + return __lookup_next_mnt_ns(mntns, true); +} +static inline struct mnt_namespace *to_mnt_ns(struct ns_common *ns) +{ + return container_of(ns, struct mnt_namespace, ns); +} diff --git a/fs/namespace.c b/fs/namespace.c index 5f2dddee0074..e71e4564987b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2060,16 +2060,43 @@ static bool is_mnt_ns_file(struct dentry *dentry) dentry->d_fsdata == &mntns_operations; } -static struct mnt_namespace *to_mnt_ns(struct ns_common *ns) -{ - return container_of(ns, struct mnt_namespace, ns); -} - struct ns_common *from_mnt_ns(struct mnt_namespace *mnt) { return &mnt->ns; } +struct mnt_namespace *__lookup_next_mnt_ns(struct mnt_namespace *mntns, bool previous) +{ + guard(read_lock)(&mnt_ns_tree_lock); + for (;;) { + struct rb_node *node; + + if (previous) + node = rb_prev(&mntns->mnt_ns_tree_node); + else + node = rb_next(&mntns->mnt_ns_tree_node); + if (!node) + return ERR_PTR(-ENOENT); + + mntns = node_to_mnt_ns(node); + node = &mntns->mnt_ns_tree_node; + + if (!ns_capable_noaudit(mntns->user_ns, CAP_SYS_ADMIN)) + continue; + + /* + * Holding mnt_ns_tree_lock prevents the mount namespace from + * being freed but it may well be on its deathbed. We want an + * active reference, not just a passive one here as we're + * persisting the mount namespace. 
+ */ + if (!refcount_inc_not_zero(&mntns->ns.count)) + continue; + + return mntns; + } +} + static bool mnt_ns_loop(struct dentry *dentry) { /* Could bind mounting the mount namespace inode cause a @@ -5251,12 +5278,37 @@ static int copy_mnt_id_req(const struct mnt_id_req __user *req, * that, or if not simply grab a passive reference on our mount namespace and * return that. */ -static struct mnt_namespace *grab_requested_mnt_ns(u64 mnt_ns_id) +static struct mnt_namespace *grab_requested_mnt_ns(const struct mnt_id_req *kreq) { - if (mnt_ns_id) - return lookup_mnt_ns(mnt_ns_id); - refcount_inc(&current->nsproxy->mnt_ns->passive); - return current->nsproxy->mnt_ns; + struct mnt_namespace *mnt_ns; + + if (kreq->mnt_ns_id && kreq->spare) + return ERR_PTR(-EINVAL); + + if (kreq->mnt_ns_id) + return lookup_mnt_ns(kreq->mnt_ns_id); + + if (kreq->spare) { + struct ns_common *ns; + + CLASS(fd, f)(kreq->spare); + if (!f.file) + return ERR_PTR(-EBADF); + + if (!proc_ns_file(f.file)) + return ERR_PTR(-EINVAL); + + ns = get_proc_ns(file_inode(f.file)); + if (ns->ops->type != CLONE_NEWNS) + return ERR_PTR(-EINVAL); + + mnt_ns = to_mnt_ns(ns); + } else { + mnt_ns = current->nsproxy->mnt_ns; + } + + refcount_inc(&mnt_ns->passive); + return mnt_ns; } SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req, @@ -5277,7 +5329,7 @@ SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req, if (ret) return ret; - ns = grab_requested_mnt_ns(kreq.mnt_ns_id); + ns = grab_requested_mnt_ns(&kreq); if (!ns) return -ENOENT; @@ -5404,7 +5456,7 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req, if (!kmnt_ids) return -ENOMEM; - ns = grab_requested_mnt_ns(kreq.mnt_ns_id); + ns = grab_requested_mnt_ns(&kreq); if (!ns) return -ENOENT; diff --git a/fs/nsfs.c b/fs/nsfs.c index 97c37a9631e5..67ee176b8824 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "mount.h" #include "internal.h" @@ -128,6 +129,30 @@ int 
open_related_ns(struct ns_common *ns, } EXPORT_SYMBOL_GPL(open_related_ns); +static int copy_ns_info_to_user(const struct mnt_namespace *mnt_ns, + struct mnt_ns_info __user *uinfo, size_t usize, + struct mnt_ns_info *kinfo) +{ + /* + * If userspace and the kernel have the same struct size it can just + * be copied. If userspace provides an older struct, only the bits that + * userspace knows about will be copied. If userspace provides a new + * struct, only the bits that the kernel knows about will be copied and + * the size value will be set to the size the kernel knows about. + */ + kinfo->size = min(usize, sizeof(*kinfo)); + kinfo->mnt_ns_id = mnt_ns->seq; + kinfo->nr_mounts = READ_ONCE(mnt_ns->nr_mounts); + /* Subtract the root mount of the mount namespace. */ + if (kinfo->nr_mounts) + kinfo->nr_mounts--; + + if (copy_to_user(uinfo, kinfo, kinfo->size)) + return -EFAULT; + + return 0; +} + static long ns_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -135,6 +160,8 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl, struct pid_namespace *pid_ns; struct task_struct *tsk; struct ns_common *ns = get_proc_ns(file_inode(filp)); + struct mnt_namespace *mnt_ns; + bool previous = false; uid_t __user *argp; uid_t uid; int ret; @@ -156,7 +183,6 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl, uid = from_kuid_munged(current_user_ns(), user_ns->owner); return put_user(uid, argp); case NS_GET_MNTNS_ID: { - struct mnt_namespace *mnt_ns; __u64 __user *idp; __u64 id; @@ -211,7 +237,79 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl, if (!ret) ret = -ESRCH; - break; + return ret; + } + } + + /* extensible ioctls */ + switch (_IOC_NR(ioctl)) { + case _IOC_NR(NS_MNT_GET_INFO): { + struct mnt_ns_info kinfo = {}; + struct mnt_ns_info __user *uinfo = (struct mnt_ns_info __user *)arg; + size_t usize = _IOC_SIZE(ioctl); + + if (ns->ops->type != CLONE_NEWNS) + return -EINVAL; + + if (!uinfo) + return -EINVAL; + + if (usize < 
MNT_NS_INFO_SIZE_VER0) + return -EINVAL; + + return copy_ns_info_to_user(to_mnt_ns(ns), uinfo, usize, &kinfo); + } + case _IOC_NR(NS_MNT_GET_PREV): + previous = true; + fallthrough; + case _IOC_NR(NS_MNT_GET_NEXT): { + struct mnt_ns_info kinfo = {}; + struct mnt_ns_info __user *uinfo = (struct mnt_ns_info __user *)arg; + struct path path __free(path_put) = {}; + struct file *f __free(fput) = NULL; + size_t usize = _IOC_SIZE(ioctl); + + if (ns->ops->type != CLONE_NEWNS) + return -EINVAL; + + if (usize < MNT_NS_INFO_SIZE_VER0) + return -EINVAL; + + if (previous) + mnt_ns = lookup_prev_mnt_ns(to_mnt_ns(ns)); + else + mnt_ns = lookup_next_mnt_ns(to_mnt_ns(ns)); + if (IS_ERR(mnt_ns)) + return PTR_ERR(mnt_ns); + + ns = to_ns_common(mnt_ns); + /* Transfer ownership of @mnt_ns reference to @path. */ + ret = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path); + if (ret) + return ret; + + CLASS(get_unused_fd, fd)(O_CLOEXEC); + if (fd < 0) + return fd; + + f = dentry_open(&path, O_RDONLY, current_cred()); + if (IS_ERR(f)) + return PTR_ERR(f); + + if (uinfo) { + /* + * If @uinfo is passed return all information about the + * mount namespace as well. + */ + ret = copy_ns_info_to_user(to_mnt_ns(ns), uinfo, usize, &kinfo); + if (ret) + return ret; + } + + /* Transfer reference of @f to caller's fdtable. */ + fd_install(fd, no_free_ptr(f)); + /* File descriptor is live so hand it off to the caller. 
*/ + return take_fd(fd); } default: ret = -ENOTTY; diff --git a/include/linux/file.h b/include/linux/file.h index 59b146a14dca..6bd9cd9c87e5 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -11,6 +11,7 @@ #include #include #include +#include struct file; @@ -96,6 +97,7 @@ extern void put_unused_fd(unsigned int fd); DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T), get_unused_fd_flags(flags), unsigned flags) +DEFINE_FREE(fput, struct file *, if (!IS_ERR_OR_NULL(_T)) fput(_T)) /* * take_fd() will take care to set @fd to -EBADF ensuring that diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 8f882f5881e8..70b366b64816 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -3,6 +3,9 @@ #define _NAMESPACE_H_ #ifdef __KERNEL__ +#include +#include + struct mnt_namespace; struct fs_struct; struct user_namespace; @@ -11,6 +14,7 @@ struct ns_common; extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, struct user_namespace *, struct fs_struct *); extern void put_mnt_ns(struct mnt_namespace *ns); +DEFINE_FREE(put_mnt_ns, struct mnt_namespace *, if (!IS_ERR_OR_NULL(_T)) put_mnt_ns(_T)) extern struct ns_common *from_mnt_ns(struct mnt_namespace *); extern const struct file_operations proc_mounts_operations; diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index 5fad3d0fcd70..34127653fd00 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -27,4 +27,19 @@ /* Return thread-group leader id of pid in the target pid namespace. */ #define NS_GET_TGID_IN_PIDNS _IOR(NSIO, 0x9, int) +struct mnt_ns_info { + __u32 size; + __u32 nr_mounts; + __u64 mnt_ns_id; +}; + +#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */ + +/* Get information about namespace. */ +#define NS_MNT_GET_INFO _IOR(NSIO, 10, struct mnt_ns_info) +/* Get next namespace. 
*/ +#define NS_MNT_GET_NEXT _IOR(NSIO, 11, struct mnt_ns_info) +/* Get previous namespace. */ +#define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info) + #endif /* __LINUX_NSFS_H */