forked from Minki/linux
kernfs, sysfs, cgroup, intel_rdt: Support fs_context
Make kernfs support superblock creation/mount/remount with fs_context. This requires that sysfs, cgroup and intel_rdt, which are built on kernfs, be made to support fs_context also. Notes: (1) A kernfs_fs_context struct is created to wrap fs_context and the kernfs mount parameters are moved in here (or are in fs_context). (2) kernfs_mount{,_ns}() are made into kernfs_get_tree(). The extra namespace tag parameter is passed in the context if desired. (3) kernfs_free_fs_context() is provided as a destructor for the kernfs_fs_context struct, but for the moment it does nothing except get called in the right places. (4) sysfs doesn't wrap kernfs_fs_context since it has no parameters to pass, but possibly this should be done anyway in case someone wants to add a parameter in future. (5) A cgroup_fs_context struct is created to wrap kernfs_fs_context and the cgroup v1 and v2 mount parameters are all moved there. (6) cgroup1 parameter parsing error messages are now handled by invalf(), which allows userspace to collect them directly. (7) cgroup1 parameter cleanup is now done in the context destructor rather than in the mount/get_tree and remount functions. Weirdies: (*) cgroup_do_get_tree() calls cset_cgroup_from_root() with locks held, but then uses the resulting pointer after dropping the locks. I'm told this is okay and needs commenting. (*) The cgroup refcount web. This really needs documenting. (*) cgroup2 only has one root? Add a suggestion from Thomas Gleixner in which the RDT enablement code is placed into its own function. [folded a leak fix from Andrey Vagin] Signed-off-by: David Howells <dhowells@redhat.com> cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> cc: Tejun Heo <tj@kernel.org> cc: Li Zefan <lizefan@huawei.com> cc: Johannes Weiner <hannes@cmpxchg.org> cc: cgroups@vger.kernel.org cc: fenghua.yu@intel.com Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
parent
cca8f32714
commit
23bf1b6be9
@ -4,6 +4,7 @@
|
|||||||
|
|
||||||
#include <linux/sched.h>
|
#include <linux/sched.h>
|
||||||
#include <linux/kernfs.h>
|
#include <linux/kernfs.h>
|
||||||
|
#include <linux/fs_context.h>
|
||||||
#include <linux/jump_label.h>
|
#include <linux/jump_label.h>
|
||||||
|
|
||||||
#define MSR_IA32_L3_QOS_CFG 0xc81
|
#define MSR_IA32_L3_QOS_CFG 0xc81
|
||||||
@ -40,6 +41,21 @@
|
|||||||
#define RMID_VAL_ERROR BIT_ULL(63)
|
#define RMID_VAL_ERROR BIT_ULL(63)
|
||||||
#define RMID_VAL_UNAVAIL BIT_ULL(62)
|
#define RMID_VAL_UNAVAIL BIT_ULL(62)
|
||||||
|
|
||||||
|
|
||||||
|
struct rdt_fs_context {
|
||||||
|
struct kernfs_fs_context kfc;
|
||||||
|
bool enable_cdpl2;
|
||||||
|
bool enable_cdpl3;
|
||||||
|
bool enable_mba_mbps;
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
|
||||||
|
{
|
||||||
|
struct kernfs_fs_context *kfc = fc->fs_private;
|
||||||
|
|
||||||
|
return container_of(kfc, struct rdt_fs_context, kfc);
|
||||||
|
}
|
||||||
|
|
||||||
DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
|
DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -24,6 +24,7 @@
|
|||||||
#include <linux/cpu.h>
|
#include <linux/cpu.h>
|
||||||
#include <linux/debugfs.h>
|
#include <linux/debugfs.h>
|
||||||
#include <linux/fs.h>
|
#include <linux/fs.h>
|
||||||
|
#include <linux/fs_parser.h>
|
||||||
#include <linux/sysfs.h>
|
#include <linux/sysfs.h>
|
||||||
#include <linux/kernfs.h>
|
#include <linux/kernfs.h>
|
||||||
#include <linux/seq_buf.h>
|
#include <linux/seq_buf.h>
|
||||||
@ -32,6 +33,7 @@
|
|||||||
#include <linux/sched/task.h>
|
#include <linux/sched/task.h>
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
#include <linux/task_work.h>
|
#include <linux/task_work.h>
|
||||||
|
#include <linux/user_namespace.h>
|
||||||
|
|
||||||
#include <uapi/linux/magic.h>
|
#include <uapi/linux/magic.h>
|
||||||
|
|
||||||
@ -1858,46 +1860,6 @@ static void cdp_disable_all(void)
|
|||||||
cdpl2_disable();
|
cdpl2_disable();
|
||||||
}
|
}
|
||||||
|
|
||||||
static int parse_rdtgroupfs_options(char *data)
|
|
||||||
{
|
|
||||||
char *token, *o = data;
|
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
while ((token = strsep(&o, ",")) != NULL) {
|
|
||||||
if (!*token) {
|
|
||||||
ret = -EINVAL;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!strcmp(token, "cdp")) {
|
|
||||||
ret = cdpl3_enable();
|
|
||||||
if (ret)
|
|
||||||
goto out;
|
|
||||||
} else if (!strcmp(token, "cdpl2")) {
|
|
||||||
ret = cdpl2_enable();
|
|
||||||
if (ret)
|
|
||||||
goto out;
|
|
||||||
} else if (!strcmp(token, "mba_MBps")) {
|
|
||||||
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
|
|
||||||
ret = set_mba_sc(true);
|
|
||||||
else
|
|
||||||
ret = -EINVAL;
|
|
||||||
if (ret)
|
|
||||||
goto out;
|
|
||||||
} else {
|
|
||||||
ret = -EINVAL;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
out:
|
|
||||||
pr_err("Invalid mount option \"%s\"\n", token);
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We don't allow rdtgroup directories to be created anywhere
|
* We don't allow rdtgroup directories to be created anywhere
|
||||||
* except the root directory. Thus when looking for the rdtgroup
|
* except the root directory. Thus when looking for the rdtgroup
|
||||||
@ -1969,13 +1931,27 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn,
|
|||||||
struct rdtgroup *prgrp,
|
struct rdtgroup *prgrp,
|
||||||
struct kernfs_node **mon_data_kn);
|
struct kernfs_node **mon_data_kn);
|
||||||
|
|
||||||
static struct dentry *rdt_mount(struct file_system_type *fs_type,
|
static int rdt_enable_ctx(struct rdt_fs_context *ctx)
|
||||||
int flags, const char *unused_dev_name,
|
|
||||||
void *data)
|
|
||||||
{
|
{
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
if (ctx->enable_cdpl2)
|
||||||
|
ret = cdpl2_enable();
|
||||||
|
|
||||||
|
if (!ret && ctx->enable_cdpl3)
|
||||||
|
ret = cdpl3_enable();
|
||||||
|
|
||||||
|
if (!ret && ctx->enable_mba_mbps)
|
||||||
|
ret = set_mba_sc(true);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int rdt_get_tree(struct fs_context *fc)
|
||||||
|
{
|
||||||
|
struct rdt_fs_context *ctx = rdt_fc2context(fc);
|
||||||
struct rdt_domain *dom;
|
struct rdt_domain *dom;
|
||||||
struct rdt_resource *r;
|
struct rdt_resource *r;
|
||||||
struct dentry *dentry;
|
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
cpus_read_lock();
|
cpus_read_lock();
|
||||||
@ -1984,53 +1960,42 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
|
|||||||
* resctrl file system can only be mounted once.
|
* resctrl file system can only be mounted once.
|
||||||
*/
|
*/
|
||||||
if (static_branch_unlikely(&rdt_enable_key)) {
|
if (static_branch_unlikely(&rdt_enable_key)) {
|
||||||
dentry = ERR_PTR(-EBUSY);
|
ret = -EBUSY;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = parse_rdtgroupfs_options(data);
|
ret = rdt_enable_ctx(ctx);
|
||||||
if (ret) {
|
if (ret < 0)
|
||||||
dentry = ERR_PTR(ret);
|
|
||||||
goto out_cdp;
|
goto out_cdp;
|
||||||
}
|
|
||||||
|
|
||||||
closid_init();
|
closid_init();
|
||||||
|
|
||||||
ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
|
ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
|
||||||
if (ret) {
|
if (ret < 0)
|
||||||
dentry = ERR_PTR(ret);
|
goto out_mba;
|
||||||
goto out_cdp;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rdt_mon_capable) {
|
if (rdt_mon_capable) {
|
||||||
ret = mongroup_create_dir(rdtgroup_default.kn,
|
ret = mongroup_create_dir(rdtgroup_default.kn,
|
||||||
NULL, "mon_groups",
|
NULL, "mon_groups",
|
||||||
&kn_mongrp);
|
&kn_mongrp);
|
||||||
if (ret) {
|
if (ret < 0)
|
||||||
dentry = ERR_PTR(ret);
|
|
||||||
goto out_info;
|
goto out_info;
|
||||||
}
|
|
||||||
kernfs_get(kn_mongrp);
|
kernfs_get(kn_mongrp);
|
||||||
|
|
||||||
ret = mkdir_mondata_all(rdtgroup_default.kn,
|
ret = mkdir_mondata_all(rdtgroup_default.kn,
|
||||||
&rdtgroup_default, &kn_mondata);
|
&rdtgroup_default, &kn_mondata);
|
||||||
if (ret) {
|
if (ret < 0)
|
||||||
dentry = ERR_PTR(ret);
|
|
||||||
goto out_mongrp;
|
goto out_mongrp;
|
||||||
}
|
|
||||||
kernfs_get(kn_mondata);
|
kernfs_get(kn_mondata);
|
||||||
rdtgroup_default.mon.mon_data_kn = kn_mondata;
|
rdtgroup_default.mon.mon_data_kn = kn_mondata;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = rdt_pseudo_lock_init();
|
ret = rdt_pseudo_lock_init();
|
||||||
if (ret) {
|
if (ret)
|
||||||
dentry = ERR_PTR(ret);
|
|
||||||
goto out_mondata;
|
goto out_mondata;
|
||||||
}
|
|
||||||
|
|
||||||
dentry = kernfs_mount(fs_type, flags, rdt_root,
|
ret = kernfs_get_tree(fc);
|
||||||
RDTGROUP_SUPER_MAGIC, NULL);
|
if (ret < 0)
|
||||||
if (IS_ERR(dentry))
|
|
||||||
goto out_psl;
|
goto out_psl;
|
||||||
|
|
||||||
if (rdt_alloc_capable)
|
if (rdt_alloc_capable)
|
||||||
@ -2059,14 +2024,95 @@ out_mongrp:
|
|||||||
kernfs_remove(kn_mongrp);
|
kernfs_remove(kn_mongrp);
|
||||||
out_info:
|
out_info:
|
||||||
kernfs_remove(kn_info);
|
kernfs_remove(kn_info);
|
||||||
|
out_mba:
|
||||||
|
if (ctx->enable_mba_mbps)
|
||||||
|
set_mba_sc(false);
|
||||||
out_cdp:
|
out_cdp:
|
||||||
cdp_disable_all();
|
cdp_disable_all();
|
||||||
out:
|
out:
|
||||||
rdt_last_cmd_clear();
|
rdt_last_cmd_clear();
|
||||||
mutex_unlock(&rdtgroup_mutex);
|
mutex_unlock(&rdtgroup_mutex);
|
||||||
cpus_read_unlock();
|
cpus_read_unlock();
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
return dentry;
|
enum rdt_param {
|
||||||
|
Opt_cdp,
|
||||||
|
Opt_cdpl2,
|
||||||
|
Opt_mba_mpbs,
|
||||||
|
nr__rdt_params
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct fs_parameter_spec rdt_param_specs[] = {
|
||||||
|
fsparam_flag("cdp", Opt_cdp),
|
||||||
|
fsparam_flag("cdpl2", Opt_cdpl2),
|
||||||
|
fsparam_flag("mba_mpbs", Opt_mba_mpbs),
|
||||||
|
{}
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct fs_parameter_description rdt_fs_parameters = {
|
||||||
|
.name = "rdt",
|
||||||
|
.specs = rdt_param_specs,
|
||||||
|
};
|
||||||
|
|
||||||
|
static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
|
||||||
|
{
|
||||||
|
struct rdt_fs_context *ctx = rdt_fc2context(fc);
|
||||||
|
struct fs_parse_result result;
|
||||||
|
int opt;
|
||||||
|
|
||||||
|
opt = fs_parse(fc, &rdt_fs_parameters, param, &result);
|
||||||
|
if (opt < 0)
|
||||||
|
return opt;
|
||||||
|
|
||||||
|
switch (opt) {
|
||||||
|
case Opt_cdp:
|
||||||
|
ctx->enable_cdpl3 = true;
|
||||||
|
return 0;
|
||||||
|
case Opt_cdpl2:
|
||||||
|
ctx->enable_cdpl2 = true;
|
||||||
|
return 0;
|
||||||
|
case Opt_mba_mpbs:
|
||||||
|
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
|
||||||
|
return -EINVAL;
|
||||||
|
ctx->enable_mba_mbps = true;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void rdt_fs_context_free(struct fs_context *fc)
|
||||||
|
{
|
||||||
|
struct rdt_fs_context *ctx = rdt_fc2context(fc);
|
||||||
|
|
||||||
|
kernfs_free_fs_context(fc);
|
||||||
|
kfree(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct fs_context_operations rdt_fs_context_ops = {
|
||||||
|
.free = rdt_fs_context_free,
|
||||||
|
.parse_param = rdt_parse_param,
|
||||||
|
.get_tree = rdt_get_tree,
|
||||||
|
};
|
||||||
|
|
||||||
|
static int rdt_init_fs_context(struct fs_context *fc)
|
||||||
|
{
|
||||||
|
struct rdt_fs_context *ctx;
|
||||||
|
|
||||||
|
ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
|
||||||
|
if (!ctx)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
ctx->kfc.root = rdt_root;
|
||||||
|
ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
|
||||||
|
fc->fs_private = &ctx->kfc;
|
||||||
|
fc->ops = &rdt_fs_context_ops;
|
||||||
|
if (fc->user_ns)
|
||||||
|
put_user_ns(fc->user_ns);
|
||||||
|
fc->user_ns = get_user_ns(&init_user_ns);
|
||||||
|
fc->global = true;
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int reset_all_ctrls(struct rdt_resource *r)
|
static int reset_all_ctrls(struct rdt_resource *r)
|
||||||
@ -2239,9 +2285,10 @@ static void rdt_kill_sb(struct super_block *sb)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static struct file_system_type rdt_fs_type = {
|
static struct file_system_type rdt_fs_type = {
|
||||||
.name = "resctrl",
|
.name = "resctrl",
|
||||||
.mount = rdt_mount,
|
.init_fs_context = rdt_init_fs_context,
|
||||||
.kill_sb = rdt_kill_sb,
|
.parameters = &rdt_fs_parameters,
|
||||||
|
.kill_sb = rdt_kill_sb,
|
||||||
};
|
};
|
||||||
|
|
||||||
static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
|
static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
#include <linux/xattr.h>
|
#include <linux/xattr.h>
|
||||||
|
|
||||||
#include <linux/kernfs.h>
|
#include <linux/kernfs.h>
|
||||||
|
#include <linux/fs_context.h>
|
||||||
|
|
||||||
struct kernfs_iattrs {
|
struct kernfs_iattrs {
|
||||||
struct iattr ia_iattr;
|
struct iattr ia_iattr;
|
||||||
|
@ -22,16 +22,6 @@
|
|||||||
|
|
||||||
struct kmem_cache *kernfs_node_cache;
|
struct kmem_cache *kernfs_node_cache;
|
||||||
|
|
||||||
static int kernfs_sop_remount_fs(struct super_block *sb, int *flags, char *data)
|
|
||||||
{
|
|
||||||
struct kernfs_root *root = kernfs_info(sb)->root;
|
|
||||||
struct kernfs_syscall_ops *scops = root->syscall_ops;
|
|
||||||
|
|
||||||
if (scops && scops->remount_fs)
|
|
||||||
return scops->remount_fs(root, flags, data);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry)
|
static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry)
|
||||||
{
|
{
|
||||||
struct kernfs_root *root = kernfs_root(kernfs_dentry_node(dentry));
|
struct kernfs_root *root = kernfs_root(kernfs_dentry_node(dentry));
|
||||||
@ -60,7 +50,6 @@ const struct super_operations kernfs_sops = {
|
|||||||
.drop_inode = generic_delete_inode,
|
.drop_inode = generic_delete_inode,
|
||||||
.evict_inode = kernfs_evict_inode,
|
.evict_inode = kernfs_evict_inode,
|
||||||
|
|
||||||
.remount_fs = kernfs_sop_remount_fs,
|
|
||||||
.show_options = kernfs_sop_show_options,
|
.show_options = kernfs_sop_show_options,
|
||||||
.show_path = kernfs_sop_show_path,
|
.show_path = kernfs_sop_show_path,
|
||||||
};
|
};
|
||||||
@ -222,7 +211,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
|
|||||||
} while (true);
|
} while (true);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
|
static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *kfc)
|
||||||
{
|
{
|
||||||
struct kernfs_super_info *info = kernfs_info(sb);
|
struct kernfs_super_info *info = kernfs_info(sb);
|
||||||
struct inode *inode;
|
struct inode *inode;
|
||||||
@ -233,7 +222,7 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
|
|||||||
sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV;
|
sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV;
|
||||||
sb->s_blocksize = PAGE_SIZE;
|
sb->s_blocksize = PAGE_SIZE;
|
||||||
sb->s_blocksize_bits = PAGE_SHIFT;
|
sb->s_blocksize_bits = PAGE_SHIFT;
|
||||||
sb->s_magic = magic;
|
sb->s_magic = kfc->magic;
|
||||||
sb->s_op = &kernfs_sops;
|
sb->s_op = &kernfs_sops;
|
||||||
sb->s_xattr = kernfs_xattr_handlers;
|
sb->s_xattr = kernfs_xattr_handlers;
|
||||||
if (info->root->flags & KERNFS_ROOT_SUPPORT_EXPORTOP)
|
if (info->root->flags & KERNFS_ROOT_SUPPORT_EXPORTOP)
|
||||||
@ -263,21 +252,20 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int kernfs_test_super(struct super_block *sb, void *data)
|
static int kernfs_test_super(struct super_block *sb, struct fs_context *fc)
|
||||||
{
|
{
|
||||||
struct kernfs_super_info *sb_info = kernfs_info(sb);
|
struct kernfs_super_info *sb_info = kernfs_info(sb);
|
||||||
struct kernfs_super_info *info = data;
|
struct kernfs_super_info *info = fc->s_fs_info;
|
||||||
|
|
||||||
return sb_info->root == info->root && sb_info->ns == info->ns;
|
return sb_info->root == info->root && sb_info->ns == info->ns;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int kernfs_set_super(struct super_block *sb, void *data)
|
static int kernfs_set_super(struct super_block *sb, struct fs_context *fc)
|
||||||
{
|
{
|
||||||
int error;
|
struct kernfs_fs_context *kfc = fc->fs_private;
|
||||||
error = set_anon_super(sb, data);
|
|
||||||
if (!error)
|
kfc->ns_tag = NULL;
|
||||||
sb->s_fs_info = data;
|
return set_anon_super_fc(sb, fc);
|
||||||
return error;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -294,63 +282,60 @@ const void *kernfs_super_ns(struct super_block *sb)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* kernfs_mount_ns - kernfs mount helper
|
* kernfs_get_tree - kernfs filesystem access/retrieval helper
|
||||||
* @fs_type: file_system_type of the fs being mounted
|
* @fc: The filesystem context.
|
||||||
* @flags: mount flags specified for the mount
|
|
||||||
* @root: kernfs_root of the hierarchy being mounted
|
|
||||||
* @magic: file system specific magic number
|
|
||||||
* @new_sb_created: tell the caller if we allocated a new superblock
|
|
||||||
* @ns: optional namespace tag of the mount
|
|
||||||
*
|
*
|
||||||
* This is to be called from each kernfs user's file_system_type->mount()
|
* This is to be called from each kernfs user's fs_context->ops->get_tree()
|
||||||
* implementation, which should pass through the specified @fs_type and
|
* implementation, which should set the specified ->@fs_type and ->@flags, and
|
||||||
* @flags, and specify the hierarchy and namespace tag to mount via @root
|
* specify the hierarchy and namespace tag to mount via ->@root and ->@ns_tag,
|
||||||
* and @ns, respectively.
|
* respectively.
|
||||||
*
|
|
||||||
* The return value can be passed to the vfs layer verbatim.
|
|
||||||
*/
|
*/
|
||||||
struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
|
int kernfs_get_tree(struct fs_context *fc)
|
||||||
struct kernfs_root *root, unsigned long magic,
|
|
||||||
bool *new_sb_created, const void *ns)
|
|
||||||
{
|
{
|
||||||
|
struct kernfs_fs_context *kfc = fc->fs_private;
|
||||||
struct super_block *sb;
|
struct super_block *sb;
|
||||||
struct kernfs_super_info *info;
|
struct kernfs_super_info *info;
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
info = kzalloc(sizeof(*info), GFP_KERNEL);
|
info = kzalloc(sizeof(*info), GFP_KERNEL);
|
||||||
if (!info)
|
if (!info)
|
||||||
return ERR_PTR(-ENOMEM);
|
return -ENOMEM;
|
||||||
|
|
||||||
info->root = root;
|
info->root = kfc->root;
|
||||||
info->ns = ns;
|
info->ns = kfc->ns_tag;
|
||||||
INIT_LIST_HEAD(&info->node);
|
INIT_LIST_HEAD(&info->node);
|
||||||
|
|
||||||
sb = sget_userns(fs_type, kernfs_test_super, kernfs_set_super, flags,
|
fc->s_fs_info = info;
|
||||||
&init_user_ns, info);
|
sb = sget_fc(fc, kernfs_test_super, kernfs_set_super);
|
||||||
if (IS_ERR(sb) || sb->s_fs_info != info)
|
|
||||||
kfree(info);
|
|
||||||
if (IS_ERR(sb))
|
if (IS_ERR(sb))
|
||||||
return ERR_CAST(sb);
|
return PTR_ERR(sb);
|
||||||
|
|
||||||
if (new_sb_created)
|
|
||||||
*new_sb_created = !sb->s_root;
|
|
||||||
|
|
||||||
if (!sb->s_root) {
|
if (!sb->s_root) {
|
||||||
struct kernfs_super_info *info = kernfs_info(sb);
|
struct kernfs_super_info *info = kernfs_info(sb);
|
||||||
|
|
||||||
error = kernfs_fill_super(sb, magic);
|
kfc->new_sb_created = true;
|
||||||
|
|
||||||
|
error = kernfs_fill_super(sb, kfc);
|
||||||
if (error) {
|
if (error) {
|
||||||
deactivate_locked_super(sb);
|
deactivate_locked_super(sb);
|
||||||
return ERR_PTR(error);
|
return error;
|
||||||
}
|
}
|
||||||
sb->s_flags |= SB_ACTIVE;
|
sb->s_flags |= SB_ACTIVE;
|
||||||
|
|
||||||
mutex_lock(&kernfs_mutex);
|
mutex_lock(&kernfs_mutex);
|
||||||
list_add(&info->node, &root->supers);
|
list_add(&info->node, &info->root->supers);
|
||||||
mutex_unlock(&kernfs_mutex);
|
mutex_unlock(&kernfs_mutex);
|
||||||
}
|
}
|
||||||
|
|
||||||
return dget(sb->s_root);
|
fc->root = dget(sb->s_root);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void kernfs_free_fs_context(struct fs_context *fc)
|
||||||
|
{
|
||||||
|
/* Note that we don't deal with kfc->ns_tag here. */
|
||||||
|
kfree(fc->s_fs_info);
|
||||||
|
fc->s_fs_info = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -13,34 +13,69 @@
|
|||||||
#include <linux/magic.h>
|
#include <linux/magic.h>
|
||||||
#include <linux/mount.h>
|
#include <linux/mount.h>
|
||||||
#include <linux/init.h>
|
#include <linux/init.h>
|
||||||
|
#include <linux/slab.h>
|
||||||
#include <linux/user_namespace.h>
|
#include <linux/user_namespace.h>
|
||||||
|
#include <linux/fs_context.h>
|
||||||
|
#include <net/net_namespace.h>
|
||||||
|
|
||||||
#include "sysfs.h"
|
#include "sysfs.h"
|
||||||
|
|
||||||
static struct kernfs_root *sysfs_root;
|
static struct kernfs_root *sysfs_root;
|
||||||
struct kernfs_node *sysfs_root_kn;
|
struct kernfs_node *sysfs_root_kn;
|
||||||
|
|
||||||
static struct dentry *sysfs_mount(struct file_system_type *fs_type,
|
static int sysfs_get_tree(struct fs_context *fc)
|
||||||
int flags, const char *dev_name, void *data)
|
|
||||||
{
|
{
|
||||||
struct dentry *root;
|
struct kernfs_fs_context *kfc = fc->fs_private;
|
||||||
void *ns;
|
int ret;
|
||||||
bool new_sb = false;
|
|
||||||
|
|
||||||
if (!(flags & SB_KERNMOUNT)) {
|
ret = kernfs_get_tree(fc);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
if (kfc->new_sb_created)
|
||||||
|
fc->root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void sysfs_fs_context_free(struct fs_context *fc)
|
||||||
|
{
|
||||||
|
struct kernfs_fs_context *kfc = fc->fs_private;
|
||||||
|
|
||||||
|
if (kfc->ns_tag)
|
||||||
|
kobj_ns_drop(KOBJ_NS_TYPE_NET, kfc->ns_tag);
|
||||||
|
kernfs_free_fs_context(fc);
|
||||||
|
kfree(kfc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct fs_context_operations sysfs_fs_context_ops = {
|
||||||
|
.free = sysfs_fs_context_free,
|
||||||
|
.get_tree = sysfs_get_tree,
|
||||||
|
};
|
||||||
|
|
||||||
|
static int sysfs_init_fs_context(struct fs_context *fc)
|
||||||
|
{
|
||||||
|
struct kernfs_fs_context *kfc;
|
||||||
|
struct net *netns;
|
||||||
|
|
||||||
|
if (!(fc->sb_flags & SB_KERNMOUNT)) {
|
||||||
if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET))
|
if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET))
|
||||||
return ERR_PTR(-EPERM);
|
return -EPERM;
|
||||||
}
|
}
|
||||||
|
|
||||||
ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
|
kfc = kzalloc(sizeof(struct kernfs_fs_context), GFP_KERNEL);
|
||||||
root = kernfs_mount_ns(fs_type, flags, sysfs_root,
|
if (!kfc)
|
||||||
SYSFS_MAGIC, &new_sb, ns);
|
return -ENOMEM;
|
||||||
if (!new_sb)
|
|
||||||
kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
|
|
||||||
else if (!IS_ERR(root))
|
|
||||||
root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE;
|
|
||||||
|
|
||||||
return root;
|
kfc->ns_tag = netns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
|
||||||
|
kfc->root = sysfs_root;
|
||||||
|
kfc->magic = SYSFS_MAGIC;
|
||||||
|
fc->fs_private = kfc;
|
||||||
|
fc->ops = &sysfs_fs_context_ops;
|
||||||
|
if (fc->user_ns)
|
||||||
|
put_user_ns(fc->user_ns);
|
||||||
|
fc->user_ns = get_user_ns(netns->user_ns);
|
||||||
|
fc->global = true;
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void sysfs_kill_sb(struct super_block *sb)
|
static void sysfs_kill_sb(struct super_block *sb)
|
||||||
@ -52,10 +87,10 @@ static void sysfs_kill_sb(struct super_block *sb)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static struct file_system_type sysfs_fs_type = {
|
static struct file_system_type sysfs_fs_type = {
|
||||||
.name = "sysfs",
|
.name = "sysfs",
|
||||||
.mount = sysfs_mount,
|
.init_fs_context = sysfs_init_fs_context,
|
||||||
.kill_sb = sysfs_kill_sb,
|
.kill_sb = sysfs_kill_sb,
|
||||||
.fs_flags = FS_USERNS_MOUNT,
|
.fs_flags = FS_USERNS_MOUNT,
|
||||||
};
|
};
|
||||||
|
|
||||||
int __init sysfs_init(void)
|
int __init sysfs_init(void)
|
||||||
|
@ -25,7 +25,9 @@ struct seq_file;
|
|||||||
struct vm_area_struct;
|
struct vm_area_struct;
|
||||||
struct super_block;
|
struct super_block;
|
||||||
struct file_system_type;
|
struct file_system_type;
|
||||||
|
struct fs_context;
|
||||||
|
|
||||||
|
struct kernfs_fs_context;
|
||||||
struct kernfs_open_node;
|
struct kernfs_open_node;
|
||||||
struct kernfs_iattrs;
|
struct kernfs_iattrs;
|
||||||
|
|
||||||
@ -167,7 +169,6 @@ struct kernfs_node {
|
|||||||
* kernfs_node parameter.
|
* kernfs_node parameter.
|
||||||
*/
|
*/
|
||||||
struct kernfs_syscall_ops {
|
struct kernfs_syscall_ops {
|
||||||
int (*remount_fs)(struct kernfs_root *root, int *flags, char *data);
|
|
||||||
int (*show_options)(struct seq_file *sf, struct kernfs_root *root);
|
int (*show_options)(struct seq_file *sf, struct kernfs_root *root);
|
||||||
|
|
||||||
int (*mkdir)(struct kernfs_node *parent, const char *name,
|
int (*mkdir)(struct kernfs_node *parent, const char *name,
|
||||||
@ -268,6 +269,18 @@ struct kernfs_ops {
|
|||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The kernfs superblock creation/mount parameter context.
|
||||||
|
*/
|
||||||
|
struct kernfs_fs_context {
|
||||||
|
struct kernfs_root *root; /* Root of the hierarchy being mounted */
|
||||||
|
void *ns_tag; /* Namespace tag of the mount (or NULL) */
|
||||||
|
unsigned long magic; /* File system specific magic number */
|
||||||
|
|
||||||
|
/* The following are set by kernfs_get_tree() and read by its callers */
|
||||||
|
bool new_sb_created; /* Set to T if we allocated a new sb */
|
||||||
|
};
|
||||||
|
|
||||||
#ifdef CONFIG_KERNFS
|
#ifdef CONFIG_KERNFS
|
||||||
|
|
||||||
static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn)
|
static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn)
|
||||||
@ -353,9 +366,8 @@ int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr);
|
|||||||
void kernfs_notify(struct kernfs_node *kn);
|
void kernfs_notify(struct kernfs_node *kn);
|
||||||
|
|
||||||
const void *kernfs_super_ns(struct super_block *sb);
|
const void *kernfs_super_ns(struct super_block *sb);
|
||||||
struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
|
int kernfs_get_tree(struct fs_context *fc);
|
||||||
struct kernfs_root *root, unsigned long magic,
|
void kernfs_free_fs_context(struct fs_context *fc);
|
||||||
bool *new_sb_created, const void *ns);
|
|
||||||
void kernfs_kill_sb(struct super_block *sb);
|
void kernfs_kill_sb(struct super_block *sb);
|
||||||
|
|
||||||
void kernfs_init(void);
|
void kernfs_init(void);
|
||||||
@ -458,11 +470,10 @@ static inline void kernfs_notify(struct kernfs_node *kn) { }
|
|||||||
static inline const void *kernfs_super_ns(struct super_block *sb)
|
static inline const void *kernfs_super_ns(struct super_block *sb)
|
||||||
{ return NULL; }
|
{ return NULL; }
|
||||||
|
|
||||||
static inline struct dentry *
|
static inline int kernfs_get_tree(struct fs_context *fc)
|
||||||
kernfs_mount_ns(struct file_system_type *fs_type, int flags,
|
{ return -ENOSYS; }
|
||||||
struct kernfs_root *root, unsigned long magic,
|
|
||||||
bool *new_sb_created, const void *ns)
|
static inline void kernfs_free_fs_context(struct fs_context *fc) { }
|
||||||
{ return ERR_PTR(-ENOSYS); }
|
|
||||||
|
|
||||||
static inline void kernfs_kill_sb(struct super_block *sb) { }
|
static inline void kernfs_kill_sb(struct super_block *sb) { }
|
||||||
|
|
||||||
@ -545,13 +556,4 @@ static inline int kernfs_rename(struct kernfs_node *kn,
|
|||||||
return kernfs_rename_ns(kn, new_parent, new_name, NULL);
|
return kernfs_rename_ns(kn, new_parent, new_name, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline struct dentry *
|
|
||||||
kernfs_mount(struct file_system_type *fs_type, int flags,
|
|
||||||
struct kernfs_root *root, unsigned long magic,
|
|
||||||
bool *new_sb_created)
|
|
||||||
{
|
|
||||||
return kernfs_mount_ns(fs_type, flags, root,
|
|
||||||
magic, new_sb_created, NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* __LINUX_KERNFS_H */
|
#endif /* __LINUX_KERNFS_H */
|
||||||
|
@ -41,6 +41,7 @@ extern void __init enable_debug_cgroup(void);
|
|||||||
* The cgroup filesystem superblock creation/mount context.
|
* The cgroup filesystem superblock creation/mount context.
|
||||||
*/
|
*/
|
||||||
struct cgroup_fs_context {
|
struct cgroup_fs_context {
|
||||||
|
struct kernfs_fs_context kfc;
|
||||||
struct cgroup_root *root;
|
struct cgroup_root *root;
|
||||||
struct cgroup_namespace *ns;
|
struct cgroup_namespace *ns;
|
||||||
unsigned int flags; /* CGRP_ROOT_* flags */
|
unsigned int flags; /* CGRP_ROOT_* flags */
|
||||||
@ -56,7 +57,9 @@ struct cgroup_fs_context {
|
|||||||
|
|
||||||
static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc)
|
static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc)
|
||||||
{
|
{
|
||||||
return fc->fs_private;
|
struct kernfs_fs_context *kfc = fc->fs_private;
|
||||||
|
|
||||||
|
return container_of(kfc, struct cgroup_fs_context, kfc);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -2039,18 +2039,14 @@ out:
|
|||||||
int cgroup_do_get_tree(struct fs_context *fc)
|
int cgroup_do_get_tree(struct fs_context *fc)
|
||||||
{
|
{
|
||||||
struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
|
struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
|
||||||
bool new_sb = false;
|
int ret;
|
||||||
unsigned long magic;
|
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
|
ctx->kfc.root = ctx->root->kf_root;
|
||||||
if (fc->fs_type == &cgroup2_fs_type)
|
if (fc->fs_type == &cgroup2_fs_type)
|
||||||
magic = CGROUP2_SUPER_MAGIC;
|
ctx->kfc.magic = CGROUP2_SUPER_MAGIC;
|
||||||
else
|
else
|
||||||
magic = CGROUP_SUPER_MAGIC;
|
ctx->kfc.magic = CGROUP_SUPER_MAGIC;
|
||||||
fc->root = kernfs_mount(fc->fs_type, fc->sb_flags, ctx->root->kf_root,
|
ret = kernfs_get_tree(fc);
|
||||||
magic, &new_sb);
|
|
||||||
if (IS_ERR(fc->root))
|
|
||||||
ret = PTR_ERR(fc->root);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* In non-init cgroup namespace, instead of root cgroup's dentry,
|
* In non-init cgroup namespace, instead of root cgroup's dentry,
|
||||||
@ -2078,7 +2074,7 @@ int cgroup_do_get_tree(struct fs_context *fc)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!new_sb)
|
if (!ctx->kfc.new_sb_created)
|
||||||
cgroup_put(&ctx->root->cgrp);
|
cgroup_put(&ctx->root->cgrp);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
@ -2094,19 +2090,15 @@ static void cgroup_fs_context_free(struct fs_context *fc)
|
|||||||
kfree(ctx->name);
|
kfree(ctx->name);
|
||||||
kfree(ctx->release_agent);
|
kfree(ctx->release_agent);
|
||||||
put_cgroup_ns(ctx->ns);
|
put_cgroup_ns(ctx->ns);
|
||||||
|
kernfs_free_fs_context(fc);
|
||||||
kfree(ctx);
|
kfree(ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int cgroup_get_tree(struct fs_context *fc)
|
static int cgroup_get_tree(struct fs_context *fc)
|
||||||
{
|
{
|
||||||
struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
|
|
||||||
struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
|
struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
/* Check if the caller has permission to mount. */
|
|
||||||
if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
|
|
||||||
return -EPERM;
|
|
||||||
|
|
||||||
cgrp_dfl_visible = true;
|
cgrp_dfl_visible = true;
|
||||||
cgroup_get_live(&cgrp_dfl_root.cgrp);
|
cgroup_get_live(&cgrp_dfl_root.cgrp);
|
||||||
ctx->root = &cgrp_dfl_root;
|
ctx->root = &cgrp_dfl_root;
|
||||||
@ -2132,7 +2124,8 @@ static const struct fs_context_operations cgroup1_fs_context_ops = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initialise the cgroup filesystem creation/reconfiguration context.
|
* Initialise the cgroup filesystem creation/reconfiguration context. Notably,
|
||||||
|
* we select the namespace we're going to use.
|
||||||
*/
|
*/
|
||||||
static int cgroup_init_fs_context(struct fs_context *fc)
|
static int cgroup_init_fs_context(struct fs_context *fc)
|
||||||
{
|
{
|
||||||
@ -2151,11 +2144,15 @@ static int cgroup_init_fs_context(struct fs_context *fc)
|
|||||||
|
|
||||||
ctx->ns = current->nsproxy->cgroup_ns;
|
ctx->ns = current->nsproxy->cgroup_ns;
|
||||||
get_cgroup_ns(ctx->ns);
|
get_cgroup_ns(ctx->ns);
|
||||||
fc->fs_private = ctx;
|
fc->fs_private = &ctx->kfc;
|
||||||
if (fc->fs_type == &cgroup2_fs_type)
|
if (fc->fs_type == &cgroup2_fs_type)
|
||||||
fc->ops = &cgroup_fs_context_ops;
|
fc->ops = &cgroup_fs_context_ops;
|
||||||
else
|
else
|
||||||
fc->ops = &cgroup1_fs_context_ops;
|
fc->ops = &cgroup1_fs_context_ops;
|
||||||
|
if (fc->user_ns)
|
||||||
|
put_user_ns(fc->user_ns);
|
||||||
|
fc->user_ns = get_user_ns(ctx->ns->user_ns);
|
||||||
|
fc->global = true;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user