linux/fs/erofs/namei.c

225 lines
5.3 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2017-2018 HUAWEI, Inc.
* https://www.huawei.com/
* Copyright (C) 2022, Alibaba Cloud
*/
#include "xattr.h"
#include <trace/events/erofs.h>
struct erofs_qstr {
const unsigned char *name;
const unsigned char *end;
};
/* based on the end of qn is accurate and it must have the trailing '\0' */
static inline int erofs_dirnamecmp(const struct erofs_qstr *qn,
const struct erofs_qstr *qd,
unsigned int *matched)
{
unsigned int i = *matched;
/*
* on-disk error, let's only BUG_ON in the debugging mode.
* otherwise, it will return 1 to just skip the invalid name
* and go on (in consideration of the lookup performance).
*/
DBG_BUGON(qd->name > qd->end);
/* qd could not have trailing '\0' */
/* However it is absolutely safe if < qd->end */
while (qd->name + i < qd->end && qd->name[i] != '\0') {
if (qn->name[i] != qd->name[i]) {
*matched = i;
return qn->name[i] > qd->name[i] ? 1 : -1;
}
++i;
}
*matched = i;
/* See comments in __d_alloc on the terminating NUL character */
return qn->name[i] == '\0' ? 0 : 1;
}
#define nameoff_from_disk(off, sz) (le16_to_cpu(off) & ((sz) - 1))
static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name,
u8 *data,
unsigned int dirblksize,
const int ndirents)
{
int head, back;
unsigned int startprfx, endprfx;
struct erofs_dirent *const de = (struct erofs_dirent *)data;
/* since the 1st dirent has been evaluated previously */
head = 1;
back = ndirents - 1;
startprfx = endprfx = 0;
while (head <= back) {
const int mid = head + (back - head) / 2;
const int nameoff = nameoff_from_disk(de[mid].nameoff,
dirblksize);
unsigned int matched = min(startprfx, endprfx);
struct erofs_qstr dname = {
.name = data + nameoff,
.end = mid >= ndirents - 1 ?
data + dirblksize :
data + nameoff_from_disk(de[mid + 1].nameoff,
dirblksize)
};
/* string comparison without already matched prefix */
int ret = erofs_dirnamecmp(name, &dname, &matched);
if (!ret) {
return de + mid;
} else if (ret > 0) {
head = mid + 1;
startprfx = matched;
} else {
back = mid - 1;
endprfx = matched;
}
}
return ERR_PTR(-ENOENT);
}
static void *erofs_find_target_block(struct erofs_buf *target,
struct inode *dir, struct erofs_qstr *name, int *_ndirents)
{
erofs: avoid hardcoded blocksize for subpage block support As the first step of converting hardcoded blocksize to that specified in on-disk superblock, convert all call sites of hardcoded blocksize to sb->s_blocksize except for: 1) use sbi->blkszbits instead of sb->s_blocksize in erofs_superblock_csum_verify() since sb->s_blocksize has not been updated with the on-disk blocksize yet when the function is called. 2) use inode->i_blkbits instead of sb->s_blocksize in erofs_bread(), since the inode operated on may be an anonymous inode in fscache mode. Currently the anonymous inode is allocated from an anonymous mount maintained in erofs, while in the near future we may allocate anonymous inodes from a generic API directly and thus have no access to the anonymous inode's i_sb. Thus we keep the block size in i_blkbits for anonymous inodes in fscache mode. Be noted that this patch only gets rid of the hardcoded blocksize, in preparation for actually setting the on-disk block size in the following patch. The hard limit of constraining the block size to PAGE_SIZE still exists until the next patch. Signed-off-by: Jingbo Xu <jefflexu@linux.alibaba.com> Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com> Reviewed-by: Yue Hu <huyue2@coolpad.com> Reviewed-by: Chao Yu <chao@kernel.org> Link: https://lore.kernel.org/r/20230313135309.75269-2-jefflexu@linux.alibaba.com [ Gao Xiang: fold a patch to fix incorrect truncated offsets. ] Link: https://lore.kernel.org/r/20230413035734.15457-1-zhujia.zj@bytedance.com Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
2023-03-13 13:53:08 +00:00
unsigned int bsz = i_blocksize(dir);
int head = 0, back = erofs_iblks(dir) - 1;
unsigned int startprfx = 0, endprfx = 0;
void *candidate = ERR_PTR(-ENOENT);
while (head <= back) {
const int mid = head + (back - head) / 2;
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
struct erofs_dirent *de;
buf.mapping = dir->i_mapping;
de = erofs_bread(&buf, erofs_pos(dir->i_sb, mid), EROFS_KMAP);
if (!IS_ERR(de)) {
erofs: avoid hardcoded blocksize for subpage block support As the first step of converting hardcoded blocksize to that specified in on-disk superblock, convert all call sites of hardcoded blocksize to sb->s_blocksize except for: 1) use sbi->blkszbits instead of sb->s_blocksize in erofs_superblock_csum_verify() since sb->s_blocksize has not been updated with the on-disk blocksize yet when the function is called. 2) use inode->i_blkbits instead of sb->s_blocksize in erofs_bread(), since the inode operated on may be an anonymous inode in fscache mode. Currently the anonymous inode is allocated from an anonymous mount maintained in erofs, while in the near future we may allocate anonymous inodes from a generic API directly and thus have no access to the anonymous inode's i_sb. Thus we keep the block size in i_blkbits for anonymous inodes in fscache mode. Be noted that this patch only gets rid of the hardcoded blocksize, in preparation for actually setting the on-disk block size in the following patch. The hard limit of constraining the block size to PAGE_SIZE still exists until the next patch. Signed-off-by: Jingbo Xu <jefflexu@linux.alibaba.com> Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com> Reviewed-by: Yue Hu <huyue2@coolpad.com> Reviewed-by: Chao Yu <chao@kernel.org> Link: https://lore.kernel.org/r/20230313135309.75269-2-jefflexu@linux.alibaba.com [ Gao Xiang: fold a patch to fix incorrect truncated offsets. ] Link: https://lore.kernel.org/r/20230413035734.15457-1-zhujia.zj@bytedance.com Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
2023-03-13 13:53:08 +00:00
const int nameoff = nameoff_from_disk(de->nameoff, bsz);
const int ndirents = nameoff / sizeof(*de);
int diff;
unsigned int matched;
struct erofs_qstr dname;
if (!ndirents) {
erofs_put_metabuf(&buf);
erofs_err(dir->i_sb,
"corrupted dir block %d @ nid %llu",
mid, EROFS_I(dir)->nid);
DBG_BUGON(1);
de = ERR_PTR(-EFSCORRUPTED);
goto out;
}
matched = min(startprfx, endprfx);
dname.name = (u8 *)de + nameoff;
if (ndirents == 1)
erofs: avoid hardcoded blocksize for subpage block support As the first step of converting hardcoded blocksize to that specified in on-disk superblock, convert all call sites of hardcoded blocksize to sb->s_blocksize except for: 1) use sbi->blkszbits instead of sb->s_blocksize in erofs_superblock_csum_verify() since sb->s_blocksize has not been updated with the on-disk blocksize yet when the function is called. 2) use inode->i_blkbits instead of sb->s_blocksize in erofs_bread(), since the inode operated on may be an anonymous inode in fscache mode. Currently the anonymous inode is allocated from an anonymous mount maintained in erofs, while in the near future we may allocate anonymous inodes from a generic API directly and thus have no access to the anonymous inode's i_sb. Thus we keep the block size in i_blkbits for anonymous inodes in fscache mode. Be noted that this patch only gets rid of the hardcoded blocksize, in preparation for actually setting the on-disk block size in the following patch. The hard limit of constraining the block size to PAGE_SIZE still exists until the next patch. Signed-off-by: Jingbo Xu <jefflexu@linux.alibaba.com> Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com> Reviewed-by: Yue Hu <huyue2@coolpad.com> Reviewed-by: Chao Yu <chao@kernel.org> Link: https://lore.kernel.org/r/20230313135309.75269-2-jefflexu@linux.alibaba.com [ Gao Xiang: fold a patch to fix incorrect truncated offsets. ] Link: https://lore.kernel.org/r/20230413035734.15457-1-zhujia.zj@bytedance.com Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
2023-03-13 13:53:08 +00:00
dname.end = (u8 *)de + bsz;
else
dname.end = (u8 *)de +
erofs: avoid hardcoded blocksize for subpage block support As the first step of converting hardcoded blocksize to that specified in on-disk superblock, convert all call sites of hardcoded blocksize to sb->s_blocksize except for: 1) use sbi->blkszbits instead of sb->s_blocksize in erofs_superblock_csum_verify() since sb->s_blocksize has not been updated with the on-disk blocksize yet when the function is called. 2) use inode->i_blkbits instead of sb->s_blocksize in erofs_bread(), since the inode operated on may be an anonymous inode in fscache mode. Currently the anonymous inode is allocated from an anonymous mount maintained in erofs, while in the near future we may allocate anonymous inodes from a generic API directly and thus have no access to the anonymous inode's i_sb. Thus we keep the block size in i_blkbits for anonymous inodes in fscache mode. Be noted that this patch only gets rid of the hardcoded blocksize, in preparation for actually setting the on-disk block size in the following patch. The hard limit of constraining the block size to PAGE_SIZE still exists until the next patch. Signed-off-by: Jingbo Xu <jefflexu@linux.alibaba.com> Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com> Reviewed-by: Yue Hu <huyue2@coolpad.com> Reviewed-by: Chao Yu <chao@kernel.org> Link: https://lore.kernel.org/r/20230313135309.75269-2-jefflexu@linux.alibaba.com [ Gao Xiang: fold a patch to fix incorrect truncated offsets. ] Link: https://lore.kernel.org/r/20230413035734.15457-1-zhujia.zj@bytedance.com Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
2023-03-13 13:53:08 +00:00
nameoff_from_disk(de[1].nameoff, bsz);
/* string comparison without already matched prefix */
diff = erofs_dirnamecmp(name, &dname, &matched);
if (diff < 0) {
erofs_put_metabuf(&buf);
back = mid - 1;
endprfx = matched;
continue;
}
if (!IS_ERR(candidate))
erofs_put_metabuf(target);
*target = buf;
if (!diff) {
*_ndirents = 0;
return de;
}
head = mid + 1;
startprfx = matched;
candidate = de;
*_ndirents = ndirents;
continue;
}
out: /* free if the candidate is valid */
if (!IS_ERR(candidate))
erofs_put_metabuf(target);
return de;
}
return candidate;
}
int erofs_namei(struct inode *dir, const struct qstr *name, erofs_nid_t *nid,
unsigned int *d_type)
{
int ndirents;
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
struct erofs_dirent *de;
struct erofs_qstr qn;
if (!dir->i_size)
return -ENOENT;
qn.name = name->name;
qn.end = name->name + name->len;
buf.mapping = dir->i_mapping;
ndirents = 0;
de = erofs_find_target_block(&buf, dir, &qn, &ndirents);
if (IS_ERR(de))
return PTR_ERR(de);
if (ndirents)
erofs: avoid hardcoded blocksize for subpage block support As the first step of converting hardcoded blocksize to that specified in on-disk superblock, convert all call sites of hardcoded blocksize to sb->s_blocksize except for: 1) use sbi->blkszbits instead of sb->s_blocksize in erofs_superblock_csum_verify() since sb->s_blocksize has not been updated with the on-disk blocksize yet when the function is called. 2) use inode->i_blkbits instead of sb->s_blocksize in erofs_bread(), since the inode operated on may be an anonymous inode in fscache mode. Currently the anonymous inode is allocated from an anonymous mount maintained in erofs, while in the near future we may allocate anonymous inodes from a generic API directly and thus have no access to the anonymous inode's i_sb. Thus we keep the block size in i_blkbits for anonymous inodes in fscache mode. Be noted that this patch only gets rid of the hardcoded blocksize, in preparation for actually setting the on-disk block size in the following patch. The hard limit of constraining the block size to PAGE_SIZE still exists until the next patch. Signed-off-by: Jingbo Xu <jefflexu@linux.alibaba.com> Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com> Reviewed-by: Yue Hu <huyue2@coolpad.com> Reviewed-by: Chao Yu <chao@kernel.org> Link: https://lore.kernel.org/r/20230313135309.75269-2-jefflexu@linux.alibaba.com [ Gao Xiang: fold a patch to fix incorrect truncated offsets. ] Link: https://lore.kernel.org/r/20230413035734.15457-1-zhujia.zj@bytedance.com Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
2023-03-13 13:53:08 +00:00
de = find_target_dirent(&qn, (u8 *)de, i_blocksize(dir),
ndirents);
if (!IS_ERR(de)) {
*nid = le64_to_cpu(de->nid);
*d_type = de->file_type;
}
erofs_put_metabuf(&buf);
return PTR_ERR_OR_ZERO(de);
}
static struct dentry *erofs_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
int err;
erofs_nid_t nid;
unsigned int d_type;
struct inode *inode;
trace_erofs_lookup(dir, dentry, flags);
if (dentry->d_name.len > EROFS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
err = erofs_namei(dir, &dentry->d_name, &nid, &d_type);
if (err == -ENOENT)
/* negative dentry */
inode = NULL;
else if (err)
inode = ERR_PTR(err);
else
inode = erofs_iget(dir->i_sb, nid);
return d_splice_alias(inode, dentry);
}
const struct inode_operations erofs_dir_iops = {
.lookup = erofs_lookup,
.getattr = erofs_getattr,
.listxattr = erofs_listxattr,
fs: rename current get acl method The current way of setting and getting posix acls through the generic xattr interface is error prone and type unsafe. The vfs needs to interpret and fixup posix acls before storing or reporting it to userspace. Various hacks exist to make this work. The code is hard to understand and difficult to maintain in it's current form. Instead of making this work by hacking posix acls through xattr handlers we are building a dedicated posix acl api around the get and set inode operations. This removes a lot of hackiness and makes the codepaths easier to maintain. A lot of background can be found in [1]. The current inode operation for getting posix acls takes an inode argument but various filesystems (e.g., 9p, cifs, overlayfs) need access to the dentry. In contrast to the ->set_acl() inode operation we cannot simply extend ->get_acl() to take a dentry argument. The ->get_acl() inode operation is called from: acl_permission_check() -> check_acl() -> get_acl() which is part of generic_permission() which in turn is part of inode_permission(). Both generic_permission() and inode_permission() are called in the ->permission() handler of various filesystems (e.g., overlayfs). So simply passing a dentry argument to ->get_acl() would amount to also having to pass a dentry argument to ->permission(). We should avoid this unnecessary change. So instead of extending the existing inode operation rename it from ->get_acl() to ->get_inode_acl() and add a ->get_acl() method later that passes a dentry argument and which filesystems that need access to the dentry can implement instead of ->get_inode_acl(). Filesystems like cifs which allow setting and getting posix acls but not using them for permission checking during lookup can simply not implement ->get_inode_acl(). This is intended to be a non-functional change. Link: https://lore.kernel.org/all/20220801145520.1532837-1-brauner@kernel.org [1] Suggested-by/Inspired-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Christian Brauner (Microsoft) <brauner@kernel.org>
2022-09-22 15:17:00 +00:00
.get_inode_acl = erofs_get_acl,
.fiemap = erofs_fiemap,
};