// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017-2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 * Copyright (C) 2021, Alibaba Cloud
 */
#include "internal.h"
#include <linux/sched/mm.h>
#include <trace/events/erofs.h>
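
/*
 * Metadata buffer helpers: an erofs_buf caches a single page of metadata
 * (and its optional kernel mapping), so consecutive reads within the same
 * page can skip redundant page cache lookups and remappings.
 */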
void erofs_unmap_metabuf(struct erofs_buf *buf)
{
	if (buf->kmap_type == EROFS_KMAP)
		kunmap_local(buf->base);
	buf->base = NULL;
	buf->kmap_type = EROFS_NO_KMAP;
}
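
/* Release the cached page (and its mapping, if any) held by @buf. */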
void erofs_put_metabuf(struct erofs_buf *buf)
{
	if (!buf->page)
		return;
	erofs_unmap_metabuf(buf);
	folio_put(page_folio(buf->page));
	buf->page = NULL;
}
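
/*
 * Read metadata at @offset from the address space bound to @buf, reusing
 * the currently cached page when it already covers @offset.  Returns a
 * pointer to the byte at @offset when @type requests a kernel mapping,
 * NULL for EROFS_NO_KMAP, or an ERR_PTR() on failure (including -EFAULT
 * if a conflicting mapping type is requested on the same buffer).
 */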
void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset,
		  enum erofs_kmap_type type)
{
	pgoff_t index = offset >> PAGE_SHIFT;
	struct folio *folio = NULL;

	if (buf->page) {
		folio = page_folio(buf->page);
		if (folio_file_page(folio, index) != buf->page)
			erofs_unmap_metabuf(buf);
	}
	if (!folio || !folio_contains(folio, index)) {
		erofs_put_metabuf(buf);
		folio = read_mapping_folio(buf->mapping, index, NULL);
		if (IS_ERR(folio))
			return folio;
	}
	buf->page = folio_file_page(folio, index);

	if (buf->kmap_type == EROFS_NO_KMAP) {
		if (type == EROFS_KMAP)
			buf->base = kmap_local_page(buf->page);
		buf->kmap_type = type;
	} else if (buf->kmap_type != type) {
		DBG_BUGON(1);
		return ERR_PTR(-EFAULT);
	}
	if (type == EROFS_NO_KMAP)
		return NULL;
	return buf->base + (offset & ~PAGE_MASK);
}
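
/*
 * Bind @buf to the backing address space of the filesystem: the backing
 * regular file for file-backed mounts, the fscache pseudo inode for
 * fscache mode, or the block device mapping otherwise.
 */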
void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	if (erofs_is_fileio_mode(sbi))
		buf->mapping = file_inode(sbi->fdev)->i_mapping;
	else if (erofs_is_fscache_mode(sb))
		buf->mapping = sbi->s_fscache->inode->i_mapping;
	else
		buf->mapping = sb->s_bdev->bd_mapping;
}
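
/*
 * One-shot helper combining erofs_init_metabuf() and erofs_bread().
 * A typical call pattern (illustrative sketch only, mirroring the
 * callers below):
 *
 *	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
 *	void *ptr = erofs_read_metabuf(&buf, sb, pos, EROFS_KMAP);
 *
 *	if (IS_ERR(ptr))
 *		return PTR_ERR(ptr);
 *	...
 *	erofs_put_metabuf(&buf);
 */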
void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
			 erofs_off_t offset, enum erofs_kmap_type type)
{
	erofs_init_metabuf(buf, sb);
	return erofs_bread(buf, offset, type);
}
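
/*
 * Map a logical extent of a flat (non chunk-based) inode: plain blocks
 * are mapped contiguously from vi->raw_blkaddr, while the tail-packed
 * inline part lives right after the on-disk inode and xattrs within a
 * single meta block.
 */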
static int erofs_map_blocks_flatmode(struct inode *inode,
				     struct erofs_map_blocks *map)
{
	struct erofs_inode *vi = EROFS_I(inode);
	struct super_block *sb = inode->i_sb;
	bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);
	erofs_blk_t lastblk = erofs_iblks(inode) - tailendpacking;

	map->m_flags = EROFS_MAP_MAPPED;	/* no hole in flat inodes */
	if (map->m_la < erofs_pos(sb, lastblk)) {
		map->m_pa = erofs_pos(sb, vi->raw_blkaddr) + map->m_la;
		map->m_plen = erofs_pos(sb, lastblk) - map->m_la;
	} else {
		DBG_BUGON(!tailendpacking);
		map->m_pa = erofs_iloc(inode) + vi->inode_isize +
			vi->xattr_isize + erofs_blkoff(sb, map->m_la);
		map->m_plen = inode->i_size - map->m_la;

		/* inline data should be located in the same meta block */
		if (erofs_blkoff(sb, map->m_pa) + map->m_plen > sb->s_blocksize) {
			erofs_err(sb, "inline data across blocks @ nid %llu", vi->nid);
			DBG_BUGON(1);
			return -EFSCORRUPTED;
		}
		map->m_flags |= EROFS_MAP_META;
	}
	return 0;
}
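
/*
 * Translate the logical range starting at map->m_la into a physical
 * extent.  Chunk-based inodes are resolved through their on-disk block
 * map or chunk indexes; everything else falls back to flatmode mapping.
 */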
int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
{
	struct super_block *sb = inode->i_sb;
	struct erofs_inode *vi = EROFS_I(inode);
	struct erofs_inode_chunk_index *idx;
	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
	u64 chunknr;
	unsigned int unit;
	erofs_off_t pos;
	void *kaddr;
	int err = 0;

	trace_erofs_map_blocks_enter(inode, map, 0);
	map->m_deviceid = 0;
	if (map->m_la >= inode->i_size) {
		/* leave out-of-bound access unmapped */
		map->m_flags = 0;
		map->m_plen = map->m_llen;
		goto out;
	}

	if (vi->datalayout != EROFS_INODE_CHUNK_BASED) {
		err = erofs_map_blocks_flatmode(inode, map);
		goto out;
	}

	if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
		unit = sizeof(*idx);			/* chunk index */
	else
		unit = EROFS_BLOCK_MAP_ENTRY_SIZE;	/* block map */

	chunknr = map->m_la >> vi->chunkbits;
	pos = ALIGN(erofs_iloc(inode) + vi->inode_isize +
		    vi->xattr_isize, unit) + unit * chunknr;

	kaddr = erofs_read_metabuf(&buf, sb, pos, EROFS_KMAP);
	if (IS_ERR(kaddr)) {
		err = PTR_ERR(kaddr);
		goto out;
	}
	map->m_la = chunknr << vi->chunkbits;
	map->m_plen = min_t(erofs_off_t, 1UL << vi->chunkbits,
			round_up(inode->i_size - map->m_la, sb->s_blocksize));

	/* handle block map */
	if (!(vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) {
		__le32 *blkaddr = kaddr;

		if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) {
			map->m_flags = 0;
		} else {
			map->m_pa = erofs_pos(sb, le32_to_cpu(*blkaddr));
			map->m_flags = EROFS_MAP_MAPPED;
		}
		goto out_unlock;
	}
	/* parse chunk indexes */
	idx = kaddr;
	switch (le32_to_cpu(idx->blkaddr)) {
	case EROFS_NULL_ADDR:
		map->m_flags = 0;
		break;
	default:
		map->m_deviceid = le16_to_cpu(idx->device_id) &
			EROFS_SB(sb)->device_id_mask;
		map->m_pa = erofs_pos(sb, le32_to_cpu(idx->blkaddr));
		map->m_flags = EROFS_MAP_MAPPED;
		break;
	}
out_unlock:
	erofs_put_metabuf(&buf);
out:
	if (!err)
		map->m_llen = map->m_plen;
	trace_erofs_map_blocks_exit(inode, map, 0, err);
	return err;
}
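
/*
 * Fill the device mapping from a per-device slot: a block device when the
 * backing file refers to one, otherwise the raw file itself, plus the
 * corresponding DAX and fscache handles.
 */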
static void erofs_fill_from_devinfo(struct erofs_map_dev *map,
				    struct erofs_device_info *dif)
{
	map->m_bdev = NULL;
	map->m_fp = NULL;
	if (dif->file) {
		if (S_ISBLK(file_inode(dif->file)->i_mode))
			map->m_bdev = file_bdev(dif->file);
		else
			map->m_fp = dif->file;
	}
	map->m_daxdev = dif->dax_dev;
	map->m_dax_part_off = dif->dax_part_off;
	map->m_fscache = dif->fscache;
}
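
/*
 * Resolve map->m_pa to the proper backing device.  With an explicit
 * device id, look it up in the device IDR; in flat multi-device layouts,
 * find the extra device whose block range covers map->m_pa instead.
 */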
int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
{
	struct erofs_dev_context *devs = EROFS_SB(sb)->devs;
	struct erofs_device_info *dif;
	erofs_off_t startoff, length;
	int id;

	map->m_bdev = sb->s_bdev;
	map->m_daxdev = EROFS_SB(sb)->dax_dev;
	map->m_dax_part_off = EROFS_SB(sb)->dax_part_off;
	map->m_fscache = EROFS_SB(sb)->s_fscache;
	map->m_fp = EROFS_SB(sb)->fdev;

	if (map->m_deviceid) {
		down_read(&devs->rwsem);
		dif = idr_find(&devs->tree, map->m_deviceid - 1);
		if (!dif) {
			up_read(&devs->rwsem);
			return -ENODEV;
		}
		if (devs->flatdev) {
			map->m_pa += erofs_pos(sb, dif->mapped_blkaddr);
			up_read(&devs->rwsem);
			return 0;
		}
		erofs_fill_from_devinfo(map, dif);
		up_read(&devs->rwsem);
	} else if (devs->extra_devices && !devs->flatdev) {
		down_read(&devs->rwsem);
		idr_for_each_entry(&devs->tree, dif, id) {
			if (!dif->mapped_blkaddr)
				continue;

			startoff = erofs_pos(sb, dif->mapped_blkaddr);
			length = erofs_pos(sb, dif->blocks);
			if (map->m_pa >= startoff &&
			    map->m_pa < startoff + length) {
				map->m_pa -= startoff;
				erofs_fill_from_devinfo(map, dif);
				break;
			}
		}
		up_read(&devs->rwsem);
	}
	return 0;
}
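
/*
 * "Online" folio accounting: folio->private holds a part counter while a
 * read is in flight.  Expected lifecycle (illustrative sketch only):
 *
 *	erofs_onlinefolio_init(folio);		// counter = 1
 *	erofs_onlinefolio_split(folio);		// +1 per subpage part
 *	...
 *	erofs_onlinefolio_end(folio, err);	// -1 per completed part
 *	erofs_onlinefolio_end(folio, err);	// last call ends the read
 */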
/*
 * bit 30: I/O error occurred on this folio
 * bit 0 - 29: remaining parts to complete this folio
 */
#define EROFS_ONLINEFOLIO_EIO			(1 << 30)
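
/* Start the part counter at 1; the submitter drops this bias via _end(). */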
void erofs_onlinefolio_init(struct folio *folio)
{
	union {
		atomic_t o;
		void *v;
	} u = { .o = ATOMIC_INIT(1) };

	folio->private = u.v;	/* valid only if file-backed folio is locked */
}
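
/* Add one part reference for a sub-folio range about to be processed. */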
void erofs_onlinefolio_split(struct folio *folio)
{
	atomic_inc((atomic_t *)&folio->private);
}
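
/*
 * Drop one part reference, accumulating @err into the EIO bit; the last
 * dropper clears folio->private and completes the folio read.
 */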
void erofs_onlinefolio_end(struct folio *folio, int err)
{
	int orig, v;

	do {
		orig = atomic_read((atomic_t *)&folio->private);
		v = (orig - 1) | (err ? EROFS_ONLINEFOLIO_EIO : 0);
	} while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig);

	if (v & ~EROFS_ONLINEFOLIO_EIO)
		return;
	folio->private = 0;
	folio_end_read(folio, !(v & EROFS_ONLINEFOLIO_EIO));
}
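
/*
 * iomap callback: convert an erofs_map_blocks()/erofs_map_dev() result
 * into an iomap, reporting holes, inline (metadata-packed) data and
 * ordinary mapped extents accordingly.
 */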
static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
{
	int ret;
	struct super_block *sb = inode->i_sb;
	struct erofs_map_blocks map;
	struct erofs_map_dev mdev;

	map.m_la = offset;
	map.m_llen = length;

	ret = erofs_map_blocks(inode, &map);
	if (ret < 0)
		return ret;

	mdev = (struct erofs_map_dev) {
		.m_deviceid = map.m_deviceid,
		.m_pa = map.m_pa,
	};
	ret = erofs_map_dev(sb, &mdev);
	if (ret)
		return ret;

	iomap->offset = map.m_la;
	if (flags & IOMAP_DAX)
		iomap->dax_dev = mdev.m_daxdev;
	else
		iomap->bdev = mdev.m_bdev;
	iomap->length = map.m_llen;
	iomap->flags = 0;
	iomap->private = NULL;

	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		iomap->type = IOMAP_HOLE;
		iomap->addr = IOMAP_NULL_ADDR;
		if (!iomap->length)
			iomap->length = length;
		return 0;
	}

	if (map.m_flags & EROFS_MAP_META) {
		void *ptr;
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;

		iomap->type = IOMAP_INLINE;
		ptr = erofs_read_metabuf(&buf, sb, mdev.m_pa, EROFS_KMAP);
		if (IS_ERR(ptr))
			return PTR_ERR(ptr);
		iomap->inline_data = ptr;
		iomap->private = buf.base;
	} else {
		iomap->type = IOMAP_MAPPED;
		iomap->addr = mdev.m_pa;
		if (flags & IOMAP_DAX)
			iomap->addr += mdev.m_dax_part_off;
	}
	return 0;
}
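
/*
 * Undo erofs_iomap_begin() for inline extents: rebuild a temporary
 * erofs_buf around iomap->private and release the metadata page.
 */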
static int erofs_iomap_end(struct inode *inode, loff_t pos, loff_t length,
		ssize_t written, unsigned int flags, struct iomap *iomap)
{
	void *ptr = iomap->private;

	if (ptr) {
		struct erofs_buf buf = {
			.page = kmap_to_page(ptr),
			.base = ptr,
			.kmap_type = EROFS_KMAP,
		};

		DBG_BUGON(iomap->type != IOMAP_INLINE);
		erofs_put_metabuf(&buf);
	} else {
		DBG_BUGON(iomap->type == IOMAP_INLINE);
	}
	return written;
}

static const struct iomap_ops erofs_iomap_ops = {
	.iomap_begin = erofs_iomap_begin,
	.iomap_end = erofs_iomap_end,
};
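
/*
 * FIEMAP entry point: compressed inodes are reported through
 * z_erofs_iomap_report_ops when CONFIG_EROFS_FS_ZIP is enabled, the rest
 * via erofs_iomap_ops.
 */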
int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 u64 start, u64 len)
{
	if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) {
#ifdef CONFIG_EROFS_FS_ZIP
		return iomap_fiemap(inode, fieinfo, start, len,
				    &z_erofs_iomap_report_ops);
#else
		return -EOPNOTSUPP;
#endif
	}
	return iomap_fiemap(inode, fieinfo, start, len, &erofs_iomap_ops);
}

/*
 * Since we don't have write or truncate flows, no inode
 * locking needs to be held at the moment.
 */
static int erofs_read_folio(struct file *file, struct folio *folio)
{
	return iomap_read_folio(folio, &erofs_iomap_ops);
}

static void erofs_readahead(struct readahead_control *rac)
{
	return iomap_readahead(rac, &erofs_iomap_ops);
}

static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
{
	return iomap_bmap(mapping, block, &erofs_iomap_ops);
}
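
/*
 * Read path dispatch: DAX inodes use dax_iomap_rw(), O_DIRECT requests
 * go through iomap_dio_rw() after logical block alignment checks, and
 * everything else falls back to buffered filemap_read().
 */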
static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	/* no need to take (shared) inode lock since it's a ro filesystem */
	if (!iov_iter_count(to))
		return 0;

#ifdef CONFIG_FS_DAX
	if (IS_DAX(inode))
		return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
#endif
	if (iocb->ki_flags & IOCB_DIRECT) {
		struct block_device *bdev = inode->i_sb->s_bdev;
		unsigned int blksize_mask;

		if (bdev)
			blksize_mask = bdev_logical_block_size(bdev) - 1;
		else
			blksize_mask = i_blocksize(inode) - 1;

		if ((iocb->ki_pos | iov_iter_count(to) |
		     iov_iter_alignment(to)) & blksize_mask)
			return -EINVAL;

		return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
				    NULL, 0, NULL, 0);
	}
	return filemap_read(iocb, to, 0);
}

/* for uncompressed (aligned) files and raw access for other files */
const struct address_space_operations erofs_aops = {
	.read_folio = erofs_read_folio,
	.readahead = erofs_readahead,
	.bmap = erofs_bmap,
	.direct_IO = noop_direct_IO,
	.release_folio = iomap_release_folio,
	.invalidate_folio = iomap_invalidate_folio,
};
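
/*
 * DAX mmap support: faults are served directly through the iomap ops;
 * shared writable mappings are refused since the filesystem is read-only.
 */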
#ifdef CONFIG_FS_DAX
static vm_fault_t erofs_dax_huge_fault(struct vm_fault *vmf,
		unsigned int order)
{
	return dax_iomap_fault(vmf, order, NULL, NULL, &erofs_iomap_ops);
}

static vm_fault_t erofs_dax_fault(struct vm_fault *vmf)
{
	return erofs_dax_huge_fault(vmf, 0);
}

static const struct vm_operations_struct erofs_dax_vm_ops = {
	.fault		= erofs_dax_fault,
	.huge_fault	= erofs_dax_huge_fault,
};

static int erofs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	if (!IS_DAX(file_inode(file)))
		return generic_file_readonly_mmap(file, vma);

	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
		return -EINVAL;

	vma->vm_ops = &erofs_dax_vm_ops;
	vm_flags_set(vma, VM_HUGEPAGE);
	return 0;
}
#else
#define erofs_file_mmap	generic_file_readonly_mmap
#endif

const struct file_operations erofs_file_fops = {
	.llseek		= generic_file_llseek,
	.read_iter	= erofs_file_read_iter,
	.mmap		= erofs_file_mmap,
	.get_unmapped_area = thp_get_unmapped_area,
	.splice_read	= filemap_splice_read,
};