diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig
index addfe608d08e..f57255ab88ed 100644
--- a/fs/erofs/Kconfig
+++ b/fs/erofs/Kconfig
@@ -82,3 +82,19 @@ config EROFS_FS_ZIP
 	  Enable fixed-sized output compression for EROFS.
 
 	  If you don't want to enable compression feature, say N.
+
+config EROFS_FS_ZIP_LZMA
+	bool "EROFS LZMA compressed data support"
+	depends on EROFS_FS_ZIP
+	select XZ_DEC
+	select XZ_DEC_MICROLZMA
+	help
+	  Saying Y here includes support for reading EROFS file systems
+	  containing LZMA compressed data, specifically called microLZMA. It
+	  gives better compression ratios than the LZ4 algorithm, at the
+	  expense of more CPU overhead.
+
+	  LZMA support is an experimental feature for now and so most file
+	  systems will be readable without selecting this option.
+
+	  If unsure, say N.
diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile
index 1f9aced49070..756fe2d65272 100644
--- a/fs/erofs/Makefile
+++ b/fs/erofs/Makefile
@@ -4,3 +4,4 @@ obj-$(CONFIG_EROFS_FS) += erofs.o
 erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o
 erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
 erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o
+erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index ad62d1b4d371..8ea6a9b14962 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -20,6 +20,12 @@ struct z_erofs_decompress_req {
 	bool inplace_io, partial_decoding;
 };
 
+struct z_erofs_decompressor {
+	int (*decompress)(struct z_erofs_decompress_req *rq,
+			  struct list_head *pagepool);
+	char *name;
+};
+
 /* some special page->private (unsigned long, see below) */
 #define Z_EROFS_SHORTLIVED_PAGE		(-1UL << 2)
 #define Z_EROFS_PREALLOCATED_PAGE	(-2UL << 2)
@@ -75,7 +81,17 @@ static inline bool z_erofs_put_shortlivedpage(struct list_head *pagepool,
 	return true;
 }
 
+#define MNGD_MAPPING(sbi)	((sbi)->managed_cache->i_mapping)
+static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi,
+					 struct page *page)
+{
+	return page->mapping == MNGD_MAPPING(sbi);
+}
+
 int z_erofs_decompress(struct z_erofs_decompress_req *rq,
 		       struct list_head *pagepool);
 
+/* prototypes for specific algorithms */
+int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
+			    struct list_head *pagepool);
 #endif
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 8fd7af9d6b38..8a624d73c185 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -16,12 +16,6 @@
 #define LZ4_DECOMPRESS_INPLACE_MARGIN(srcsize)  (((srcsize) >> 8) + 32)
 #endif
 
-struct z_erofs_decompressor {
-	int (*decompress)(struct z_erofs_decompress_req *rq,
-			  struct list_head *pagepool);
-	char *name;
-};
-
 int z_erofs_load_lz4_config(struct super_block *sb,
 			    struct erofs_super_block *dsb,
 			    struct z_erofs_lz4_cfgs *lz4, int size)
@@ -349,6 +343,12 @@ static struct z_erofs_decompressor decompressors[] = {
 		.decompress = z_erofs_lz4_decompress,
 		.name = "lz4"
 	},
+#ifdef CONFIG_EROFS_FS_ZIP_LZMA
+	[Z_EROFS_COMPRESSION_LZMA] = {
+		.decompress = z_erofs_lzma_decompress,
+		.name = "lzma"
+	},
+#endif
 };
 
 int z_erofs_decompress(struct z_erofs_decompress_req *rq,
diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c
new file mode 100644
index 000000000000..bd7d9809ecf7
--- /dev/null
+++ b/fs/erofs/decompressor_lzma.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/xz.h>
+#include <linux/module.h>
+#include "compress.h"
+
+struct z_erofs_lzma {
+	struct z_erofs_lzma *next;
+	struct xz_dec_microlzma *state;
+	struct xz_buf buf;
+	u8 bounce[PAGE_SIZE];
+};
+
+/* considering the LZMA performance, no need to use a lockless list for now */
+static DEFINE_SPINLOCK(z_erofs_lzma_lock);
+static unsigned int z_erofs_lzma_max_dictsize;
+static unsigned int z_erofs_lzma_nstrms, z_erofs_lzma_avail_strms;
+static struct z_erofs_lzma *z_erofs_lzma_head;
+static DECLARE_WAIT_QUEUE_HEAD(z_erofs_lzma_wq);
+
+module_param_named(lzma_streams, z_erofs_lzma_nstrms, uint, 0444);
+
+/* free every pooled LZMA stream together with its decoder state */
+void z_erofs_lzma_exit(void)
+{
+	/* there should be no running fs instance */
+	while (z_erofs_lzma_avail_strms) {
+		struct z_erofs_lzma *strm;
+
+		spin_lock(&z_erofs_lzma_lock);
+		strm = z_erofs_lzma_head;
+		if (!strm) {
+			spin_unlock(&z_erofs_lzma_lock);
+			DBG_BUGON(1);
+			return;
+		}
+		z_erofs_lzma_head = NULL;
+		spin_unlock(&z_erofs_lzma_lock);
+
+		while (strm) {
+			struct z_erofs_lzma *n = strm->next;
+
+			if (strm->state)
+				xz_dec_microlzma_end(strm->state);
+			kfree(strm);
+			--z_erofs_lzma_avail_strms;
+			strm = n;
+		}
+	}
+}
+
+/* fill the stream pool: lzma_streams entries or one per possible CPU */
+int z_erofs_lzma_init(void)
+{
+	unsigned int i;
+
+	/* by default, use # of possible CPUs instead */
+	if (!z_erofs_lzma_nstrms)
+		z_erofs_lzma_nstrms = num_possible_cpus();
+
+	for (i = 0; i < z_erofs_lzma_nstrms; ++i) {
+		struct z_erofs_lzma *strm = kzalloc(sizeof(*strm), GFP_KERNEL);
+
+		if (!strm) {
+			z_erofs_lzma_exit();
+			return -ENOMEM;
+		}
+		spin_lock(&z_erofs_lzma_lock);
+		strm->next = z_erofs_lzma_head;
+		z_erofs_lzma_head = strm;
+		spin_unlock(&z_erofs_lzma_lock);
+		++z_erofs_lzma_avail_strms;
+	}
+	return 0;
+}
+
+/*
+ * Validate the on-disk LZMA configuration and, if its dictionary is larger
+ * than any seen so far, reallocate every pooled decoder with the new size.
+ * Returns 0 on success, -EINVAL for a bad configuration or -ENOMEM.
+ */
+int z_erofs_load_lzma_config(struct super_block *sb,
+			     struct erofs_super_block *dsb,
+			     struct z_erofs_lzma_cfgs *lzma, int size)
+{
+	static DEFINE_MUTEX(lzma_resize_mutex);
+	unsigned int dict_size, i;
+	struct z_erofs_lzma *strm, *head = NULL;
+	int err;
+
+	if (!lzma || size < sizeof(struct z_erofs_lzma_cfgs)) {
+		erofs_err(sb, "invalid lzma cfgs, size=%d", size);
+		return -EINVAL;
+	}
+	if (lzma->format) {
+		erofs_err(sb, "unidentified lzma format %x, please check kernel version",
+			  le16_to_cpu(lzma->format));
+		return -EINVAL;
+	}
+	dict_size = le32_to_cpu(lzma->dict_size);
+	if (dict_size > Z_EROFS_LZMA_MAX_DICT_SIZE || dict_size < 4096) {
+		erofs_err(sb, "unsupported lzma dictionary size %u",
+			  dict_size);
+		return -EINVAL;
+	}
+
+	erofs_info(sb, "EXPERIMENTAL MicroLZMA in use. Use at your own risk!");
+
+	/* in case 2 z_erofs_load_lzma_config() race to avoid deadlock */
+	mutex_lock(&lzma_resize_mutex);
+
+	if (z_erofs_lzma_max_dictsize >= dict_size) {
+		mutex_unlock(&lzma_resize_mutex);
+		return 0;
+	}
+
+	/* 1. collect/isolate all streams for the following check */
+	for (i = 0; i < z_erofs_lzma_avail_strms; ++i) {
+		struct z_erofs_lzma *last;
+
+again:
+		spin_lock(&z_erofs_lzma_lock);
+		strm = z_erofs_lzma_head;
+		if (!strm) {
+			spin_unlock(&z_erofs_lzma_lock);
+			wait_event(z_erofs_lzma_wq,
+				   READ_ONCE(z_erofs_lzma_head));
+			goto again;
+		}
+		z_erofs_lzma_head = NULL;
+		spin_unlock(&z_erofs_lzma_lock);
+
+		for (last = strm; last->next; last = last->next)
+			++i;
+		last->next = head;
+		head = strm;
+	}
+
+	err = 0;
+	/* 2. walk each isolated stream and grow max dict_size if needed */
+	for (strm = head; strm; strm = strm->next) {
+		if (strm->state)
+			xz_dec_microlzma_end(strm->state);
+		strm->state = xz_dec_microlzma_alloc(XZ_PREALLOC, dict_size);
+		if (!strm->state)
+			err = -ENOMEM;
+	}
+
+	/* 3. push back all to the global list and update max dict_size */
+	spin_lock(&z_erofs_lzma_lock);
+	DBG_BUGON(z_erofs_lzma_head);
+	z_erofs_lzma_head = head;
+	spin_unlock(&z_erofs_lzma_lock);
+	/* wake decompressors that went to sleep while the pool was isolated */
+	wake_up_all(&z_erofs_lzma_wq);
+
+	z_erofs_lzma_max_dictsize = dict_size;
+	mutex_unlock(&lzma_resize_mutex);
+	return err;
+}
+
+/*
+ * Decompress one request with a pooled MicroLZMA stream, feeding input and
+ * output page by page.  Returns 0 on success or -EFSCORRUPTED on bad data.
+ */
+int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
+			    struct list_head *pagepool)
+{
+	const unsigned int nrpages_out =
+		PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
+	const unsigned int nrpages_in =
+		PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;
+	unsigned int inputmargin, inlen, outlen, pageofs;
+	struct z_erofs_lzma *strm;
+	u8 *kin;
+	bool bounced = false;
+	int no, ni, j, err = 0;
+
+	/* 1. get the exact LZMA compressed size */
+	kin = kmap(*rq->in);
+	inputmargin = 0;
+	while (!kin[inputmargin & ~PAGE_MASK])
+		if (!(++inputmargin & ~PAGE_MASK))
+			break;
+
+	if (inputmargin >= PAGE_SIZE) {
+		kunmap(*rq->in);
+		return -EFSCORRUPTED;
+	}
+	rq->inputsize -= inputmargin;
+
+	/* 2. get an available lzma context */
+again:
+	spin_lock(&z_erofs_lzma_lock);
+	strm = z_erofs_lzma_head;
+	if (!strm) {
+		spin_unlock(&z_erofs_lzma_lock);
+		wait_event(z_erofs_lzma_wq, READ_ONCE(z_erofs_lzma_head));
+		goto again;
+	}
+	z_erofs_lzma_head = strm->next;
+	spin_unlock(&z_erofs_lzma_lock);
+
+	/* 3. multi-call decompress */
+	inlen = rq->inputsize;
+	outlen = rq->outputsize;
+	xz_dec_microlzma_reset(strm->state, inlen, outlen,
+			       !rq->partial_decoding);
+	pageofs = rq->pageofs_out;
+	strm->buf.in = kin + inputmargin;
+	strm->buf.in_pos = 0;
+	strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE - inputmargin);
+	inlen -= strm->buf.in_size;
+	strm->buf.out = NULL;
+	strm->buf.out_pos = 0;
+	strm->buf.out_size = 0;
+
+	for (ni = 0, no = -1;;) {
+		enum xz_ret xz_err;
+
+		if (strm->buf.out_pos == strm->buf.out_size) {
+			if (strm->buf.out) {
+				kunmap(rq->out[no]);
+				strm->buf.out = NULL;
+			}
+
+			if (++no >= nrpages_out || !outlen) {
+				erofs_err(rq->sb, "decompressed buf out of bound");
+				err = -EFSCORRUPTED;
+				break;
+			}
+			strm->buf.out_pos = 0;
+			strm->buf.out_size = min_t(u32, outlen,
+						   PAGE_SIZE - pageofs);
+			outlen -= strm->buf.out_size;
+			if (rq->out[no])
+				strm->buf.out = kmap(rq->out[no]) + pageofs;
+			pageofs = 0;
+		} else if (strm->buf.in_pos == strm->buf.in_size) {
+			kunmap(rq->in[ni]);
+
+			if (++ni >= nrpages_in || !inlen) {
+				erofs_err(rq->sb, "compressed buf out of bound");
+				err = -EFSCORRUPTED;
+				break;
+			}
+			strm->buf.in_pos = 0;
+			strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE);
+			inlen -= strm->buf.in_size;
+			kin = kmap(rq->in[ni]);
+			strm->buf.in = kin;
+			bounced = false;
+		}
+
+		/*
+		 * Handle overlapping: Use bounced buffer if the compressed
+		 * data is under processing; Otherwise, Use short-lived pages
+		 * from the on-stack pagepool where pages share with the same
+		 * request.
+		 */
+		if (!bounced && rq->out[no] == rq->in[ni]) {
+			memcpy(strm->bounce, strm->buf.in, strm->buf.in_size);
+			strm->buf.in = strm->bounce;
+			bounced = true;
+		}
+		for (j = ni + 1; j < nrpages_in; ++j) {
+			struct page *tmppage;
+
+			if (rq->out[no] != rq->in[j])
+				continue;
+
+			DBG_BUGON(erofs_page_is_managed(EROFS_SB(rq->sb),
+							rq->in[j]));
+			tmppage = erofs_allocpage(pagepool,
+						  GFP_KERNEL | __GFP_NOFAIL);
+			set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
+			copy_highpage(tmppage, rq->in[j]);
+			rq->in[j] = tmppage;
+		}
+		xz_err = xz_dec_microlzma_run(strm->state, &strm->buf);
+		DBG_BUGON(strm->buf.out_pos > strm->buf.out_size);
+		DBG_BUGON(strm->buf.in_pos > strm->buf.in_size);
+
+		if (xz_err != XZ_OK) {
+			if (xz_err == XZ_STREAM_END && !outlen)
+				break;
+			erofs_err(rq->sb, "failed to decompress %d in[%u] out[%u]",
+				  xz_err, rq->inputsize, rq->outputsize);
+			err = -EFSCORRUPTED;
+			break;
+		}
+	}
+	if (no < nrpages_out && strm->buf.out)
+		kunmap(rq->out[no]);
+	if (ni < nrpages_in)
+		kunmap(rq->in[ni]);
+	/* 4. push back LZMA stream context to the global list */
+	spin_lock(&z_erofs_lzma_lock);
+	strm->next = z_erofs_lzma_head;
+	z_erofs_lzma_head = strm;
+	spin_unlock(&z_erofs_lzma_lock);
+	wake_up(&z_erofs_lzma_wq);
+	return err;
+}
diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
index 1c2917181346..083997a034e5 100644
--- a/fs/erofs/erofs_fs.h
+++ b/fs/erofs/erofs_fs.h
@@ -264,10 +264,11 @@ struct erofs_inode_chunk_index {
 
 /* available compression algorithm types (for h_algorithmtype) */
 enum {
-	Z_EROFS_COMPRESSION_LZ4	= 0,
+	Z_EROFS_COMPRESSION_LZ4		= 0,
+	Z_EROFS_COMPRESSION_LZMA	= 1,
 	Z_EROFS_COMPRESSION_MAX
 };
-#define Z_EROFS_ALL_COMPR_ALGS		(1 << (Z_EROFS_COMPRESSION_MAX - 1))
+#define Z_EROFS_ALL_COMPR_ALGS		((1 << Z_EROFS_COMPRESSION_MAX) - 1)
 
 /* 14 bytes (+ length field = 16 bytes) */
 struct z_erofs_lz4_cfgs {
@@ -276,6 +277,15 @@ struct z_erofs_lz4_cfgs {
 	u8 reserved[10];
 } __packed;
 
+/* 14 bytes (+ length field = 16 bytes) */
+struct z_erofs_lzma_cfgs {
+	__le32 dict_size;
+	__le16 format;
+	u8 reserved[8];
+} __packed;
+
+#define Z_EROFS_LZMA_MAX_DICT_SIZE	(8 * Z_EROFS_PCLUSTER_MAX_SIZE)
+
 /*
  * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
  *  e.g. for 4k logical cluster size,      4B        if compacted 2B is off;
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 354ce3cb2b32..a6a53d22dfd6 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -407,6 +407,8 @@ struct erofs_map_blocks {
  * approach instead if possible since it's more metadata lightweight.)
  */
 #define EROFS_GET_BLOCKS_FIEMAP	0x0002
+/* Used to map the whole extent if non-negligible data is requested for LZMA */
+#define EROFS_GET_BLOCKS_READMORE	0x0004
 
 enum {
 	Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX,
@@ -537,6 +539,26 @@ static inline int z_erofs_load_lz4_config(struct super_block *sb,
 }
 #endif	/* !CONFIG_EROFS_FS_ZIP */
 
+#ifdef CONFIG_EROFS_FS_ZIP_LZMA
+int z_erofs_lzma_init(void);
+void z_erofs_lzma_exit(void);
+int z_erofs_load_lzma_config(struct super_block *sb,
+			     struct erofs_super_block *dsb,
+			     struct z_erofs_lzma_cfgs *lzma, int size);
+#else
+static inline int z_erofs_lzma_init(void) { return 0; }
+static inline void z_erofs_lzma_exit(void) {}
+static inline int z_erofs_load_lzma_config(struct super_block *sb,
+			     struct erofs_super_block *dsb,
+			     struct z_erofs_lzma_cfgs *lzma, int size) {
+	if (lzma) {
+		erofs_err(sb, "lzma algorithm isn't enabled");
+		return -EINVAL;
+	}
+	return 0;
+}
+#endif	/* !CONFIG_EROFS_FS_ZIP_LZMA */
+
 #define EFSCORRUPTED    EUCLEAN         /* Filesystem is corrupted */
 
 #endif	/* __EROFS_INTERNAL_H */
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 2cfe1ce0f766..6a969b1e0ee6 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -225,6 +225,9 @@ static int erofs_load_compr_cfgs(struct super_block *sb,
 		case Z_EROFS_COMPRESSION_LZ4:
 			ret = z_erofs_load_lz4_config(sb, dsb, data, size);
 			break;
+		case Z_EROFS_COMPRESSION_LZMA:
+			ret = z_erofs_load_lzma_config(sb, dsb, data, size);
+			break;
 		default:
 			DBG_BUGON(1);
 			ret = -EFAULT;
@@ -840,6 +843,10 @@ static int __init erofs_module_init(void)
 	if (err)
 		goto shrinker_err;
 
+	err = z_erofs_lzma_init();
+	if (err)
+		goto lzma_err;
+
 	erofs_pcpubuf_init();
 	err = z_erofs_init_zip_subsystem();
 	if (err)
@@ -854,6 +861,8 @@ static int __init erofs_module_init(void)
 fs_err:
 	z_erofs_exit_zip_subsystem();
 zip_err:
+	z_erofs_lzma_exit();
+lzma_err:
 	erofs_exit_shrinker();
 shrinker_err:
 	kmem_cache_destroy(erofs_inode_cachep);
@@ -864,11 +873,13 @@ icache_err:
 static void __exit erofs_module_exit(void)
 {
 	unregister_filesystem(&erofs_fs_type);
-	z_erofs_exit_zip_subsystem();
-	erofs_exit_shrinker();
 
-	/* Ensure all RCU free inodes are safe before cache is destroyed. */
+	/* Ensure all RCU free inodes / pclusters are safe to be destroyed. */
 	rcu_barrier();
+
+	z_erofs_exit_zip_subsystem();
+	z_erofs_lzma_exit();
+	erofs_exit_shrinker();
 	kmem_cache_destroy(erofs_inode_cachep);
 	erofs_pcpubuf_exit();
 }
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 98d3bd25d894..d55e6215cd44 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1404,8 +1404,8 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
 
 	if (backmost) {
 		map->m_la = end;
-		/* TODO: pass in EROFS_GET_BLOCKS_READMORE for LZMA later */
-		err = z_erofs_map_blocks_iter(inode, map, 0);
+		err = z_erofs_map_blocks_iter(inode, map,
+					      EROFS_GET_BLOCKS_READMORE);
 		if (err)
 			return;
 
diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h
index 3a008f1b9f78..879df5362777 100644
--- a/fs/erofs/zdata.h
+++ b/fs/erofs/zdata.h
@@ -94,13 +94,6 @@ struct z_erofs_decompressqueue {
 	} u;
 };
 
-#define MNGD_MAPPING(sbi)	((sbi)->managed_cache->i_mapping)
-static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi,
-					 struct page *page)
-{
-	return page->mapping == MNGD_MAPPING(sbi);
-}
-
 #define Z_EROFS_ONLINEPAGE_COUNT_BITS   2
 #define Z_EROFS_ONLINEPAGE_COUNT_MASK   ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1)
 #define Z_EROFS_ONLINEPAGE_INDEX_SHIFT  (Z_EROFS_ONLINEPAGE_COUNT_BITS)
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 85d0289429b3..660489a7fb64 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -672,7 +672,10 @@ int z_erofs_map_blocks_iter(struct inode *inode,
 	else
 		map->m_algorithmformat = vi->z_algorithmtype[0];
 
-	if (flags & EROFS_GET_BLOCKS_FIEMAP) {
+	if ((flags & EROFS_GET_BLOCKS_FIEMAP) ||
+	    ((flags & EROFS_GET_BLOCKS_READMORE) &&
+	     map->m_algorithmformat == Z_EROFS_COMPRESSION_LZMA &&
+	     map->m_llen >= EROFS_BLKSIZ)) {
 		err = z_erofs_get_extent_decompressedlen(&m);
 		if (!err)
 			map->m_flags |= EROFS_MAP_FULL_MAPPED;