unicode: Add utf8-data module

utf8data.h contains a large database table which is an auto-generated
decoding trie for the Unicode normalization functions.

Allow building it into a separate module.

Based on a patch from Shreeya Patel <shreeya.patel@collabora.com>.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
This commit is contained in:
Christoph Hellwig
2021-09-15 09:00:05 +02:00
committed by Gabriel Krisman Bertazi
parent 6ca99ce756
commit 2b3d047870
9 changed files with 126 additions and 91 deletions

View File

@@ -13,25 +13,7 @@
#include <linux/module.h>
#include <linux/unicode.h>
int utf8version_is_supported(unsigned int version);
/*
* Look for the correct const struct utf8data for a unicode version.
* Returns NULL if the version requested is too new.
*
* Two normalization forms are supported: nfdi and nfdicf.
*
* nfdi:
* - Apply unicode normalization form NFD.
* - Remove any Default_Ignorable_Code_Point.
*
* nfdicf:
* - Apply unicode normalization form NFD.
* - Remove any Default_Ignorable_Code_Point.
* - Apply a full casefold (C + F).
*/
extern const struct utf8data *utf8nfdi(unsigned int maxage);
extern const struct utf8data *utf8nfdicf(unsigned int maxage);
int utf8version_is_supported(const struct unicode_map *um, unsigned int version);
/*
* Determine the length of the normalized form of the string,
@@ -78,4 +60,24 @@ int utf8ncursor(struct utf8cursor *u8c, const struct unicode_map *um,
*/
extern int utf8byte(struct utf8cursor *u8c);
/*
 * Descriptor for one entry of a generated normalization table.
 *
 * NOTE(review): the field meanings below are inferred from the member
 * names only; confirm against the table generator before relying on them.
 */
struct utf8data {
	/* presumably the newest unicode version ("age") this entry covers */
	unsigned int maxage;
	/* presumably the start position of this entry inside the data blob */
	unsigned int offset;
};
/*
 * Collects pointers to the generated unicode tables in one object.
 * Three of the four pointers are paired with a "_size" member.
 *
 * NOTE(review): the "_size" members presumably hold element counts rather
 * than byte sizes, and the nfdi/nfdicf names presumably map to the two
 * normalization forms (NFD with default-ignorables removed, without and
 * with full casefolding) -- confirm against the code that fills the table.
 */
struct utf8data_table {
	/* table of unicode ages (versions) and its size */
	const unsigned int *utf8agetab;
	int utf8agetab_size;

	/* per-version entries for the nfdicf form and their size */
	const struct utf8data *utf8nfdicfdata;
	int utf8nfdicfdata_size;

	/* per-version entries for the nfdi form and their size */
	const struct utf8data *utf8nfdidata;
	int utf8nfdidata_size;

	/* raw byte blob; no size is carried alongside it */
	const unsigned char *utf8data;
};

/* Declaration only: the instance itself is defined outside this header. */
extern struct utf8data_table utf8_data_table;
#endif /* UTF8NORM_H */