forked from Minki/linux
NLS: update handling of Unicode
This patch (as1239) updates the kernel's treatment of Unicode. The character-set conversion routines are well behind the current state of the Unicode specification: they don't recognize the existence of code points beyond plane 0 or of surrogate pairs in the UTF-16 encoding. The old wchar_t 16-bit type is retained because it's still used in lots of places. This shouldn't cause any new problems; if a conversion now results in an invalid 16-bit code then before it must have yielded an undefined code. Difficult-to-read names like "utf_mbstowcs" are replaced with more transparent names like "utf8s_to_utf16s" and the ordering of the parameters is rationalized (buffer lengths come immediately after the pointers they refer to, and the inputs precede the outputs). Fortunately the low-level conversion routines are used in only a few places; the interfaces to the higher-level uni2char and char2uni methods have been left unchanged. Signed-off-by: Alan Stern <stern@rowland.harvard.edu> Acked-by: Clemens Ladisch <clemens@ladisch.de> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
This commit is contained in:
parent
a853a3d4eb
commit
74675a5850
@ -780,14 +780,13 @@ int usb_string(struct usb_device *dev, int index, char *buf, size_t size)
|
|||||||
{
|
{
|
||||||
unsigned char *tbuf;
|
unsigned char *tbuf;
|
||||||
int err;
|
int err;
|
||||||
unsigned int u;
|
|
||||||
|
|
||||||
if (dev->state == USB_STATE_SUSPENDED)
|
if (dev->state == USB_STATE_SUSPENDED)
|
||||||
return -EHOSTUNREACH;
|
return -EHOSTUNREACH;
|
||||||
if (size <= 0 || !buf || !index)
|
if (size <= 0 || !buf || !index)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
buf[0] = 0;
|
buf[0] = 0;
|
||||||
tbuf = kmalloc(256 + 2, GFP_NOIO);
|
tbuf = kmalloc(256, GFP_NOIO);
|
||||||
if (!tbuf)
|
if (!tbuf)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
@ -814,12 +813,9 @@ int usb_string(struct usb_device *dev, int index, char *buf, size_t size)
|
|||||||
if (err < 0)
|
if (err < 0)
|
||||||
goto errout;
|
goto errout;
|
||||||
|
|
||||||
for (u = 2; u < err; u += 2)
|
|
||||||
le16_to_cpus((u16 *)&tbuf[u]);
|
|
||||||
tbuf[u] = 0;
|
|
||||||
tbuf[u + 1] = 0;
|
|
||||||
size--; /* leave room for trailing NULL char in output buffer */
|
size--; /* leave room for trailing NULL char in output buffer */
|
||||||
err = utf8_wcstombs(buf, (u16 *)&tbuf[2], size);
|
err = utf16s_to_utf8s((wchar_t *) &tbuf[2], (err - 2) / 2,
|
||||||
|
UTF16_LITTLE_ENDIAN, buf, size);
|
||||||
buf[err] = 0;
|
buf[err] = 0;
|
||||||
|
|
||||||
if (tbuf[1] != USB_DT_STRING)
|
if (tbuf[1] != USB_DT_STRING)
|
||||||
|
@ -513,7 +513,7 @@ befs_utf2nls(struct super_block *sb, const char *in,
|
|||||||
{
|
{
|
||||||
struct nls_table *nls = BEFS_SB(sb)->nls;
|
struct nls_table *nls = BEFS_SB(sb)->nls;
|
||||||
int i, o;
|
int i, o;
|
||||||
wchar_t uni;
|
unicode_t uni;
|
||||||
int unilen, utflen;
|
int unilen, utflen;
|
||||||
char *result;
|
char *result;
|
||||||
/* The utf8->nls conversion won't make the final nls string bigger
|
/* The utf8->nls conversion won't make the final nls string bigger
|
||||||
@ -539,16 +539,16 @@ befs_utf2nls(struct super_block *sb, const char *in,
|
|||||||
for (i = o = 0; i < in_len; i += utflen, o += unilen) {
|
for (i = o = 0; i < in_len; i += utflen, o += unilen) {
|
||||||
|
|
||||||
/* convert from UTF-8 to Unicode */
|
/* convert from UTF-8 to Unicode */
|
||||||
utflen = utf8_mbtowc(&uni, &in[i], in_len - i);
|
utflen = utf8_to_utf32(&in[i], in_len - i, &uni);
|
||||||
if (utflen < 0) {
|
if (utflen < 0)
|
||||||
goto conv_err;
|
goto conv_err;
|
||||||
}
|
|
||||||
|
|
||||||
/* convert from Unicode to nls */
|
/* convert from Unicode to nls */
|
||||||
unilen = nls->uni2char(uni, &result[o], in_len - o);
|
if (uni > MAX_WCHAR_T)
|
||||||
if (unilen < 0) {
|
goto conv_err;
|
||||||
|
unilen = nls->uni2char(uni, &result[o], in_len - o);
|
||||||
|
if (unilen < 0)
|
||||||
goto conv_err;
|
goto conv_err;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
result[o] = '\0';
|
result[o] = '\0';
|
||||||
*out_len = o;
|
*out_len = o;
|
||||||
@ -619,15 +619,13 @@ befs_nls2utf(struct super_block *sb, const char *in,
|
|||||||
|
|
||||||
/* convert from nls to unicode */
|
/* convert from nls to unicode */
|
||||||
unilen = nls->char2uni(&in[i], in_len - i, &uni);
|
unilen = nls->char2uni(&in[i], in_len - i, &uni);
|
||||||
if (unilen < 0) {
|
if (unilen < 0)
|
||||||
goto conv_err;
|
goto conv_err;
|
||||||
}
|
|
||||||
|
|
||||||
/* convert from unicode to UTF-8 */
|
/* convert from unicode to UTF-8 */
|
||||||
utflen = utf8_wctomb(&result[o], uni, 3);
|
utflen = utf32_to_utf8(uni, &result[o], 3);
|
||||||
if (utflen <= 0) {
|
if (utflen <= 0)
|
||||||
goto conv_err;
|
goto conv_err;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
result[o] = '\0';
|
result[o] = '\0';
|
||||||
|
29
fs/fat/dir.c
29
fs/fat/dir.c
@ -22,6 +22,19 @@
|
|||||||
#include <asm/uaccess.h>
|
#include <asm/uaccess.h>
|
||||||
#include "fat.h"
|
#include "fat.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Maximum buffer size of short name.
|
||||||
|
* [(MSDOS_NAME + '.') * max one char + nul]
|
||||||
|
* For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul]
|
||||||
|
*/
|
||||||
|
#define FAT_MAX_SHORT_SIZE ((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1)
|
||||||
|
/*
|
||||||
|
* Maximum buffer size of unicode chars from slots.
|
||||||
|
* [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)]
|
||||||
|
*/
|
||||||
|
#define FAT_MAX_UNI_CHARS ((MSDOS_SLOTS - 1) * 13 + 1)
|
||||||
|
#define FAT_MAX_UNI_SIZE (FAT_MAX_UNI_CHARS * sizeof(wchar_t))
|
||||||
|
|
||||||
static inline loff_t fat_make_i_pos(struct super_block *sb,
|
static inline loff_t fat_make_i_pos(struct super_block *sb,
|
||||||
struct buffer_head *bh,
|
struct buffer_head *bh,
|
||||||
struct msdos_dir_entry *de)
|
struct msdos_dir_entry *de)
|
||||||
@ -171,7 +184,8 @@ static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni,
|
|||||||
unsigned char *buf, int size)
|
unsigned char *buf, int size)
|
||||||
{
|
{
|
||||||
if (sbi->options.utf8)
|
if (sbi->options.utf8)
|
||||||
return utf8_wcstombs(buf, uni, size);
|
return utf16s_to_utf8s(uni, FAT_MAX_UNI_CHARS,
|
||||||
|
UTF16_HOST_ENDIAN, buf, size);
|
||||||
else
|
else
|
||||||
return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate,
|
return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate,
|
||||||
sbi->nls_io);
|
sbi->nls_io);
|
||||||
@ -324,19 +338,6 @@ parse_long:
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Maximum buffer size of short name.
|
|
||||||
* [(MSDOS_NAME + '.') * max one char + nul]
|
|
||||||
* For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul]
|
|
||||||
*/
|
|
||||||
#define FAT_MAX_SHORT_SIZE ((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1)
|
|
||||||
/*
|
|
||||||
* Maximum buffer size of unicode chars from slots.
|
|
||||||
* [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)]
|
|
||||||
*/
|
|
||||||
#define FAT_MAX_UNI_CHARS ((MSDOS_SLOTS - 1) * 13 + 1)
|
|
||||||
#define FAT_MAX_UNI_SIZE (FAT_MAX_UNI_CHARS * sizeof(wchar_t))
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return values: negative -> error, 0 -> not found, positive -> found,
|
* Return values: negative -> error, 0 -> not found, positive -> found,
|
||||||
* value is the total amount of slots, including the shortname entry.
|
* value is the total amount of slots, including the shortname entry.
|
||||||
|
@ -502,11 +502,11 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
|
|||||||
if (utf8) {
|
if (utf8) {
|
||||||
int name_len = strlen(name);
|
int name_len = strlen(name);
|
||||||
|
|
||||||
*outlen = utf8_mbstowcs((wchar_t *)outname, name, PATH_MAX);
|
*outlen = utf8s_to_utf16s(name, PATH_MAX, (wchar_t *) outname);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We stripped '.'s before and set len appropriately,
|
* We stripped '.'s before and set len appropriately,
|
||||||
* but utf8_mbstowcs doesn't care about len
|
* but utf8s_to_utf16s doesn't care about len
|
||||||
*/
|
*/
|
||||||
*outlen -= (name_len - len);
|
*outlen -= (name_len - len);
|
||||||
|
|
||||||
|
@ -37,37 +37,6 @@ uni16_to_x8(unsigned char *ascii, __be16 *uni, int len, struct nls_table *nls)
|
|||||||
return (op - ascii);
|
return (op - ascii);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Convert big endian wide character string to utf8 */
|
|
||||||
static int
|
|
||||||
wcsntombs_be(__u8 *s, const __u8 *pwcs, int inlen, int maxlen)
|
|
||||||
{
|
|
||||||
const __u8 *ip;
|
|
||||||
__u8 *op;
|
|
||||||
int size;
|
|
||||||
__u16 c;
|
|
||||||
|
|
||||||
op = s;
|
|
||||||
ip = pwcs;
|
|
||||||
while ((*ip || ip[1]) && (maxlen > 0) && (inlen > 0)) {
|
|
||||||
c = (*ip << 8) | ip[1];
|
|
||||||
if (c > 0x7f) {
|
|
||||||
size = utf8_wctomb(op, c, maxlen);
|
|
||||||
if (size == -1) {
|
|
||||||
/* Ignore character and move on */
|
|
||||||
maxlen--;
|
|
||||||
} else {
|
|
||||||
op += size;
|
|
||||||
maxlen -= size;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
*op++ = (__u8) c;
|
|
||||||
}
|
|
||||||
ip += 2;
|
|
||||||
inlen--;
|
|
||||||
}
|
|
||||||
return (op - s);
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
int
|
||||||
get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode)
|
get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode)
|
||||||
{
|
{
|
||||||
@ -79,8 +48,9 @@ get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, st
|
|||||||
nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset;
|
nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset;
|
||||||
|
|
||||||
if (utf8) {
|
if (utf8) {
|
||||||
len = wcsntombs_be(outname, de->name,
|
len = utf16s_to_utf8s((const wchar_t *) de->name,
|
||||||
de->name_len[0] >> 1, PAGE_SIZE);
|
de->name_len[0] >> 1, UTF16_BIG_ENDIAN,
|
||||||
|
outname, PAGE_SIZE);
|
||||||
} else {
|
} else {
|
||||||
len = uni16_to_x8(outname, (__be16 *) de->name,
|
len = uni16_to_x8(outname, (__be16 *) de->name,
|
||||||
de->name_len[0] >> 1, nls);
|
de->name_len[0] >> 1, nls);
|
||||||
|
@ -1113,11 +1113,13 @@ ncp__io2vol(struct ncp_server *server, unsigned char *vname, unsigned int *vlen,
|
|||||||
|
|
||||||
if (NCP_IS_FLAG(server, NCP_FLAG_UTF8)) {
|
if (NCP_IS_FLAG(server, NCP_FLAG_UTF8)) {
|
||||||
int k;
|
int k;
|
||||||
|
unicode_t u;
|
||||||
|
|
||||||
k = utf8_mbtowc(&ec, iname, iname_end - iname);
|
k = utf8_to_utf32(iname, iname_end - iname, &u);
|
||||||
if (k < 0)
|
if (k < 0 || u > MAX_WCHAR_T)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
iname += k;
|
iname += k;
|
||||||
|
ec = u;
|
||||||
} else {
|
} else {
|
||||||
if (*iname == NCP_ESC) {
|
if (*iname == NCP_ESC) {
|
||||||
int k;
|
int k;
|
||||||
@ -1214,7 +1216,7 @@ ncp__vol2io(struct ncp_server *server, unsigned char *iname, unsigned int *ilen,
|
|||||||
if (NCP_IS_FLAG(server, NCP_FLAG_UTF8)) {
|
if (NCP_IS_FLAG(server, NCP_FLAG_UTF8)) {
|
||||||
int k;
|
int k;
|
||||||
|
|
||||||
k = utf8_wctomb(iname, ec, iname_end - iname);
|
k = utf32_to_utf8(ec, iname, iname_end - iname);
|
||||||
if (k < 0) {
|
if (k < 0) {
|
||||||
err = -ENAMETOOLONG;
|
err = -ENAMETOOLONG;
|
||||||
goto quit;
|
goto quit;
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
#include <linux/errno.h>
|
#include <linux/errno.h>
|
||||||
#include <linux/kmod.h>
|
#include <linux/kmod.h>
|
||||||
#include <linux/spinlock.h>
|
#include <linux/spinlock.h>
|
||||||
|
#include <asm/byteorder.h>
|
||||||
|
|
||||||
static struct nls_table default_table;
|
static struct nls_table default_table;
|
||||||
static struct nls_table *tables = &default_table;
|
static struct nls_table *tables = &default_table;
|
||||||
@ -43,10 +44,17 @@ static const struct utf8_table utf8_table[] =
|
|||||||
{0, /* end of table */}
|
{0, /* end of table */}
|
||||||
};
|
};
|
||||||
|
|
||||||
int
|
#define UNICODE_MAX 0x0010ffff
|
||||||
utf8_mbtowc(wchar_t *p, const __u8 *s, int n)
|
#define PLANE_SIZE 0x00010000
|
||||||
|
|
||||||
|
#define SURROGATE_MASK 0xfffff800
|
||||||
|
#define SURROGATE_PAIR 0x0000d800
|
||||||
|
#define SURROGATE_LOW 0x00000400
|
||||||
|
#define SURROGATE_BITS 0x000003ff
|
||||||
|
|
||||||
|
int utf8_to_utf32(const u8 *s, int len, unicode_t *pu)
|
||||||
{
|
{
|
||||||
long l;
|
unsigned long l;
|
||||||
int c0, c, nc;
|
int c0, c, nc;
|
||||||
const struct utf8_table *t;
|
const struct utf8_table *t;
|
||||||
|
|
||||||
@ -57,12 +65,13 @@ utf8_mbtowc(wchar_t *p, const __u8 *s, int n)
|
|||||||
nc++;
|
nc++;
|
||||||
if ((c0 & t->cmask) == t->cval) {
|
if ((c0 & t->cmask) == t->cval) {
|
||||||
l &= t->lmask;
|
l &= t->lmask;
|
||||||
if (l < t->lval)
|
if (l < t->lval || l > UNICODE_MAX ||
|
||||||
|
(l & SURROGATE_MASK) == SURROGATE_PAIR)
|
||||||
return -1;
|
return -1;
|
||||||
*p = l;
|
*pu = (unicode_t) l;
|
||||||
return nc;
|
return nc;
|
||||||
}
|
}
|
||||||
if (n <= nc)
|
if (len <= nc)
|
||||||
return -1;
|
return -1;
|
||||||
s++;
|
s++;
|
||||||
c = (*s ^ 0x80) & 0xFF;
|
c = (*s ^ 0x80) & 0xFF;
|
||||||
@ -72,76 +81,119 @@ utf8_mbtowc(wchar_t *p, const __u8 *s, int n)
|
|||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
EXPORT_SYMBOL(utf8_to_utf32);
|
||||||
|
|
||||||
int
|
int utf32_to_utf8(unicode_t u, u8 *s, int maxlen)
|
||||||
utf8_mbstowcs(wchar_t *pwcs, const __u8 *s, int n)
|
|
||||||
{
|
{
|
||||||
__u16 *op;
|
unsigned long l;
|
||||||
const __u8 *ip;
|
|
||||||
int size;
|
|
||||||
|
|
||||||
op = pwcs;
|
|
||||||
ip = s;
|
|
||||||
while (*ip && n > 0) {
|
|
||||||
if (*ip & 0x80) {
|
|
||||||
size = utf8_mbtowc(op, ip, n);
|
|
||||||
if (size == -1) {
|
|
||||||
/* Ignore character and move on */
|
|
||||||
ip++;
|
|
||||||
n--;
|
|
||||||
} else {
|
|
||||||
op++;
|
|
||||||
ip += size;
|
|
||||||
n -= size;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
*op++ = *ip++;
|
|
||||||
n--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return (op - pwcs);
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
|
||||||
utf8_wctomb(__u8 *s, wchar_t wc, int maxlen)
|
|
||||||
{
|
|
||||||
long l;
|
|
||||||
int c, nc;
|
int c, nc;
|
||||||
const struct utf8_table *t;
|
const struct utf8_table *t;
|
||||||
|
|
||||||
if (!s)
|
if (!s)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
l = wc;
|
l = u;
|
||||||
|
if (l > UNICODE_MAX || (l & SURROGATE_MASK) == SURROGATE_PAIR)
|
||||||
|
return -1;
|
||||||
|
|
||||||
nc = 0;
|
nc = 0;
|
||||||
for (t = utf8_table; t->cmask && maxlen; t++, maxlen--) {
|
for (t = utf8_table; t->cmask && maxlen; t++, maxlen--) {
|
||||||
nc++;
|
nc++;
|
||||||
if (l <= t->lmask) {
|
if (l <= t->lmask) {
|
||||||
c = t->shift;
|
c = t->shift;
|
||||||
*s = t->cval | (l >> c);
|
*s = (u8) (t->cval | (l >> c));
|
||||||
while (c > 0) {
|
while (c > 0) {
|
||||||
c -= 6;
|
c -= 6;
|
||||||
s++;
|
s++;
|
||||||
*s = 0x80 | ((l >> c) & 0x3F);
|
*s = (u8) (0x80 | ((l >> c) & 0x3F));
|
||||||
}
|
}
|
||||||
return nc;
|
return nc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
EXPORT_SYMBOL(utf32_to_utf8);
|
||||||
|
|
||||||
int
|
int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs)
|
||||||
utf8_wcstombs(__u8 *s, const wchar_t *pwcs, int maxlen)
|
|
||||||
{
|
{
|
||||||
const __u16 *ip;
|
u16 *op;
|
||||||
__u8 *op;
|
|
||||||
int size;
|
int size;
|
||||||
|
unicode_t u;
|
||||||
|
|
||||||
|
op = pwcs;
|
||||||
|
while (*s && len > 0) {
|
||||||
|
if (*s & 0x80) {
|
||||||
|
size = utf8_to_utf32(s, len, &u);
|
||||||
|
if (size < 0) {
|
||||||
|
/* Ignore character and move on */
|
||||||
|
size = 1;
|
||||||
|
} else if (u >= PLANE_SIZE) {
|
||||||
|
u -= PLANE_SIZE;
|
||||||
|
*op++ = (wchar_t) (SURROGATE_PAIR |
|
||||||
|
((u >> 10) & SURROGATE_BITS));
|
||||||
|
*op++ = (wchar_t) (SURROGATE_PAIR |
|
||||||
|
SURROGATE_LOW |
|
||||||
|
(u & SURROGATE_BITS));
|
||||||
|
} else {
|
||||||
|
*op++ = (wchar_t) u;
|
||||||
|
}
|
||||||
|
s += size;
|
||||||
|
len -= size;
|
||||||
|
} else {
|
||||||
|
*op++ = *s++;
|
||||||
|
len--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return op - pwcs;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(utf8s_to_utf16s);
|
||||||
|
|
||||||
|
static inline unsigned long get_utf16(unsigned c, enum utf16_endian endian)
|
||||||
|
{
|
||||||
|
switch (endian) {
|
||||||
|
default:
|
||||||
|
return c;
|
||||||
|
case UTF16_LITTLE_ENDIAN:
|
||||||
|
return __le16_to_cpu(c);
|
||||||
|
case UTF16_BIG_ENDIAN:
|
||||||
|
return __be16_to_cpu(c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int utf16s_to_utf8s(const wchar_t *pwcs, int len, enum utf16_endian endian,
|
||||||
|
u8 *s, int maxlen)
|
||||||
|
{
|
||||||
|
u8 *op;
|
||||||
|
int size;
|
||||||
|
unsigned long u, v;
|
||||||
|
|
||||||
op = s;
|
op = s;
|
||||||
ip = pwcs;
|
while (len > 0 && maxlen > 0) {
|
||||||
while (*ip && maxlen > 0) {
|
u = get_utf16(*pwcs, endian);
|
||||||
if (*ip > 0x7f) {
|
if (!u)
|
||||||
size = utf8_wctomb(op, *ip, maxlen);
|
break;
|
||||||
|
pwcs++;
|
||||||
|
len--;
|
||||||
|
if (u > 0x7f) {
|
||||||
|
if ((u & SURROGATE_MASK) == SURROGATE_PAIR) {
|
||||||
|
if (u & SURROGATE_LOW) {
|
||||||
|
/* Ignore character and move on */
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (len <= 0)
|
||||||
|
break;
|
||||||
|
v = get_utf16(*pwcs, endian);
|
||||||
|
if ((v & SURROGATE_MASK) != SURROGATE_PAIR ||
|
||||||
|
!(v & SURROGATE_LOW)) {
|
||||||
|
/* Ignore character and move on */
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
u = PLANE_SIZE + ((u & SURROGATE_BITS) << 10)
|
||||||
|
+ (v & SURROGATE_BITS);
|
||||||
|
pwcs++;
|
||||||
|
len--;
|
||||||
|
}
|
||||||
|
size = utf32_to_utf8(u, op, maxlen);
|
||||||
if (size == -1) {
|
if (size == -1) {
|
||||||
/* Ignore character and move on */
|
/* Ignore character and move on */
|
||||||
} else {
|
} else {
|
||||||
@ -149,13 +201,13 @@ utf8_wcstombs(__u8 *s, const wchar_t *pwcs, int maxlen)
|
|||||||
maxlen -= size;
|
maxlen -= size;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
*op++ = (__u8) *ip;
|
*op++ = (u8) u;
|
||||||
maxlen--;
|
maxlen--;
|
||||||
}
|
}
|
||||||
ip++;
|
|
||||||
}
|
}
|
||||||
return (op - s);
|
return op - s;
|
||||||
}
|
}
|
||||||
|
EXPORT_SYMBOL(utf16s_to_utf8s);
|
||||||
|
|
||||||
int register_nls(struct nls_table * nls)
|
int register_nls(struct nls_table * nls)
|
||||||
{
|
{
|
||||||
@ -467,9 +519,5 @@ EXPORT_SYMBOL(unregister_nls);
|
|||||||
EXPORT_SYMBOL(unload_nls);
|
EXPORT_SYMBOL(unload_nls);
|
||||||
EXPORT_SYMBOL(load_nls);
|
EXPORT_SYMBOL(load_nls);
|
||||||
EXPORT_SYMBOL(load_nls_default);
|
EXPORT_SYMBOL(load_nls_default);
|
||||||
EXPORT_SYMBOL(utf8_mbtowc);
|
|
||||||
EXPORT_SYMBOL(utf8_mbstowcs);
|
|
||||||
EXPORT_SYMBOL(utf8_wctomb);
|
|
||||||
EXPORT_SYMBOL(utf8_wcstombs);
|
|
||||||
|
|
||||||
MODULE_LICENSE("Dual BSD/GPL");
|
MODULE_LICENSE("Dual BSD/GPL");
|
||||||
|
@ -15,7 +15,11 @@ static int uni2char(wchar_t uni, unsigned char *out, int boundlen)
|
|||||||
{
|
{
|
||||||
int n;
|
int n;
|
||||||
|
|
||||||
if ( (n = utf8_wctomb(out, uni, boundlen)) == -1) {
|
if (boundlen <= 0)
|
||||||
|
return -ENAMETOOLONG;
|
||||||
|
|
||||||
|
n = utf32_to_utf8(uni, out, boundlen);
|
||||||
|
if (n < 0) {
|
||||||
*out = '?';
|
*out = '?';
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
@ -25,11 +29,14 @@ static int uni2char(wchar_t uni, unsigned char *out, int boundlen)
|
|||||||
static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni)
|
static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni)
|
||||||
{
|
{
|
||||||
int n;
|
int n;
|
||||||
|
unicode_t u;
|
||||||
|
|
||||||
if ( (n = utf8_mbtowc(uni, rawstring, boundlen)) == -1) {
|
n = utf8_to_utf32(rawstring, boundlen, &u);
|
||||||
|
if (n < 0 || u > MAX_WCHAR_T) {
|
||||||
*uni = 0x003f; /* ? */
|
*uni = 0x003f; /* ? */
|
||||||
n = -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
*uni = (wchar_t) u;
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3,8 +3,23 @@
|
|||||||
|
|
||||||
#include <linux/init.h>
|
#include <linux/init.h>
|
||||||
|
|
||||||
/* unicode character */
|
/* Unicode has changed over the years. Unicode code points no longer
|
||||||
typedef __u16 wchar_t;
|
* fit into 16 bits; as of Unicode 5 valid code points range from 0
|
||||||
|
* to 0x10ffff (17 planes, where each plane holds 65536 code points).
|
||||||
|
*
|
||||||
|
* The original decision to represent Unicode characters as 16-bit
|
||||||
|
* wchar_t values is now outdated. But plane 0 still includes the
|
||||||
|
* most commonly used characters, so we will retain it. The newer
|
||||||
|
* 32-bit unicode_t type can be used when it is necessary to
|
||||||
|
* represent the full Unicode character set.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Plane-0 Unicode character */
|
||||||
|
typedef u16 wchar_t;
|
||||||
|
#define MAX_WCHAR_T 0xffff
|
||||||
|
|
||||||
|
/* Arbitrary Unicode character */
|
||||||
|
typedef u32 unicode_t;
|
||||||
|
|
||||||
struct nls_table {
|
struct nls_table {
|
||||||
const char *charset;
|
const char *charset;
|
||||||
@ -21,6 +36,13 @@ struct nls_table {
|
|||||||
/* this value hold the maximum octet of charset */
|
/* this value hold the maximum octet of charset */
|
||||||
#define NLS_MAX_CHARSET_SIZE 6 /* for UTF-8 */
|
#define NLS_MAX_CHARSET_SIZE 6 /* for UTF-8 */
|
||||||
|
|
||||||
|
/* Byte order for UTF-16 strings */
|
||||||
|
enum utf16_endian {
|
||||||
|
UTF16_HOST_ENDIAN,
|
||||||
|
UTF16_LITTLE_ENDIAN,
|
||||||
|
UTF16_BIG_ENDIAN
|
||||||
|
};
|
||||||
|
|
||||||
/* nls.c */
|
/* nls.c */
|
||||||
extern int register_nls(struct nls_table *);
|
extern int register_nls(struct nls_table *);
|
||||||
extern int unregister_nls(struct nls_table *);
|
extern int unregister_nls(struct nls_table *);
|
||||||
@ -28,10 +50,11 @@ extern struct nls_table *load_nls(char *);
|
|||||||
extern void unload_nls(struct nls_table *);
|
extern void unload_nls(struct nls_table *);
|
||||||
extern struct nls_table *load_nls_default(void);
|
extern struct nls_table *load_nls_default(void);
|
||||||
|
|
||||||
extern int utf8_mbtowc(wchar_t *, const __u8 *, int);
|
extern int utf8_to_utf32(const u8 *s, int len, unicode_t *pu);
|
||||||
extern int utf8_mbstowcs(wchar_t *, const __u8 *, int);
|
extern int utf32_to_utf8(unicode_t u, u8 *s, int maxlen);
|
||||||
extern int utf8_wctomb(__u8 *, wchar_t, int);
|
extern int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs);
|
||||||
extern int utf8_wcstombs(__u8 *, const wchar_t *, int);
|
extern int utf16s_to_utf8s(const wchar_t *pwcs, int len,
|
||||||
|
enum utf16_endian endian, u8 *s, int maxlen);
|
||||||
|
|
||||||
static inline unsigned char nls_tolower(struct nls_table *t, unsigned char c)
|
static inline unsigned char nls_tolower(struct nls_table *t, unsigned char c)
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user