mirror of
https://github.com/torvalds/linux.git
synced 2024-12-24 11:51:27 +00:00
74675a5850
This patch (as1239) updates the kernel's treatment of Unicode. The character-set conversion routines are well behind the current state of the Unicode specification: They don't recognize the existence of code points beyond plane 0 or of surrogate pairs in the UTF-16 encoding. The old wchar_t 16-bit type is retained because it's still used in lots of places. This shouldn't cause any new problems; if a conversion now results in an invalid 16-bit code then before it must have yielded an undefined code. Difficult-to-read names like "utf_mbstowcs" are replaced with more transparent names like "utf8s_to_utf16s" and the ordering of the parameters is rationalized (buffer lengths come immediately after the pointers they refer to, and the inputs precede the outputs). Fortunately the low-level conversion routines are used in only a few places; the interfaces to the higher-level uni2char and char2uni methods have been left unchanged. Signed-off-by: Alan Stern <stern@rowland.harvard.edu> Acked-by: Clemens Ladisch <clemens@ladisch.de> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
69 lines
1.2 KiB
C
69 lines
1.2 KiB
C
/*
 * Module for handling utf8 just like any other charset.
 * By Urban Widmark 2000
 */
|
|
|
|
#include <linux/module.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/string.h>
|
|
#include <linux/nls.h>
|
|
#include <linux/errno.h>
|
|
|
|
/*
 * Byte-to-byte mapping used for both the charset2lower and charset2upper
 * entries of the NLS table below.  init_nls_utf8() fills it so that
 * identity[i] == i, i.e. no case conversion is applied.
 */
static unsigned char identity[256];
|
|
|
|
/*
 * uni2char - encode a single wchar_t value as UTF-8.
 * @uni:      value to encode
 * @out:      destination buffer
 * @boundlen: space available in @out
 *
 * Returns -ENAMETOOLONG when @boundlen is not positive.  Otherwise the
 * encoding is delegated to utf32_to_utf8() and its non-negative result is
 * passed straight back (presumably the number of bytes produced -- confirm
 * against <linux/nls.h>).  If the helper reports an error, a '?' is stored
 * in the first output byte and -EINVAL is returned.
 */
static int uni2char(wchar_t uni, unsigned char *out, int boundlen)
{
	int len;

	if (boundlen <= 0)
		return -ENAMETOOLONG;

	len = utf32_to_utf8(uni, out, boundlen);
	if (len >= 0)
		return len;

	/* Encoding failed: leave a placeholder for the caller. */
	*out = '?';
	return -EINVAL;
}
|
|
|
|
/*
 * char2uni - decode one UTF-8 sequence into a wchar_t.
 * @rawstring: input bytes
 * @boundlen:  length bound handed to utf8_to_utf32()
 * @uni:       receives the decoded value
 *
 * The decoding itself is done by utf8_to_utf32().  When that succeeds and
 * the decoded value does not exceed MAX_WCHAR_T, the value is stored in
 * @uni and the helper's return value is passed back (presumably the number
 * of input bytes consumed -- confirm against <linux/nls.h>).  In every
 * other case @uni is set to '?' (0x003f) and -EINVAL is returned.
 */
static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni)
{
	unicode_t code;
	int len;

	len = utf8_to_utf32(rawstring, boundlen, &code);

	/* 'code' is only inspected once the helper has reported success. */
	if (len >= 0 && code <= MAX_WCHAR_T) {
		*uni = (wchar_t) code;
		return len;
	}

	*uni = 0x003f;	/* ? */
	return -EINVAL;
}
|
|
|
|
static struct nls_table table = {
|
|
.charset = "utf8",
|
|
.uni2char = uni2char,
|
|
.char2uni = char2uni,
|
|
.charset2lower = identity, /* no conversion */
|
|
.charset2upper = identity,
|
|
.owner = THIS_MODULE,
|
|
};
|
|
|
|
/*
 * Module init: fill the identity[] table so each entry maps to itself,
 * then register the utf8 table via register_nls() and return its result.
 */
static int __init init_nls_utf8(void)
{
	int ch = 0;
	int ret;

	while (ch < 256) {
		identity[ch] = ch;
		ch++;
	}

	ret = register_nls(&table);
	return ret;
}
|
|
|
|
/* Module exit: hand the utf8 table back via unregister_nls(). */
static void __exit exit_nls_utf8(void)
{
	unregister_nls(&table);
}
|
|
|
|
module_init(init_nls_utf8)
|
|
module_exit(exit_nls_utf8)
|
|
MODULE_LICENSE("Dual BSD/GPL");
|