984f251fee
The constant MAX_UTF8_PER_UTF16 is used to calculate
required memory when converting from UTF-16 to UTF-8.
If this constant is too big we waste memory.
A code point encoded by one UTF-16 symbol is converted to a
maximum of three UTF-8 symbols, e.g.
0xffff could be encoded as 0xef 0xbf 0xbf.
The first byte carries four bits, the second and third byte
carry six bits each.
A code point encoded by two UTF-16 symbols is converted to four
UTF-8 symbols.
So in this case we need a maximum of two UTF-8 symbols per
UTF-16 symbol.
As the overall maximum is three UTF-8 symbols per UTF-16 symbol
we need MAX_UTF8_PER_UTF16 = 3.
Fixes: 78178bb0c9
lib: add some utf16 handling helpers
Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
Signed-off-by: Alexander Graf <agraf@suse.de>
66 lines
1.8 KiB
C
66 lines
1.8 KiB
C
/*
 *  charset conversion utils
 *
 *  Copyright (c) 2017 Rob Clark
 *
 *  SPDX-License-Identifier:     GPL-2.0+
 */

#ifndef __CHARSET_H_
#define __CHARSET_H_

/*
 * Worst-case number of UTF-8 bytes produced per 16 bit UTF-16 character.
 *
 * A code point encoded by one UTF-16 character needs at most three UTF-8
 * bytes (e.g. 0xffff -> 0xef 0xbf 0xbf). A code point encoded by two UTF-16
 * characters (a surrogate pair) needs four UTF-8 bytes, i.e. only two per
 * UTF-16 character. The overall maximum is therefore three.
 *
 * Use this factor when sizing the destination buffer of utf16_to_utf8().
 */
#define MAX_UTF8_PER_UTF16 3

/**
 * utf16_strlen() - Get the length of an utf16 string
 *
 * Returns the number of 16 bit characters in an utf16 string, not
 * including the terminating null character.
 *
 * @in     the string to measure
 * @return the string length
 */
size_t utf16_strlen(const uint16_t *in);

/**
 * utf16_strnlen() - Get the length of a fixed-size utf16 string.
 *
 * Returns the number of 16 bit characters in an utf16 string,
 * not including the terminating null character, but at most
 * 'count' number of characters.  In doing this, utf16_strnlen()
 * looks at only the first 'count' characters.
 *
 * @in     the string to measure
 * @count  the maximum number of characters to count
 * @return the string length, up to a maximum of 'count'
 */
size_t utf16_strnlen(const uint16_t *in, size_t count);

/**
 * utf16_strcpy() - UTF16 equivalent of strcpy()
 *
 * NOTE(review): the details below are inferred from the strcpy() analogy
 * only; confirm them against the implementation.
 *
 * @dest   the destination buffer; presumably it must be large enough for
 *         'src' including its terminating null character
 * @src    the source utf16 string
 * @return presumably 'dest', as strcpy() does
 */
uint16_t *utf16_strcpy(uint16_t *dest, const uint16_t *src);

/**
 * utf16_strdup() - UTF16 equivalent of strdup()
 *
 * NOTE(review): the details below are inferred from the strdup() analogy
 * only; confirm them against the implementation.
 *
 * @s      the utf16 string to duplicate
 * @return presumably a newly allocated copy of 's' that the caller has to
 *         free, or NULL on failure
 */
uint16_t *utf16_strdup(const uint16_t *s);

/**
 * utf16_to_utf8() - Convert an utf16 string to utf8
 *
 * Converts 'size' characters of the utf16 string 'src' to utf8
 * written to the 'dest' buffer.
 *
 * NOTE that a single utf16 character can generate up to 3 utf8
 * characters.  See MAX_UTF8_PER_UTF16.  The 'dest' buffer therefore
 * needs room for up to 'size' * MAX_UTF8_PER_UTF16 bytes.
 *
 * NOTE(review): this header does not state whether a terminating null
 * byte is written to 'dest'; check the implementation before relying on
 * either behavior.
 *
 * @dest   the destination buffer to write the utf8 characters
 * @src    the source utf16 string
 * @size   the number of utf16 characters to convert
 * @return the pointer to the first unwritten byte in 'dest'
 */
uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size);

#endif /* __CHARSET_H_ */