diff --git a/third_party/icu4c/libicu.ndk.json b/third_party/icu4c/libicu.ndk.json index 42149b48ee8e15acef291cf765991bbae222c418..358c158556a82b16edb1dc7ce7f57b21f152f921 100644 --- a/third_party/icu4c/libicu.ndk.json +++ b/third_party/icu4c/libicu.ndk.json @@ -2090,5 +2090,377 @@ { "first_introduced": "15", "name":"u_errorName" + }, + { + "first_introduced": "21", + "name":"u_charAge" + }, + { + "first_introduced": "21", + "name":"u_getUnicodeVersion" + }, + { + "first_introduced": "21", + "name":"uldn_open" + }, + { + "first_introduced": "21", + "name":"uldn_close" + }, + { + "first_introduced": "21", + "name":"uldn_getLocale" + }, + { + "first_introduced": "21", + "name":"uldn_getDialectHandling" + }, + { + "first_introduced": "21", + "name":"uldn_localeDisplayName" + }, + { + "first_introduced": "21", + "name":"uldn_languageDisplayName" + }, + { + "first_introduced": "21", + "name":"uldn_scriptDisplayName" + }, + { + "first_introduced": "21", + "name":"uldn_scriptCodeDisplayName" + }, + { + "first_introduced": "21", + "name":"uldn_regionDisplayName" + }, + { + "first_introduced": "21", + "name":"uldn_variantDisplayName" + }, + { + "first_introduced": "21", + "name":"uldn_keyDisplayName" + }, + { + "first_introduced": "21", + "name":"uldn_keyValueDisplayName" + }, + { + "first_introduced": "21", + "name":"uldn_openForContext" + }, + { + "first_introduced": "21", + "name":"uldn_getContext" + }, + { + "first_introduced": "21", + "name":"ulocdata_getCLDRVersion" + }, + { + "first_introduced": "21", + "name":"u_versionFromString" + }, + { + "first_introduced": "21", + "name":"u_versionFromUString" + }, + { + "first_introduced": "21", + "name":"u_versionToString" + }, + { + "first_introduced": "21", + "name":"u_getVersion" + }, + { + "first_introduced": "21", + "name":"UCPMapValueFilter" + }, + { + "first_introduced": "21", + "name":"U_IS_UNICODE_NONCHAR" + }, + { + "first_introduced": "21", + "name":"U_IS_UNICODE_CHAR" + }, + { + "first_introduced": "21", + 
"name":"U_IS_BMP" + }, + { + "first_introduced": "21", + "name":"U_IS_SUPPLEMENTARY" + }, + { + "first_introduced": "21", + "name":"U_IS_LEAD" + }, + { + "first_introduced": "21", + "name":"U_IS_TRAIL" + }, + { + "first_introduced": "21", + "name":"U_IS_SURROGATE" + }, + { + "first_introduced": "21", + "name":"U_IS_SURROGATE_LEAD" + }, + { + "first_introduced": "21", + "name":"U_IS_SURROGATE_TRAIL" + }, + { + "first_introduced": "21", + "name":"U16_IS_SINGLE" + }, + { + "first_introduced": "21", + "name":"U16_IS_LEAD" + }, + { + "first_introduced": "21", + "name":"U16_IS_TRAIL" + }, + { + "first_introduced": "21", + "name":"U16_IS_SURROGATE" + }, + { + "first_introduced": "21", + "name":"U16_IS_SURROGATE_LEAD" + }, + { + "first_introduced": "21", + "name":"U16_IS_SURROGATE_TRAIL" + }, + { + "first_introduced": "21", + "name":"U16_GET_SUPPLEMENTARY" + }, + { + "first_introduced": "21", + "name":"U16_LEAD" + }, + { + "first_introduced": "21", + "name":"U16_TRAIL" + }, + { + "first_introduced": "21", + "name":"U16_LENGTH" + }, + { + "first_introduced": "21", + "name":"U16_GET_UNSAFE" + }, + { + "first_introduced": "21", + "name":"U16_GET" + }, + { + "first_introduced": "21", + "name":"U16_GET_OR_FFFD" + }, + { + "first_introduced": "21", + "name":"U16_NEXT_UNSAFE" + }, + { + "first_introduced": "21", + "name":"U16_NEXT" + }, + { + "first_introduced": "21", + "name":"U16_NEXT_OR_FFFD" + }, + { + "first_introduced": "21", + "name":"U16_APPEND_UNSAFE" + }, + { + "first_introduced": "21", + "name":"U16_APPEND" + }, + { + "first_introduced": "21", + "name":"U16_FWD_1_UNSAFE" + }, + { + "first_introduced": "21", + "name":"U16_FWD_1" + }, + { + "first_introduced": "21", + "name":"U16_FWD_N_UNSAFE" + }, + { + "first_introduced": "21", + "name":"U16_FWD_N" + }, + { + "first_introduced": "21", + "name":"U16_SET_CP_START_UNSAFE" + }, + { + "first_introduced": "21", + "name":"U16_SET_CP_START" + }, + { + "first_introduced": "21", + "name":"U16_PREV_UNSAFE" + }, + { + 
"first_introduced": "21", + "name":"U16_PREV" + }, + { + "first_introduced": "21", + "name":"U16_PREV_OR_FFFD" + }, + { + "first_introduced": "21", + "name":"U16_BACK_1_UNSAFE" + }, + { + "first_introduced": "21", + "name":"U16_BACK_1" + }, + { + "first_introduced": "21", + "name":"U16_BACK_N_UNSAFE" + }, + { + "first_introduced": "21", + "name":"U16_BACK_N" + }, + { + "first_introduced": "21", + "name":"U16_SET_CP_LIMIT_UNSAFE" + }, + { + "first_introduced": "21", + "name":"U16_SET_CP_LIMIT" + }, + { + "first_introduced": "21", + "name":"U8_IS_SINGLE" + }, + { + "first_introduced": "21", + "name":"U8_IS_LEAD" + }, + { + "first_introduced": "21", + "name":"U8_IS_TRAIL" + }, + { + "first_introduced": "21", + "name":"U8_LENGTH" + }, + { + "first_introduced": "21", + "name":"U8_GET_UNSAFE" + }, + { + "first_introduced": "21", + "name":"U8_GET" + }, + { + "first_introduced": "21", + "name":"U8_GET_OR_FFFD" + }, + { + "first_introduced": "21", + "name":"U8_NEXT_UNSAFE" + }, + { + "first_introduced": "21", + "name":"U8_NEXT" + }, + { + "first_introduced": "21", + "name":"U8_NEXT_OR_FFFD" + }, + { + "first_introduced": "21", + "name":"U8_INTERNAL_NEXT_OR_SUB" + }, + { + "first_introduced": "21", + "name":"U8_APPEND_UNSAFE" + }, + { + "first_introduced": "21", + "name":"U8_APPEND" + }, + { + "first_introduced": "21", + "name":"U8_FWD_1_UNSAFE" + }, + { + "first_introduced": "21", + "name":"U8_FWD_1" + }, + { + "first_introduced": "21", + "name":"U8_FWD_N_UNSAFE" + }, + { + "first_introduced": "21", + "name":"U8_FWD_N" + }, + { + "first_introduced": "21", + "name":"U8_SET_CP_START_UNSAFE" + }, + { + "first_introduced": "21", + "name":"U8_SET_CP_START" + }, + { + "first_introduced": "21", + "name":"U8_TRUNCATE_IF_INCOMPLETE" + }, + { + "first_introduced": "21", + "name":"U8_PREV_UNSAFE" + }, + { + "first_introduced": "21", + "name":"U8_PREV" + }, + { + "first_introduced": "21", + "name":"U8_PREV_OR_FFFD" + }, + { + "first_introduced": "21", + "name":"U8_BACK_1_UNSAFE" + }, + 
{ + "first_introduced": "21", + "name":"U8_BACK_1" + }, + { + "first_introduced": "21", + "name":"U8_BACK_N_UNSAFE" + }, + { + "first_introduced": "21", + "name":"U8_BACK_N" + }, + { + "first_introduced": "21", + "name":"U8_SET_CP_LIMIT_UNSAFE" + }, + { + "first_introduced": "21", + "name":"U8_SET_CP_LIMIT" } ] diff --git a/third_party/icu4c/ndk_headers/unicode/putil.h b/third_party/icu4c/ndk_headers/unicode/putil.h new file mode 100644 index 0000000000000000000000000000000000000000..c3b347d27b4a36efd55069945819fd7ba9b370a8 --- /dev/null +++ b/third_party/icu4c/ndk_headers/unicode/putil.h @@ -0,0 +1,67 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1997-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* FILE NAME : putil.h +* +* Date Name Description +* 05/14/98 nos Creation (content moved here from utypes.h). +* 06/17/99 erm Added IEEE_754 +* 07/22/98 stephen Added IEEEremainder, max, min, trunc +* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity +* 08/24/98 stephen Added longBitsFromDouble +* 03/02/99 stephen Removed openFile(). Added AS400 support. +* 04/15/99 stephen Converted to C +* 11/15/99 helena Integrated S/390 changes for IEEE support. +* 01/11/00 helena Added u_getVersion. +****************************************************************************** +*/ + +#ifndef PUTIL_H +#define PUTIL_H + +#include "unicode/utypes.h" + /** + * \file + * \brief C API: Platform Utilities + */ + +/*==========================================================================*/ +/* Platform utilities */ +/*==========================================================================*/ + +/** + * Platform utilities isolates the platform dependencies of the + * library. 
For each platform which this code is ported to, these + * functions may have to be re-implemented. + */ + +/** + * @{ + * Filesystem file and path separator characters. + * Example: '/' and ':' on Unix, '\\' and ';' on Windows. + * @stable ICU 2.0 + */ +#if U_PLATFORM_USES_ONLY_WIN32_API +# define U_FILE_SEP_CHAR '\\' +# define U_FILE_ALT_SEP_CHAR '/' +# define U_PATH_SEP_CHAR ';' +# define U_FILE_SEP_STRING "\\" +# define U_FILE_ALT_SEP_STRING "/" +# define U_PATH_SEP_STRING ";" +#else +# define U_FILE_SEP_CHAR '/' +# define U_FILE_ALT_SEP_CHAR '/' +# define U_PATH_SEP_CHAR ':' +# define U_FILE_SEP_STRING "/" +# define U_FILE_ALT_SEP_STRING "/" +# define U_PATH_SEP_STRING ":" +#endif + +#endif diff --git a/third_party/icu4c/ndk_headers/unicode/uchar.h b/third_party/icu4c/ndk_headers/unicode/uchar.h index 1a529e4f8a050b51619a8ec0a5bbb77387a76fa5..a6605f52585abdd814fba094fe775e7217cc95d2 100644 --- a/third_party/icu4c/ndk_headers/unicode/uchar.h +++ b/third_party/icu4c/ndk_headers/unicode/uchar.h @@ -3866,6 +3866,37 @@ u_digit(UChar32 ch, int8_t radix); U_CAPI UChar32 U_EXPORT2 u_forDigit(int32_t digit, int8_t radix); +/** + * Get the "age" of the code point. + * The "age" is the Unicode version when the code point was first + * designated (as a non-character or for Private Use) + * or assigned a character. + * This can be useful to avoid emitting code points to receiving + * processes that do not accept newer characters. + * The data is from the UCD file DerivedAge.txt. + * + * @param c The code point. + * @param versionArray The Unicode version number array, to be filled in. + * + * @stable ICU 2.1 + */ +U_CAPI void U_EXPORT2 +u_charAge(UChar32 c, UVersionInfo versionArray); + +/** + * Gets the Unicode version information. + * The version array is filled in with the version information + * for the Unicode standard that is currently used by ICU. + * For example, Unicode version 3.1.1 is represented as an array with + * the values { 3, 1, 1, 0 }. 
+ * + * @param versionArray an output array that will be filled in with + * the Unicode version number + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +u_getUnicodeVersion(UVersionInfo versionArray); + #if !UCONFIG_NO_NORMALIZATION /** * Get the FC_NFKC_Closure property string for a character. diff --git a/third_party/icu4c/ndk_headers/unicode/ucpmap.h b/third_party/icu4c/ndk_headers/unicode/ucpmap.h new file mode 100644 index 0000000000000000000000000000000000000000..30f337ad130526b9231f5adade5258cc9e181422 --- /dev/null +++ b/third_party/icu4c/ndk_headers/unicode/ucpmap.h @@ -0,0 +1,104 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// ucpmap.h +// created: 2018sep03 Markus W. Scherer + +#ifndef __UCPMAP_H__ +#define __UCPMAP_H__ + +#include "unicode/utypes.h" + +U_CDECL_BEGIN + +/** + * \file + * \brief C API: This file defines an abstract map from Unicode code points to integer values. + * + * @see UCPMap + * @see UCPTrie + * @see UMutableCPTrie + */ + +/** + * Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values. + * + * @see UCPTrie + * @see UMutableCPTrie + * @stable ICU 63 + */ +typedef struct UCPMap UCPMap; + +/** + * Selectors for how ucpmap_getRange() etc. should report value ranges overlapping with surrogates. + * Most users should use UCPMAP_RANGE_NORMAL. + * + * @see ucpmap_getRange + * @see ucptrie_getRange + * @see umutablecptrie_getRange + * @stable ICU 63 + */ +enum UCPMapRangeOption { + /** + * ucpmap_getRange() enumerates all same-value ranges as stored in the map. + * Most users should use this option. + * @stable ICU 63 + */ + UCPMAP_RANGE_NORMAL, + /** + * ucpmap_getRange() enumerates all same-value ranges as stored in the map, + * except that lead surrogates (U+D800..U+DBFF) are treated as having the + * surrogateValue, which is passed to getRange() as a separate parameter. + * The surrogateValue is not transformed via filter(). 
+ * See U_IS_LEAD(c). + * + * Most users should use UCPMAP_RANGE_NORMAL instead. + * + * This option is useful for maps that map surrogate code *units* to + * special values optimized for UTF-16 string processing + * or for special error behavior for unpaired surrogates, + * but those values are not to be associated with the lead surrogate code *points*. + * @stable ICU 63 + */ + UCPMAP_RANGE_FIXED_LEAD_SURROGATES, + /** + * ucpmap_getRange() enumerates all same-value ranges as stored in the map, + * except that all surrogates (U+D800..U+DFFF) are treated as having the + * surrogateValue, which is passed to getRange() as a separate parameter. + * The surrogateValue is not transformed via filter(). + * See U_IS_SURROGATE(c). + * + * Most users should use UCPMAP_RANGE_NORMAL instead. + * + * This option is useful for maps that map surrogate code *units* to + * special values optimized for UTF-16 string processing + * or for special error behavior for unpaired surrogates, + * but those values are not to be associated with the lead surrogate code *points*. + * @stable ICU 63 + */ + UCPMAP_RANGE_FIXED_ALL_SURROGATES +}; +#ifndef U_IN_DOXYGEN +typedef enum UCPMapRangeOption UCPMapRangeOption; +#endif + +/** + * Callback function type: Modifies a map value. + * Optionally called by ucpmap_getRange()/ucptrie_getRange()/umutablecptrie_getRange(). + * The modified value will be returned by the getRange function. + * + * Can be used to ignore some of the value bits, + * make a filter for one of several values, + * return a value index computed from the map value, etc. 
+ * + * @param context an opaque pointer, as passed into the getRange function + * @param value a value from the map + * @return the modified value + * @stable ICU 63 + */ +typedef uint32_t U_CALLCONV +UCPMapValueFilter(const void *context, uint32_t value); + +U_CDECL_END + +#endif diff --git a/third_party/icu4c/ndk_headers/unicode/uldnames.h b/third_party/icu4c/ndk_headers/unicode/uldnames.h new file mode 100644 index 0000000000000000000000000000000000000000..02e24f004972fd0d9284d7a5f228bf133a262b00 --- /dev/null +++ b/third_party/icu4c/ndk_headers/unicode/uldnames.h @@ -0,0 +1,284 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2010-2016, International Business Machines Corporation and +* others. All Rights Reserved. +******************************************************************************* +*/ + +#ifndef __ULDNAMES_H__ +#define __ULDNAMES_H__ + +/** + * \file + * \brief C API: Provides display names of Locale ids and their components. + */ + +#include "unicode/utypes.h" +#include "unicode/uscript.h" +#include "unicode/udisplaycontext.h" + +/** + * Enum used in LocaleDisplayNames::createInstance. + * @stable ICU 4.4 + */ +typedef enum { + /** + * Use standard names when generating a locale name, + * e.g. en_GB displays as 'English (United Kingdom)'. + * @stable ICU 4.4 + */ + ULDN_STANDARD_NAMES = 0, + /** + * Use dialect names, when generating a locale name, + * e.g. en_GB displays as 'British English'. + * @stable ICU 4.4 + */ + ULDN_DIALECT_NAMES +} UDialectHandling; + +/** + * Opaque C service object type for the locale display names API + * @stable ICU 4.4 + */ +struct ULocaleDisplayNames; + +/** + * C typedef for struct ULocaleDisplayNames. 
+ * @stable ICU 4.4 + */ +typedef struct ULocaleDisplayNames ULocaleDisplayNames; + +#if !UCONFIG_NO_FORMATTING + +/** + * Returns an instance of LocaleDisplayNames that returns names + * formatted for the provided locale, using the provided + * dialectHandling. The usual value for dialectHandling is + * ULDN_STANDARD_NAMES. + * + * @param locale the display locale + * @param dialectHandling how to select names for locales + * @param pErrorCode the status code + * @return a ULocaleDisplayNames instance + * @stable ICU 4.4 + */ +U_CAPI ULocaleDisplayNames * U_EXPORT2 +uldn_open(const char * locale, + UDialectHandling dialectHandling, + UErrorCode *pErrorCode); + +/** + * Closes a ULocaleDisplayNames instance obtained from uldn_open(). + * @param ldn the ULocaleDisplayNames instance to be closed + * @stable ICU 4.4 + */ +U_CAPI void U_EXPORT2 +uldn_close(ULocaleDisplayNames *ldn); + +/* getters for state */ + +/** + * Returns the locale used to determine the display names. This is + * not necessarily the same locale passed to {@link #uldn_open}. + * @param ldn the LocaleDisplayNames instance + * @return the display locale + * @stable ICU 4.4 + */ +U_CAPI const char * U_EXPORT2 +uldn_getLocale(const ULocaleDisplayNames *ldn); + +/** + * Returns the dialect handling used in the display names. + * @param ldn the LocaleDisplayNames instance + * @return the dialect handling enum + * @stable ICU 4.4 + */ +U_CAPI UDialectHandling U_EXPORT2 +uldn_getDialectHandling(const ULocaleDisplayNames *ldn); + +/* names for entire locales */ + +/** + * Returns the display name of the provided locale. + * @param ldn the LocaleDisplayNames instance + * @param locale the locale whose display name to return + * @param result receives the display name + * @param maxResultSize the size of the result buffer + * @param pErrorCode the status code + * @return the actual buffer size needed for the display name. If it's + * greater than maxResultSize, the returned name will be truncated.
+ * @stable ICU 4.4 + */ +U_CAPI int32_t U_EXPORT2 +uldn_localeDisplayName(const ULocaleDisplayNames *ldn, + const char *locale, + UChar *result, + int32_t maxResultSize, + UErrorCode *pErrorCode); + +/* names for components of a locale */ + +/** + * Returns the display name of the provided language code. + * @param ldn the LocaleDisplayNames instance + * @param lang the language code whose display name to return + * @param result receives the display name + * @param maxResultSize the size of the result buffer + * @param pErrorCode the status code + * @return the actual buffer size needed for the display name. If it's + * greater than maxResultSize, the returned name will be truncated. + * @stable ICU 4.4 + */ +U_CAPI int32_t U_EXPORT2 +uldn_languageDisplayName(const ULocaleDisplayNames *ldn, + const char *lang, + UChar *result, + int32_t maxResultSize, + UErrorCode *pErrorCode); + +/** + * Returns the display name of the provided script. + * @param ldn the LocaleDisplayNames instance + * @param script the script whose display name to return + * @param result receives the display name + * @param maxResultSize the size of the result buffer + * @param pErrorCode the status code + * @return the actual buffer size needed for the display name. If it's + * greater than maxResultSize, the returned name will be truncated. + * @stable ICU 4.4 + */ +U_CAPI int32_t U_EXPORT2 +uldn_scriptDisplayName(const ULocaleDisplayNames *ldn, + const char *script, + UChar *result, + int32_t maxResultSize, + UErrorCode *pErrorCode); + +/** + * Returns the display name of the provided script code. + * @param ldn the LocaleDisplayNames instance + * @param scriptCode the script code whose display name to return + * @param result receives the display name + * @param maxResultSize the size of the result buffer + * @param pErrorCode the status code + * @return the actual buffer size needed for the display name. If it's + * greater than maxResultSize, the returned name will be truncated. 
+ * @stable ICU 4.4 + */ +U_CAPI int32_t U_EXPORT2 +uldn_scriptCodeDisplayName(const ULocaleDisplayNames *ldn, + UScriptCode scriptCode, + UChar *result, + int32_t maxResultSize, + UErrorCode *pErrorCode); + +/** + * Returns the display name of the provided region code. + * @param ldn the LocaleDisplayNames instance + * @param region the region code whose display name to return + * @param result receives the display name + * @param maxResultSize the size of the result buffer + * @param pErrorCode the status code + * @return the actual buffer size needed for the display name. If it's + * greater than maxResultSize, the returned name will be truncated. + * @stable ICU 4.4 + */ +U_CAPI int32_t U_EXPORT2 +uldn_regionDisplayName(const ULocaleDisplayNames *ldn, + const char *region, + UChar *result, + int32_t maxResultSize, + UErrorCode *pErrorCode); + +/** + * Returns the display name of the provided variant + * @param ldn the LocaleDisplayNames instance + * @param variant the variant whose display name to return + * @param result receives the display name + * @param maxResultSize the size of the result buffer + * @param pErrorCode the status code + * @return the actual buffer size needed for the display name. If it's + * greater than maxResultSize, the returned name will be truncated. + * @stable ICU 4.4 + */ +U_CAPI int32_t U_EXPORT2 +uldn_variantDisplayName(const ULocaleDisplayNames *ldn, + const char *variant, + UChar *result, + int32_t maxResultSize, + UErrorCode *pErrorCode); + +/** + * Returns the display name of the provided locale key + * @param ldn the LocaleDisplayNames instance + * @param key the locale key whose display name to return + * @param result receives the display name + * @param maxResultSize the size of the result buffer + * @param pErrorCode the status code + * @return the actual buffer size needed for the display name. If it's + * greater than maxResultSize, the returned name will be truncated. 
+ * @stable ICU 4.4 + */ +U_CAPI int32_t U_EXPORT2 +uldn_keyDisplayName(const ULocaleDisplayNames *ldn, + const char *key, + UChar *result, + int32_t maxResultSize, + UErrorCode *pErrorCode); + +/** + * Returns the display name of the provided value (used with the provided key). + * @param ldn the LocaleDisplayNames instance + * @param key the locale key + * @param value the locale key's value + * @param result receives the display name + * @param maxResultSize the size of the result buffer + * @param pErrorCode the status code + * @return the actual buffer size needed for the display name. If it's + * greater than maxResultSize, the returned name will be truncated. + * @stable ICU 4.4 + */ +U_CAPI int32_t U_EXPORT2 +uldn_keyValueDisplayName(const ULocaleDisplayNames *ldn, + const char *key, + const char *value, + UChar *result, + int32_t maxResultSize, + UErrorCode *pErrorCode); + +/** +* Returns an instance of LocaleDisplayNames that returns names formatted +* for the provided locale, using the provided UDisplayContext settings. +* +* @param locale The display locale +* @param contexts List of one or more context settings (e.g. for dialect +* handling, capitalization, etc. +* @param length Number of items in the contexts list +* @param pErrorCode Pointer to UErrorCode input/output status. If at entry this indicates +* a failure status, the function will do nothing; otherwise this will be +* updated with any new status from the function. +* @return a ULocaleDisplayNames instance +* @stable ICU 51 +*/ +U_CAPI ULocaleDisplayNames * U_EXPORT2 +uldn_openForContext(const char * locale, UDisplayContext *contexts, + int32_t length, UErrorCode *pErrorCode); + +/** +* Returns the UDisplayContext value for the specified UDisplayContextType. +* @param ldn the ULocaleDisplayNames instance +* @param type the UDisplayContextType whose value to return +* @param pErrorCode Pointer to UErrorCode input/output status. 
If at entry this indicates +* a failure status, the function will do nothing; otherwise this will be +* updated with any new status from the function. +* @return the UDisplayContextValue for the specified type. +* @stable ICU 51 +*/ +U_CAPI UDisplayContext U_EXPORT2 +uldn_getContext(const ULocaleDisplayNames *ldn, UDisplayContextType type, + UErrorCode *pErrorCode); + +#endif /* !UCONFIG_NO_FORMATTING */ +#endif /* __ULDNAMES_H__ */ diff --git a/third_party/icu4c/ndk_headers/unicode/ulocdata.h b/third_party/icu4c/ndk_headers/unicode/ulocdata.h new file mode 100644 index 0000000000000000000000000000000000000000..c3dac41bfb61ee78171cd67f478d3475c98cd611 --- /dev/null +++ b/third_party/icu4c/ndk_headers/unicode/ulocdata.h @@ -0,0 +1,33 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* * +* Copyright (C) 2003-2015, International Business Machines * +* Corporation and others. All Rights Reserved. * +* * +****************************************************************************** +* file name: ulocdata.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2003Oct21 +* created by: Ram Viswanadha +*/ + +#ifndef __ULOCDATA_H__ +#define __ULOCDATA_H__ + +#include "unicode/uloc.h" + +/** + * Return the current CLDR version used by the library. + * @param versionArray fill-in that will receive the version number + * @param status error code - could be U_MISSING_RESOURCE_ERROR if the version was not found. 
+ * @stable ICU 4.2 + */ +U_CAPI void U_EXPORT2 +ulocdata_getCLDRVersion(UVersionInfo versionArray, UErrorCode *status); + +#endif diff --git a/third_party/icu4c/ndk_headers/unicode/utf.h b/third_party/icu4c/ndk_headers/unicode/utf.h new file mode 100644 index 0000000000000000000000000000000000000000..00623e546ce257b6ad91b21ca9c8c19947cfd224 --- /dev/null +++ b/third_party/icu4c/ndk_headers/unicode/utf.h @@ -0,0 +1,222 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1999-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: utf.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999sep09 +* created by: Markus W. Scherer +*/ + +/** + * \file + * \brief C API: Code point macros + * + * This file defines macros for checking whether a code point is + * a surrogate or a non-character etc. + * + * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 0 then utf.h is included by utypes.h + * and itself includes utf8.h and utf16.h after some + * common definitions. + * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 1 then each of these headers must be + * included explicitly if their definitions are used. + * + * utf8.h and utf16.h define macros for efficiently getting code points + * in and out of UTF-8/16 strings. + * utf16.h macros have "U16_" prefixes. + * utf8.h defines similar macros with "U8_" prefixes for UTF-8 string handling. + * + * ICU mostly processes 16-bit Unicode strings. + * Most of the time, such strings are well-formed UTF-16. + * Single, unpaired surrogates must be handled as well, and are treated in ICU + * like regular code points where possible. 
+ * (Pairs of surrogate code points are indistinguishable from supplementary + * code points encoded as pairs of supplementary code units.) + * + * In fact, almost all Unicode code points in normal text (>99%) + * are on the BMP (<=U+ffff) and even <=U+d7ff. + * ICU functions handle supplementary code points (U+10000..U+10ffff) + * but are optimized for the much more frequently occurring BMP code points. + * + * umachine.h defines UChar to be an unsigned 16-bit integer. + * Since ICU 59, ICU uses char16_t in C++, UChar only in C, + * and defines UChar=char16_t by default. See the UChar API docs for details. + * + * UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit + * Unicode code point (Unicode scalar value, 0..0x10ffff) and U_SENTINEL (-1). + * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as + * the definition of UChar. For details see the documentation for UChar32 itself. + * + * utf.h defines a small number of C macros for single Unicode code points. + * These are simple checks for surrogates and non-characters. + * For actual Unicode character properties see uchar.h. + * + * By default, string operations must be done with error checking in case + * a string is not well-formed UTF-16 or UTF-8. + * + * The U16_ macros detect if a surrogate code unit is unpaired + * (lead unit without trail unit or vice versa) and just return the unit itself + * as the code point. + * + * The U8_ macros detect illegal byte sequences and return a negative value. + * Starting with ICU 60, the observable length of a single illegal byte sequence + * skipped by one of these macros follows the Unicode 6+ recommendation + * which is consistent with the W3C Encoding Standard. + * + * There are ..._OR_FFFD versions of both U16_ and U8_ macros + * that return U+FFFD for illegal code unit sequences. + * + * The regular "safe" macros require that the initial, passed-in string index + * is within bounds. 
They only check the index when they read more than one + * code unit. This is usually done with code similar to the following loop: + *
<pre>while(i<length) { + * U16_NEXT(s, i, length, c); + * // use c + * }</pre> + * + * When it is safe to assume that text is well-formed UTF-16 + * (does not contain single, unpaired surrogates), then one can use + * U16_..._UNSAFE macros. + * These do not check for proper code unit sequences or truncated text and may + * yield wrong results or even cause a crash if they are used with "malformed" + * text. + * In practice, U16_..._UNSAFE macros will produce slightly less code but + * should not be faster because the processing is only different when a + * surrogate code unit is detected, which will be rare. + * + * Similarly for UTF-8, there are "safe" macros without a suffix, + * and U8_..._UNSAFE versions. + * The performance differences are much larger here because UTF-8 provides so + * many opportunities for malformed sequences. + * The unsafe UTF-8 macros are entirely implemented inside the macro definitions + * and are fast, while the safe UTF-8 macros call functions for some complicated cases. + * + * Unlike with UTF-16, malformed sequences cannot be expressed with distinct + * code point values (0..U+10ffff). They are indicated with negative values instead. + * + * For more information see the ICU User Guide Strings chapter + * (https://unicode-org.github.io/icu/userguide/strings). + * + * Usage: + * ICU coding guidelines for if() statements should be followed when using these macros. + * Compound statements (curly braces {}) must be used for if-else-while... + * bodies and all macro statements should be terminated with semicolon. + * + * @stable ICU 2.4 + */ + +#ifndef __UTF_H__ +#define __UTF_H__ + +#include "unicode/umachine.h" +/* include the utfXX.h after the following definitions */ + +/* single-code point definitions -------------------------------------------- */ + +/** + * Is this code point a Unicode noncharacter?
+ * @param c 32-bit code point + * @return true or false + * @stable ICU 2.4 + */ +#define U_IS_UNICODE_NONCHAR(c) \ + ((c)>=0xfdd0 && \ + ((c)<=0xfdef || ((c)&0xfffe)==0xfffe) && (c)<=0x10ffff) + +/** + * Is c a Unicode code point value (0..U+10ffff) + * that can be assigned a character? + * + * Code points that are not characters include: + * - single surrogate code points (U+d800..U+dfff, 2048 code points) + * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points) + * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points) + * - the highest Unicode code point value is U+10ffff + * + * This means that all code points below U+d800 are character code points, + * and that boundary is tested first for performance. + * + * @param c 32-bit code point + * @return true or false + * @stable ICU 2.4 + */ +#define U_IS_UNICODE_CHAR(c) \ + ((uint32_t)(c)<0xd800 || \ + (0xdfff<(c) && (c)<=0x10ffff && !U_IS_UNICODE_NONCHAR(c))) + +/** + * Is this code point a BMP code point (U+0000..U+ffff)? + * @param c 32-bit code point + * @return true or false + * @stable ICU 2.8 + */ +#define U_IS_BMP(c) ((uint32_t)(c)<=0xffff) + +/** + * Is this code point a supplementary code point (U+10000..U+10ffff)? + * @param c 32-bit code point + * @return true or false + * @stable ICU 2.8 + */ +#define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c)-0x10000)<=0xfffff) + +/** + * Is this code point a lead surrogate (U+d800..U+dbff)? + * @param c 32-bit code point + * @return true or false + * @stable ICU 2.4 + */ +#define U_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) + +/** + * Is this code point a trail surrogate (U+dc00..U+dfff)? + * @param c 32-bit code point + * @return true or false + * @stable ICU 2.4 + */ +#define U_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) + +/** + * Is this code point a surrogate (U+d800..U+dfff)? 
+ * @param c 32-bit code point + * @return true or false + * @stable ICU 2.4 + */ +#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800) + +/** + * Assuming c is a surrogate code point (U_IS_SURROGATE(c)), + * is it a lead surrogate? + * @param c 32-bit code point + * @return true or false + * @stable ICU 2.4 + */ +#define U_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) + +/** + * Assuming c is a surrogate code point (U_IS_SURROGATE(c)), + * is it a trail surrogate? + * @param c 32-bit code point + * @return true or false + * @stable ICU 4.2 + */ +#define U_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0) + +/* include the utfXX.h ------------------------------------------------------ */ + +#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS + +#include "unicode/utf8.h" +#include "unicode/utf16.h" + +#endif /* !U_NO_DEFAULT_INCLUDE_UTF_HEADERS */ + +#endif /* __UTF_H__ */ diff --git a/third_party/icu4c/ndk_headers/unicode/utf16.h b/third_party/icu4c/ndk_headers/unicode/utf16.h new file mode 100644 index 0000000000000000000000000000000000000000..93c14e124897faf96a0c6f7d9566e85f94f4c829 --- /dev/null +++ b/third_party/icu4c/ndk_headers/unicode/utf16.h @@ -0,0 +1,728 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1999-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: utf16.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999sep09 +* created by: Markus W. Scherer +*/ + +/** + * \file + * \brief C API: 16-bit Unicode handling macros + * + * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings. + * + * For more information see utf.h and the ICU User Guide Strings chapter + * (https://unicode-org.github.io/icu/userguide/strings). 
+ * + * Usage: + * ICU coding guidelines for if() statements should be followed when using these macros. + * Compound statements (curly braces {}) must be used for if-else-while... + * bodies and all macro statements should be terminated with semicolon. + */ + +#ifndef __UTF16_H__ +#define __UTF16_H__ + +#include +#include "unicode/umachine.h" +#ifndef __UTF_H__ +# include "unicode/utf.h" +#endif + +/* single-code point definitions -------------------------------------------- */ + +/** + * Does this code unit alone encode a code point (BMP, not a surrogate)? + * @param c 16-bit code unit + * @return true or false + * @stable ICU 2.4 + */ +#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c) + +/** + * Is this code unit a lead surrogate (U+d800..U+dbff)? + * @param c 16-bit code unit + * @return true or false + * @stable ICU 2.4 + */ +#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) + +/** + * Is this code unit a trail surrogate (U+dc00..U+dfff)? + * @param c 16-bit code unit + * @return true or false + * @stable ICU 2.4 + */ +#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) + +/** + * Is this code unit a surrogate (U+d800..U+dfff)? + * @param c 16-bit code unit + * @return true or false + * @stable ICU 2.4 + */ +#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c) + +/** + * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), + * is it a lead surrogate? + * @param c 16-bit code unit + * @return true or false + * @stable ICU 2.4 + */ +#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) + +/** + * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), + * is it a trail surrogate? + * @param c 16-bit code unit + * @return true or false + * @stable ICU 4.2 + */ +#define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0) + +/** + * Get a supplementary code point value (U+10000..U+10ffff) + * from its lead and trail surrogates. + * The result is undefined if the input values are not + * lead and trail surrogates. 
+ * + * @param lead lead surrogate (U+d800..U+dbff) + * @param trail trail surrogate (U+dc00..U+dfff) + * @return supplementary code point (U+10000..U+10ffff) + * @stable ICU 2.4 + */ +#define U16_GET_SUPPLEMENTARY(lead, trail) \ + (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET) + + +/** + * Get the lead surrogate (0xd800..0xdbff) for a + * supplementary code point (0x10000..0x10ffff). + * @param supplementary 32-bit code point (U+10000..U+10ffff) + * @return lead surrogate (U+d800..U+dbff) for supplementary + * @stable ICU 2.4 + */ +#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) + +/** + * Get the trail surrogate (0xdc00..0xdfff) for a + * supplementary code point (0x10000..0x10ffff). + * @param supplementary 32-bit code point (U+10000..U+10ffff) + * @return trail surrogate (U+dc00..U+dfff) for supplementary + * @stable ICU 2.4 + */ +#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) + +/** + * How many 16-bit code units are used to encode this Unicode code point? (1 or 2) + * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff). + * @param c 32-bit code point + * @return 1 or 2 + * @stable ICU 2.4 + */ +#define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) + +/** + * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff). + * @return 2 + * @stable ICU 2.4 + */ +#define U16_MAX_LENGTH 2 + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * "Unsafe" macro, assumes well-formed UTF-16. + * + * The offset may point to either the lead or trail surrogate unit + * for a supplementary code point, in which case the macro will read + * the adjacent matching surrogate as well. + * The result is undefined if the offset points to a single, unpaired surrogate. + * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT. 
+ * + * @param s const UChar * string + * @param i string offset + * @param c output UChar32 variable + * @see U16_GET + * @stable ICU 2.4 + */ +#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[i]; \ + if(U16_IS_SURROGATE(c)) { \ + if(U16_IS_SURROGATE_LEAD(c)) { \ + (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \ + } else { \ + (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The offset may point to either the lead or trail surrogate unit + * for a supplementary code point, in which case the macro will read + * the adjacent matching surrogate as well. + * + * The length can be negative for a NUL-terminated string. + * + * If the offset points to a single, unpaired surrogate, then + * c is set to that unpaired surrogate. + * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT. + * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start<=i (start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ + (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ + } \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The offset may point to either the lead or trail surrogate unit + * for a supplementary code point, in which case the macro will read + * the adjacent matching surrogate as well. + * + * The length can be negative for a NUL-terminated string. + * + * If the offset points to a single, unpaired surrogate, then + * c is set to U+FFFD. + * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD. 
+ * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start<=i (start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ + (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ + } else { \ + (c)=0xfffd; \ + } \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/* definitions with forward iteration --------------------------------------- */ + +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Unsafe" macro, assumes well-formed UTF-16. + * + * The offset may point to the lead surrogate unit + * for a supplementary code point, in which case the macro will read + * the following trail surrogate as well. + * If the offset points to a trail surrogate, then that itself + * will be returned as the code point. + * The result is undefined if the offset points to a single, unpaired lead surrogate. + * + * @param s const UChar * string + * @param i string offset + * @param c output UChar32 variable + * @see U16_NEXT + * @stable ICU 2.4 + */ +#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[(i)++]; \ + if(U16_IS_LEAD(c)) { \ + (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * The offset may point to the lead surrogate unit + * for a supplementary code point, in which case the macro will read + * the following trail surrogate as well. + * If the offset points to a trail surrogate or + * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate. 
+ * + * @param s const UChar * string + * @param i string offset, must be i >10)+0xd7c0); \ + (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Append a code point to a string, overwriting 1 or 2 code units. + * The offset points to the current end of the string contents + * and is advanced (post-increment). + * "Safe" macro, checks for a valid code point. + * If a surrogate pair is written, checks for sufficient space in the string. + * If the code point is not valid or a trail surrogate does not fit, + * then isError is set to true. + * + * @param s const UChar * string buffer + * @param i string offset, must be i >10)+0xd7c0); \ + (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ + } else /* c>0x10ffff or not enough space */ { \ + (isError)=true; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Advance the string offset from one code point boundary to the next. + * (Post-incrementing iteration.) + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @see U16_FWD_1 + * @stable ICU 2.4 + */ +#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ + if(U16_IS_LEAD((s)[(i)++])) { \ + ++(i); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Advance the string offset from one code point boundary to the next. + * (Post-incrementing iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * @param s const UChar * string + * @param i string offset, must be i 0) { \ + U16_FWD_1_UNSAFE(s, i); \ + --__N; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Advance the string offset from one code point boundary to the n-th next one, + * i.e., move forward by n code points. + * (Post-incrementing iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The length can be negative for a NUL-terminated string. 
+ * + * @param s const UChar * string + * @param i int32_t string offset, must be i 0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \ + U16_FWD_1(s, i, length); \ + --__N; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Adjust a random-access offset to a code point boundary + * at the start of a code point. + * If the offset points to the trail surrogate of a surrogate pair, + * then the offset is decremented. + * Otherwise, it is not modified. + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @see U16_SET_CP_START + * @stable ICU 2.4 + */ +#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ + if(U16_IS_TRAIL((s)[i])) { \ + --(i); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Adjust a random-access offset to a code point boundary + * at the start of a code point. + * If the offset points to the trail surrogate of a surrogate pair, + * then the offset is decremented. + * Otherwise, it is not modified. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start<=i + * @see U16_SET_CP_START_UNSAFE + * @stable ICU 2.4 + */ +#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ + if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ + --(i); \ + } \ +} UPRV_BLOCK_MACRO_END + +/* definitions with backward iteration -------------------------------------- */ + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Unsafe" macro, assumes well-formed UTF-16. + * + * The input offset may be the same as the string length. + * If the offset is behind a trail surrogate unit + * for a supplementary code point, then the macro will read + * the preceding lead surrogate as well. 
+ * If the offset is behind a lead surrogate, then that itself + * will be returned as the code point. + * The result is undefined if the offset is behind a single, unpaired trail surrogate. + * + * @param s const UChar * string + * @param i string offset + * @param c output UChar32 variable + * @see U16_PREV + * @stable ICU 2.4 + */ +#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[--(i)]; \ + if(U16_IS_TRAIL(c)) { \ + (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The input offset may be the same as the string length. + * If the offset is behind a trail surrogate unit + * for a supplementary code point, then the macro will read + * the preceding lead surrogate as well. + * If the offset is behind a lead surrogate or behind a single, unpaired + * trail surrogate, then c is set to that unpaired surrogate. + * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ + --(i); \ + (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The input offset may be the same as the string length. + * If the offset is behind a trail surrogate unit + * for a supplementary code point, then the macro will read + * the preceding lead surrogate as well. + * If the offset is behind a lead surrogate or behind a single, unpaired + * trail surrogate, then c is set to U+FFFD. 
+ * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ + --(i); \ + (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ + } else { \ + (c)=0xfffd; \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Move the string offset from one code point boundary to the previous one. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @see U16_BACK_1 + * @stable ICU 2.4 + */ +#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ + if(U16_IS_TRAIL((s)[--(i)])) { \ + --(i); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Move the string offset from one code point boundary to the previous one. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start(start) && U16_IS_LEAD((s)[(i)-1])) { \ + --(i); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Move the string offset from one code point boundary to the n-th one before it, + * i.e., move backward by n code points. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @param n number of code points to skip + * @see U16_BACK_N + * @stable ICU 2.4 + */ +#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t __N=(n); \ + while(__N>0) { \ + U16_BACK_1_UNSAFE(s, i); \ + --__N; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Move the string offset from one code point boundary to the n-th one before it, + * i.e., move backward by n code points. 
+ * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * @param s const UChar * string + * @param start start of string + * @param i string offset, must be start0 && (i)>(start)) { \ + U16_BACK_1(s, start, i); \ + --__N; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Adjust a random-access offset to a code point boundary after a code point. + * If the offset is behind the lead surrogate of a surrogate pair, + * then the offset is incremented. + * Otherwise, it is not modified. + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @see U16_SET_CP_LIMIT + * @stable ICU 2.4 + */ +#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ + if(U16_IS_LEAD((s)[(i)-1])) { \ + ++(i); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Adjust a random-access offset to a code point boundary after a code point. + * If the offset is behind the lead surrogate of a surrogate pair, + * then the offset is incremented. + * Otherwise, it is not modified. + * The input offset may be the same as the string length. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The length can be negative for a NUL-terminated string. 
+ * + * @param s const UChar * string + * @param start int32_t starting string offset (usually 0) + * @param i int32_t string offset, start<=i<=length + * @param length int32_t string length + * @see U16_SET_CP_LIMIT_UNSAFE + * @stable ICU 2.4 + */ +#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \ + if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \ + ++(i); \ + } \ +} UPRV_BLOCK_MACRO_END + +#endif diff --git a/third_party/icu4c/ndk_headers/unicode/utf8.h b/third_party/icu4c/ndk_headers/unicode/utf8.h new file mode 100644 index 0000000000000000000000000000000000000000..9d7bd37ce4979388e8e999b17ed1a6bce255c888 --- /dev/null +++ b/third_party/icu4c/ndk_headers/unicode/utf8.h @@ -0,0 +1,760 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1999-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: utf8.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999sep13 +* created by: Markus W. Scherer +*/ + +/** + * \file + * \brief C API: 8-bit Unicode handling macros + * + * This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings. + * + * For more information see utf.h and the ICU User Guide Strings chapter + * (https://unicode-org.github.io/icu/userguide/strings). + * + * Usage: + * ICU coding guidelines for if() statements should be followed when using these macros. + * Compound statements (curly braces {}) must be used for if-else-while... + * bodies and all macro statements should be terminated with semicolon. 
+ */ + +#ifndef __UTF8_H__ +#define __UTF8_H__ + +#include +#include "unicode/umachine.h" +#ifndef __UTF_H__ +# include "unicode/utf.h" +#endif + +/* single-code point definitions -------------------------------------------- */ + +/** + * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)? + * @param c 8-bit code unit (byte) + * @return true or false + * @stable ICU 2.4 + */ +#define U8_IS_SINGLE(c) (((c)&0x80)==0) + +/** + * Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4) + * @param c 8-bit code unit (byte) + * @return true or false + * @stable ICU 2.4 + */ +#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc2)<=0x32) +// 0x32=0xf4-0xc2 + +/** + * Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF) + * @param c 8-bit code unit (byte) + * @return true or false + * @stable ICU 2.4 + */ +#define U8_IS_TRAIL(c) ((int8_t)(c)<-0x40) + +/** + * How many code units (bytes) are used for the UTF-8 encoding + * of this Unicode code point? + * @param c 32-bit code point + * @return 1..4, or 0 if c is a surrogate or not a Unicode code point + * @stable ICU 2.4 + */ +#define U8_LENGTH(c) \ + ((uint32_t)(c)<=0x7f ? 1 : \ + ((uint32_t)(c)<=0x7ff ? 2 : \ + ((uint32_t)(c)<=0xd7ff ? 3 : \ + ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \ + ((uint32_t)(c)<=0xffff ? 3 : 4)\ + ) \ + ) \ + ) \ + ) + +/** + * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff). + * @return 4 + * @stable ICU 2.4 + */ +#define U8_MAX_LENGTH 4 + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * The offset may point to either the lead byte or one of the trail bytes + * for a code point, in which case the macro will read all of the bytes + * for the code point. + * The result is undefined if the offset points to an illegal UTF-8 + * byte sequence. + * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT. 
+ * + * @param s const uint8_t * string + * @param i string offset + * @param c output UChar32 variable + * @see U8_GET + * @stable ICU 2.4 + */ +#define U8_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t _u8_get_unsafe_index=(int32_t)(i); \ + U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \ + U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \ +} UPRV_BLOCK_MACRO_END + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * The offset may point to either the lead byte or one of the trail bytes + * for a code point, in which case the macro will read all of the bytes + * for the code point. + * + * The length can be negative for a NUL-terminated string. + * + * If the offset points to an illegal UTF-8 byte sequence, then + * c is set to a negative value. + * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT. + * + * @param s const uint8_t * string + * @param start int32_t starting string offset + * @param i int32_t string offset, must be start<=i =0xe0 ? \ + ((c)<0xf0 ? /* U+0800..U+FFFF except surrogates */ \ + U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \ + (__t&=0x3f, 1) \ + : /* U+10000..U+10FFFF */ \ + ((c)-=0xf0)<=4 && \ + U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \ + ((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \ + (__t=(s)[i]-0x80)<=0x3f) && \ + /* valid second-to-last trail byte */ \ + ((c)=((c)<<6)|__t, ++(i)!=(length)) \ + : /* U+0080..U+07FF */ \ + (c)>=0xc2 && ((c)&=0x1f, 1)) && \ + /* last trail byte */ \ + (__t=(s)[i]-0x80)<=0x3f && \ + ((c)=((c)<<6)|__t, ++(i), 1)) { \ + } else { \ + (c)=(sub); /* ill-formed*/ \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Append a code point to a string, overwriting 1 to 4 bytes. + * The offset points to the current end of the string contents + * and is advanced (post-increment). + * "Unsafe" macro, assumes a valid code point and sufficient space in the string. + * Otherwise, the result is undefined. 
+ * + * @param s const uint8_t * string buffer + * @param i string offset + * @param c code point to append + * @see U8_APPEND + * @stable ICU 2.4 + */ +#define U8_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + uint32_t __uc=(c); \ + if(__uc<=0x7f) { \ + (s)[(i)++]=(uint8_t)__uc; \ + } else { \ + if(__uc<=0x7ff) { \ + (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \ + } else { \ + if(__uc<=0xffff) { \ + (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \ + } else { \ + (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \ + (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \ + } \ + (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ + } \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Append a code point to a string, overwriting 1 to 4 bytes. + * The offset points to the current end of the string contents + * and is advanced (post-increment). + * "Safe" macro, checks for a valid code point. + * If a non-ASCII code point is written, checks for sufficient space in the string. + * If the code point is not valid or trail bytes do not fit, + * then isError is set to true. + * + * @param s const uint8_t * string buffer + * @param i int32_t string offset, must be i >6)|0xc0); \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ + } else if((__uc<=0xd7ff || (0xe000<=__uc && __uc<=0xffff)) && (i)+2<(capacity)) { \ + (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \ + (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ + } else if(0xffff<__uc && __uc<=0x10ffff && (i)+3<(capacity)) { \ + (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \ + (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ + } else { \ + (isError)=true; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Advance the string offset from one code point boundary to the next. + * (Post-incrementing iteration.) + * "Unsafe" macro, assumes well-formed UTF-8. 
+ * + * @param s const uint8_t * string + * @param i string offset + * @see U8_FWD_1 + * @stable ICU 2.4 + */ +#define U8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ + (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((s)[i]); \ +} UPRV_BLOCK_MACRO_END + +/** + * Advance the string offset from one code point boundary to the next. + * (Post-incrementing iteration.) + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * @param s const uint8_t * string + * @param i int32_t string offset, must be i =0xf0 */ { \ + if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \ + ++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \ + ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \ + ++(i); \ + } \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Advance the string offset from one code point boundary to the n-th next one, + * i.e., move forward by n code points. + * (Post-incrementing iteration.) + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @param n number of code points to skip + * @see U8_FWD_N + * @stable ICU 2.4 + */ +#define U8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t __N=(n); \ + while(__N>0) { \ + U8_FWD_1_UNSAFE(s, i); \ + --__N; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Advance the string offset from one code point boundary to the n-th next one, + * i.e., move forward by n code points. + * (Post-incrementing iteration.) + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * @param s const uint8_t * string + * @param i int32_t string offset, must be i 0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \ + U8_FWD_1(s, i, length); \ + --__N; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Adjust a random-access offset to a code point boundary + * at the start of a code point. 
+ * If the offset points to a UTF-8 trail byte, + * then the offset is moved backward to the corresponding lead byte. + * Otherwise, it is not modified. + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @see U8_SET_CP_START + * @stable ICU 2.4 + */ +#define U8_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ + while(U8_IS_TRAIL((s)[i])) { --(i); } \ +} UPRV_BLOCK_MACRO_END + +/** + * Adjust a random-access offset to a code point boundary + * at the start of a code point. + * If the offset points to a UTF-8 trail byte, + * then the offset is moved backward to the corresponding lead byte. + * Otherwise, it is not modified. + * + * "Safe" macro, checks for illegal sequences and for string boundaries. + * Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i]. + * + * @param s const uint8_t * string + * @param start int32_t starting string offset (usually 0) + * @param i int32_t string offset, must be start<=i + * @see U8_SET_CP_START_UNSAFE + * @see U8_TRUNCATE_IF_INCOMPLETE + * @stable ICU 2.4 + */ +#define U8_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ + if(U8_IS_TRAIL((s)[(i)])) { \ + (i)=utf8_back1SafeBody(s, start, (i)); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * If the string ends with a UTF-8 byte sequence that is valid so far + * but incomplete, then reduce the length of the string to end before + * the lead byte of that incomplete sequence. + * For example, if the string ends with E1 80, the length is reduced by 2. + * + * In all other cases (the string ends with a complete sequence, or it is not + * possible for any further trail byte to extend the trailing sequence) + * the length remains unchanged. + * + * Useful for processing text split across multiple buffers + * (save the incomplete sequence for later) + * and for optimizing iteration + * (check for string length only once per character). 
+ * + * "Safe" macro, checks for illegal sequences and for string boundaries. + * Unlike U8_SET_CP_START(), this macro never reads s[length]. + * + * (In UTF-16, simply check for U16_IS_LEAD(last code unit).) + * + * @param s const uint8_t * string + * @param start int32_t starting string offset (usually 0) + * @param length int32_t string length (usually start<=length) + * @see U8_SET_CP_START + * @stable ICU 61 + */ +#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) UPRV_BLOCK_MACRO_BEGIN { \ + if((length)>(start)) { \ + uint8_t __b1=s[(length)-1]; \ + if(U8_IS_SINGLE(__b1)) { \ + /* common ASCII character */ \ + } else if(U8_IS_LEAD(__b1)) { \ + --(length); \ + } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \ + uint8_t __b2=s[(length)-2]; \ + if(0xe0<=__b2 && __b2<=0xf4) { \ + if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \ + U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \ + (length)-=2; \ + } \ + } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \ + uint8_t __b3=s[(length)-3]; \ + if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \ + (length)-=3; \ + } \ + } \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/* definitions with backward iteration -------------------------------------- */ + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Unsafe" macro, assumes well-formed UTF-8. + * + * The input offset may be the same as the string length. + * If the offset is behind a multi-byte sequence, then the macro will read + * the whole sequence. + * If the offset is behind a lead byte, then that itself + * will be returned as the code point. + * The result is undefined if the offset is behind an illegal UTF-8 sequence. 
+ * + * @param s const uint8_t * string + * @param i string offset + * @param c output UChar32 variable + * @see U8_PREV + * @stable ICU 2.4 + */ +#define U8_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(uint8_t)(s)[--(i)]; \ + if(U8_IS_TRAIL(c)) { \ + uint8_t __b, __count=1, __shift=6; \ +\ + /* c is a trail byte */ \ + (c)&=0x3f; \ + for(;;) { \ + __b=(s)[--(i)]; \ + if(__b>=0xc0) { \ + U8_MASK_LEAD_BYTE(__b, __count); \ + (c)|=(UChar32)__b<<__shift; \ + break; \ + } else { \ + (c)|=(UChar32)(__b&0x3f)<<__shift; \ + ++__count; \ + __shift+=6; \ + } \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The input offset may be the same as the string length. + * If the offset is behind a multi-byte sequence, then the macro will read + * the whole sequence. + * If the offset is behind a lead byte, then that itself + * will be returned as the code point. + * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value. + * + * @param s const uint8_t * string + * @param start int32_t starting string offset (usually 0) + * @param i int32_t string offset, must be start0) { \ + U8_BACK_1_UNSAFE(s, i); \ + --__N; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Move the string offset from one code point boundary to the n-th one before it, + * i.e., move backward by n code points. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Safe" macro, checks for illegal sequences and for string boundaries. 
+ * + * @param s const uint8_t * string + * @param start int32_t index of the start of the string + * @param i int32_t string offset, must be start < i + * @param n number of code points to skip + * @see U8_BACK_N_UNSAFE + * @stable ICU 2.4 + */ +#define U8_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t __N=(n); \ + while(__N>0 && (i)>(start)) { \ + U8_BACK_1(s, start, i); \ + --__N; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Adjust a random-access offset to a code point boundary after a code point. + * If the offset is behind a partial multi-byte sequence, + * then the offset is incremented to behind the whole sequence. + * Otherwise, it is not modified. + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @see U8_SET_CP_LIMIT + * @stable ICU 2.4 + */ +#define U8_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ + U8_BACK_1_UNSAFE(s, i); \ + U8_FWD_1_UNSAFE(s, i); \ +} UPRV_BLOCK_MACRO_END + +/** + * Adjust a random-access offset to a code point boundary after a code point. + * If the offset is behind a partial multi-byte sequence, + * then the offset is incremented to behind the whole sequence. + * Otherwise, it is not modified. + * The input offset may be the same as the string length. + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The length can be negative for a NUL-terminated string.
+ * + * @param s const uint8_t * string + * @param start int32_t starting string offset (usually 0) + * @param i int32_t string offset, must be start<=i<=length + * @param length int32_t string length + * @see U8_SET_CP_LIMIT_UNSAFE + * @stable ICU 2.4 + */ +#define U8_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \ + if((start)<(i) && ((i)<(length) || (length)<0)) { \ + U8_BACK_1(s, start, i); \ + U8_FWD_1(s, i, length); \ + } \ +} UPRV_BLOCK_MACRO_END + +#endif diff --git a/third_party/icu4c/ndk_headers/unicode/utypes.h b/third_party/icu4c/ndk_headers/unicode/utypes.h index 8293ca70e7caafc051c959bd87c1d8b532341dca..1df6d0ec202da9f1b3574057df4de9002c12a9aa 100644 --- a/third_party/icu4c/ndk_headers/unicode/utypes.h +++ b/third_party/icu4c/ndk_headers/unicode/utypes.h @@ -36,6 +36,7 @@ #include "unicode/umachine.h" +#include "unicode/uversion.h" #include "unicode/uconfig.h" #include diff --git a/third_party/icu4c/ndk_headers/unicode/uvernum.h b/third_party/icu4c/ndk_headers/unicode/uvernum.h new file mode 100644 index 0000000000000000000000000000000000000000..e77cd6195c80f05fdce228e7b71fb65d61b92f56 --- /dev/null +++ b/third_party/icu4c/ndk_headers/unicode/uvernum.h @@ -0,0 +1,128 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2000-2016, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* +* file name: uvernum.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* Created by: Vladimir Weinstein +* Updated by: Steven R. Loomis +* +*/ + +/** + * \file + * \brief C API: definitions of ICU version numbers + * + * This file is included by uversion.h and other files. This file contains only + * macros and definitions. The actual version numbers are defined here. 
+ */ + + /* + * IMPORTANT: When updating version, the following things need to be done: + * source/common/unicode/uvernum.h - this file: update major, minor, + * patchlevel, suffix, version, short version constants, namespace, + * renaming macro, and copyright + * + * The following files need to be updated as well, which can be done + * by running the UNIX makefile target 'update-windows-makefiles' in icu4c/source. + * + * source/allinone/Build.Windows.IcuVersion.props - Update the IcuMajorVersion + * source/data/makedata.mak - change U_ICUDATA_NAME so that it contains + * the new major/minor combination, and UNICODE_VERSION + * for the Unicode version. + */ + +#ifndef UVERNUM_H +#define UVERNUM_H + +/** The standard copyright notice that gets compiled into each library. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.4 + */ +#define U_COPYRIGHT_STRING \ + " Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html " + +/** The current ICU major version as an integer. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.4 + */ +#define U_ICU_VERSION_MAJOR_NUM 72 + +/** The current ICU minor version as an integer. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.6 + */ +#define U_ICU_VERSION_MINOR_NUM 1 + +/** The current ICU patchlevel version as an integer. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.4 + */ +#define U_ICU_VERSION_PATCHLEVEL_NUM 0 + +/** The current ICU build level version as an integer. + * This value is for use by ICU clients. It defaults to 0. 
+ * @stable ICU 4.0 + */ +#ifndef U_ICU_VERSION_BUILDLEVEL_NUM +#define U_ICU_VERSION_BUILDLEVEL_NUM 0 +#endif + +/** Glued version suffix for renamers + * This value will change in the subsequent releases of ICU + * @stable ICU 2.6 + */ +#define U_ICU_VERSION_SUFFIX _72 + +/** The current ICU library version as a dotted-decimal string. The patchlevel + * only appears in this string if it is non-zero. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.4 + */ +#define U_ICU_VERSION "72.1" + +/** + * The current ICU library major version number as a string, for library name suffixes. + * This value will change in subsequent releases of ICU. + * + * Until ICU 4.8, this was the combination of the single-digit major and minor ICU version numbers + * into one string without dots ("48"). + * Since ICU 49, it is the double-digit major ICU version number. + * See https://unicode-org.github.io/icu/userguide/design#version-numbers-in-icu + * + * @stable ICU 2.6 + */ +#define U_ICU_VERSION_SHORT "72" + +/*=========================================================================== + * ICU collation framework version information + * Version info that can be obtained from a collator is affected by these + * numbers in a secret and magic way. Please use collator version as a whole + *=========================================================================== + */ + +/** + * Collation runtime version (sort key generator, strcoll). + * If the version is different, sort keys for the same string could be different. + * This value may change in subsequent releases of ICU. + * @stable ICU 2.4 + */ +#define UCOL_RUNTIME_VERSION 9 + +/** + * Collation builder code version. + * When this is different, the same tailoring might result + * in assigning different collation elements to code points. + * This value may change in subsequent releases of ICU.
+ * @stable ICU 2.4 + */ +#define UCOL_BUILDER_VERSION 9 + +#endif diff --git a/third_party/icu4c/ndk_headers/unicode/uversion.h b/third_party/icu4c/ndk_headers/unicode/uversion.h new file mode 100644 index 0000000000000000000000000000000000000000..5b4f37375879094e9777ab78f5a2df0925803a3c --- /dev/null +++ b/third_party/icu4c/ndk_headers/unicode/uversion.h @@ -0,0 +1,119 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2000-2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* +* file name: uversion.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* Created by: Vladimir Weinstein +* +* Gets included by utypes.h and Windows .rc files +*/ + +/** + * \file + * \brief C API: API for accessing ICU version numbers. + */ +/*===========================================================================*/ +/* Main ICU version information */ +/*===========================================================================*/ + +#ifndef UVERSION_H +#define UVERSION_H + +#include "unicode/umachine.h" + +/* Actual version info lives in uvernum.h */ +#include "unicode/uvernum.h" + +/** Maximum length of the copyright string. + * @stable ICU 2.4 + */ +#define U_COPYRIGHT_STRING_LENGTH 128 + +/** An ICU version consists of up to 4 numbers from 0..255. + * @stable ICU 2.4 + */ +#define U_MAX_VERSION_LENGTH 4 + +/** In a string, ICU version fields are delimited by dots. + * @stable ICU 2.4 + */ +#define U_VERSION_DELIMITER '.' + +/** The maximum length of an ICU version string. + * @stable ICU 2.4 + */ +#define U_MAX_VERSION_STRING_LENGTH 20 + +/** The binary form of a version on ICU APIs is an array of 4 uint8_t. + * To compare two versions, use memcmp(v1,v2,sizeof(UVersionInfo)). 
+ * @stable ICU 2.4 + */ +typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]; + +/*===========================================================================*/ +/* General version helper functions. Definitions in putil.c */ +/*===========================================================================*/ + +/** + * Parse a string with dotted-decimal version information and + * fill in a UVersionInfo structure with the result. + * Definition of this function lives in putil.c + * + * @param versionArray The destination structure for the version information. + * @param versionString A string with dotted-decimal version information, + * with up to four non-negative number fields with + * values of up to 255 each. + * @stable ICU 2.4 + */ +U_CAPI void U_EXPORT2 +u_versionFromString(UVersionInfo versionArray, const char *versionString); + +/** + * Parse a Unicode string with dotted-decimal version information and + * fill in a UVersionInfo structure with the result. + * Definition of this function lives in putil.c + * + * @param versionArray The destination structure for the version information. + * @param versionString A Unicode string with dotted-decimal version + * information, with up to four non-negative number + * fields with values of up to 255 each. + * @stable ICU 4.2 + */ +U_CAPI void U_EXPORT2 +u_versionFromUString(UVersionInfo versionArray, const UChar *versionString); + + +/** + * Write a string with dotted-decimal version information according + * to the input UVersionInfo. + * Definition of this function lives in putil.c + * + * @param versionArray The version information to be written as a string. + * @param versionString A string buffer that will be filled in with + * a string corresponding to the numeric version + * information in versionArray. + * The buffer size must be at least U_MAX_VERSION_STRING_LENGTH. 
+ * @stable ICU 2.4 + */ +U_CAPI void U_EXPORT2 +u_versionToString(const UVersionInfo versionArray, char *versionString); + +/** + * Gets the ICU release version. The version array stores the version information + * for ICU. For example, release "1.3.31.2" is then represented as 0x01031F02. + * Definition of this function lives in putil.c + * + * @param versionArray the version # information, the result will be filled in + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +u_getVersion(UVersionInfo versionArray); +#endif