📄 ucnv.h
字号:
/************************************************************************ Copyright (C) 1999-2004, International Business Machines* Corporation and others. All Rights Reserved.********************************************************************** * ucnv.h: * External APIs for the ICU's codeset conversion library * Bertrand A. Damiba * * Modification History: * * Date Name Description * 04/04/99 helena Fixed internal header inclusion. * 05/11/00 helena Added setFallback and usesFallback APIs. * 06/29/2000 helena Major rewrite of the callback APIs. * 12/07/2000 srl Update of documentation *//** * \file * \brief C API: Character conversion * * <h2>Character Conversion C API</h2> * * <p>This API is used to convert codepage or character encoded data to and * from UTF-16. You can open a converter with {@link ucnv_open() }. With that * converter, you can get its properties, set options, convert your data and * close the converter.</p> * * <p>Since many software programs recogize different converter names for * different types of converters, there are other functions in this API to * iterate over the converter aliases. The functions {@link ucnv_getAvailableName() }, * {@link ucnv_getAlias() } and {@link ucnv_getStandardName() } are some of the * more frequently used alias functions to get this information.</p> * * <p>When a converter encounters an illegal, irregular, invalid or unmappable character * its default behavior is to use a substitution character to replace the * bad byte sequence. This behavior can be changed by using {@link ucnv_getFromUCallBack() } * or {@link ucnv_getToUCallBack() } on the converter. The header ucnv_err.h defines * many other callback actions that can be used instead of a character substitution.</p> * * <p>More information about this API can be found in our * <a href="http://oss.software.ibm.com/icu/userguide/conversion.html">User's * Guide</a>.</p> */#ifndef UCNV_H#define UCNV_H#include "unicode/ucnv_err.h"#include "unicode/uenum.h"#ifndef __USET_H__/** * USet is the C API type for Unicode sets. * It is forward-declared here to avoid including the header file if related * conversion APIs are not used. * See unicode/uset.h * * @see ucnv_getUnicodeSet * @stable ICU 2.6 */struct USet;/** @stable ICU 2.6 */typedef struct USet USet;#endif#if !UCONFIG_NO_CONVERSIONU_CDECL_BEGIN/** Maximum length of a converter name including the terminating NULL @stable ICU 2.0 */#define UCNV_MAX_CONVERTER_NAME_LENGTH 60/** Maximum length of a converter name including path and terminating NULL @stable ICU 2.0 */#define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH)/** Shift in for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */#define UCNV_SI 0x0F/** Shift out for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */#define UCNV_SO 0x0E/** * Enum for specifying basic types of converters * @see ucnv_getType * @stable ICU 2.0 */typedef enum { UCNV_UNSUPPORTED_CONVERTER = -1, UCNV_SBCS = 0, UCNV_DBCS = 1, UCNV_MBCS = 2, UCNV_LATIN_1 = 3, UCNV_UTF8 = 4, UCNV_UTF16_BigEndian = 5, UCNV_UTF16_LittleEndian = 6, UCNV_UTF32_BigEndian = 7, UCNV_UTF32_LittleEndian = 8, UCNV_EBCDIC_STATEFUL = 9, UCNV_ISO_2022 = 10, UCNV_LMBCS_1 = 11, UCNV_LMBCS_2, UCNV_LMBCS_3, UCNV_LMBCS_4, UCNV_LMBCS_5, UCNV_LMBCS_6, UCNV_LMBCS_8, UCNV_LMBCS_11, UCNV_LMBCS_16, UCNV_LMBCS_17, UCNV_LMBCS_18, UCNV_LMBCS_19, UCNV_LMBCS_LAST = UCNV_LMBCS_19, UCNV_HZ, UCNV_SCSU, UCNV_ISCII, UCNV_US_ASCII, UCNV_UTF7, UCNV_BOCU1, UCNV_UTF16, UCNV_UTF32, UCNV_CESU8, UCNV_IMAP_MAILBOX, /* Number of converter types for which we have conversion routines. */ UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES} UConverterType;/** * Enum for specifying which platform a converter ID refers to. * The use of platform/CCSID is not recommended. See ucnv_openCCSID(). * * @see ucnv_getPlatform * @see ucnv_openCCSID * @see ucnv_getCCSID * @stable ICU 2.0 */typedef enum { UCNV_UNKNOWN = -1, UCNV_IBM = 0} UConverterPlatform;/** * Function pointer for error callback in the codepage to unicode direction. * Called when an error has occured in conversion to unicode, or on open/close of the callback (see reason). * @param context Pointer to the callback's private data * @param args Information about the conversion in progress * @param codeUnits Points to 'length' bytes of the concerned codepage sequence * @param length Size (in bytes) of the concerned codepage sequence * @param reason Defines the reason the callback was invoked * @see ucnv_setToUCallBack * @see UConverterToUnicodeArgs * @stable ICU 2.0 */typedef void (U_EXPORT2 *UConverterToUCallback) ( const void* context, UConverterToUnicodeArgs *args, const char *codeUnits, int32_t length, UConverterCallbackReason reason, UErrorCode *);/** * Function pointer for error callback in the unicode to codepage direction. * Called when an error has occured in conversion from unicode, or on open/close of the callback (see reason). * @param context Pointer to the callback's private data * @param args Information about the conversion in progress * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence * @param length Size (in bytes) of the concerned codepage sequence * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. * @param reason Defines the reason the callback was invoked * @see ucnv_setFromUCallBack * @stable ICU 2.0 */typedef void (U_EXPORT2 *UConverterFromUCallback) ( const void* context, UConverterFromUnicodeArgs *args, const UChar* codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason, UErrorCode *);U_CDECL_END/** * Character that separates converter names from options and options from each other. * @see ucnv_open * @stable ICU 2.0 */#define UCNV_OPTION_SEP_CHAR ','/** * String version of UCNV_OPTION_SEP_CHAR. * @see ucnv_open * @stable ICU 2.0 */#define UCNV_OPTION_SEP_STRING ","/** * Character that separates a converter option from its value. * @see ucnv_open * @stable ICU 2.0 */#define UCNV_VALUE_SEP_CHAR '='/** * String version of UCNV_VALUE_SEP_CHAR. * @see ucnv_open * @stable ICU 2.0 */#define UCNV_VALUE_SEP_STRING "="/** * Converter option for specifying a locale. * For example, ucnv_open("SCSU,locale=ja", &errorCode); * See convrtrs.txt. * * @see ucnv_open * @stable ICU 2.0 */#define UCNV_LOCALE_OPTION_STRING ",locale="/** * Converter option for specifying a version selector (0..9) for some converters. * For example, ucnv_open("UTF-7,version=1", &errorCode); * See convrtrs.txt. * * @see ucnv_open * @stable ICU 2.4 */#define UCNV_VERSION_OPTION_STRING ",version="/** * Converter option for EBCDIC SBCS or mixed-SBCS/DBCS (stateful) codepages. * Swaps Unicode mappings for EBCDIC LF and NL codes, as used on * S/390 (z/OS) Unix System Services (Open Edition). * For example, ucnv_open("ibm-1047,swaplfnl", &errorCode); * See convrtrs.txt. * * @see ucnv_open * @stable ICU 2.4 */#define UCNV_SWAP_LFNL_OPTION_STRING ",swaplfnl"/** * Do a fuzzy compare of a two converter/alias names. The comparison * is case-insensitive. It also ignores the characters '-', '_', and * ' ' (dash, underscore, and space). Thus the strings "UTF-8", * "utf_8", and "Utf 8" are exactly equivalent. * * @param name1 a converter name or alias, zero-terminated * @param name2 a converter name or alias, zero-terminated * @return 0 if the names match, or a negative value if the name1 * lexically precedes name2, or a positive value if the name1 * lexically follows name2. * @stable ICU 2.0 */U_STABLE int U_EXPORT2ucnv_compareNames(const char *name1, const char *name2);/** * Creates a UConverter object with the names specified as a C string. * The actual name will be resolved with the alias file * using a case-insensitive string comparison that ignores * the delimiters '-', '_', and ' ' (dash, underscore, and space). * E.g., the names "UTF8", "utf-8", and "Utf 8" are all equivalent. * If <code>NULL</code> is passed for the converter name, it will create one with the * getDefaultName return value. * * <p>A converter name for ICU 1.5 and above may contain options * like a locale specification to control the specific behavior of * the newly instantiated converter. * The meaning of the options depends on the particular converter. * If an option is not defined for or recognized by a given converter, then it is ignored.</p> * * <p>Options are appended to the converter name string, with a * <code>UCNV_OPTION_SEP_CHAR</code> between the name and the first option and * also between adjacent options.</p> * * <p>If the alias is ambiguous, then the preferred converter is used * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.</p> * * <p>The conversion behavior and names can vary between platforms. ICU may * convert some characters differently from other platforms. Details on this topic * are in the <a href="http://oss.software.ibm.com/icu/userguide/conversion.html">User's * Guide</a>.</p> * * @param converterName Name of the uconv table, may have options appended * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT> * @return the created Unicode converter object, or <TT>NULL</TT> if an error occured * @see ucnv_openU * @see ucnv_openCCSID * @see ucnv_close * @stable ICU 2.0 */U_STABLE UConverter* U_EXPORT2 ucnv_open(const char *converterName, UErrorCode *err);/** * Creates a Unicode converter with the names specified as unicode string. * The name should be limited to the ASCII-7 alphanumerics range. * The actual name will be resolved with the alias file * using a case-insensitive string comparison that ignores * the delimiters '-', '_', and ' ' (dash, underscore, and space). * E.g., the names "UTF8", "utf-8", and "Utf 8" are all equivalent. * If <TT>NULL</TT> is passed for the converter name, it will create * one with the ucnv_getDefaultName() return value.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -