📄 ustring.h
字号:
/************************************************************************ Copyright (C) 1998-2004, International Business Machines* Corporation and others. All Rights Reserved.************************************************************************ File ustring.h** Modification History:** Date Name Description* 12/07/98 bertrand Creation.*******************************************************************************/#ifndef USTRING_H#define USTRING_H#include "unicode/utypes.h"#include "unicode/putil.h"#include "unicode/uiter.h"/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR# define UBRK_TYPEDEF_UBREAK_ITERATOR typedef void UBreakIterator;#endif/** * \file * \brief C API: Unicode string handling functions * * These C API functions provide general Unicode string handling. * * Some functions are equivalent in name, signature, and behavior to the ANSI C <string.h> * functions. (For example, they do not check for bad arguments like NULL string pointers.) * In some cases, only the thread-safe variant of such a function is implemented here * (see u_strtok_r()). * * Other functions provide more Unicode-specific functionality like locale-specific * upper/lower-casing and string comparison in code point order. * * ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units. * UTF-16 encodes each Unicode code point with either one or two UChar code units. * (This is the default form of Unicode, and a forward-compatible extension of the original, * fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0 * in 1996.) * * Some APIs accept a 32-bit UChar32 value for a single code point. * * ICU also handles 16-bit Unicode text with unpaired surrogates. * Such text is not well-formed UTF-16. * Code-point-related functions treat unpaired surrogates as surrogate code points, * i.e., as separate units. * * Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings), * it is much more efficient even for random access because the code unit values * for single-unit characters vs. lead units vs. trail units are completely disjoint. * This means that it is easy to determine character (code point) boundaries from * random offsets in the string. * * Unicode (UTF-16) string processing is optimized for the single-unit case. * Although it is important to support supplementary characters * (which use pairs of lead/trail code units called "surrogates"), * their occurrence is rare. Almost all characters in modern use require only * a single UChar code unit (i.e., their code point values are <=0xffff). * * For more details see the User Guide Strings chapter (http://oss.software.ibm.com/icu/userguide/strings.html). * For a discussion of the handling of unpaired surrogates see also * Jitterbug 2145 and its icu mailing list proposal on 2002-sep-18. *//** * Determine the length of an array of UChar. * * @param s The array of UChars, NULL (U+0000) terminated. * @return The number of UChars in <code>chars</code>, minus the terminator. * @stable ICU 2.0 */U_STABLE int32_t U_EXPORT2u_strlen(const UChar *s);/** * Count Unicode code points in the length UChar code units of the string. * A code point may occupy either one or two UChar code units. * Counting code points involves reading all code units. * * This functions is basically the inverse of the U16_FWD_N() macro (see utf.h). * * @param s The input string. * @param length The number of UChar code units to be checked, or -1 to count all * code points before the first NUL (U+0000). * @return The number of code points in the specified code units. * @stable ICU 2.0 */U_STABLE int32_t U_EXPORT2u_countChar32(const UChar *s, int32_t length);/** * Check if the string contains more Unicode code points than a certain number. * This is more efficient than counting all code points in the entire string * and comparing that number with a threshold. * This function may not need to scan the string at all if the length is known * (not -1 for NUL-termination) and falls within a certain range, and * never needs to count more than 'number+1' code points. * Logically equivalent to (u_countChar32(s, length)>number). * A Unicode code point may occupy either one or two UChar code units. * * @param s The input string. * @param length The length of the string, or -1 if it is NUL-terminated. * @param number The number of code points in the string is compared against * the 'number' parameter. * @return Boolean value for whether the string contains more Unicode code points * than 'number'. Same as (u_countChar32(s, length)>number). * @stable ICU 2.4 */U_STABLE UBool U_EXPORT2u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number);/** * Concatenate two ustrings. Appends a copy of <code>src</code>, * including the null terminator, to <code>dst</code>. The initial copied * character from <code>src</code> overwrites the null terminator in <code>dst</code>. * * @param dst The destination string. * @param src The source string. * @return A pointer to <code>dst</code>. * @stable ICU 2.0 */U_STABLE UChar* U_EXPORT2u_strcat(UChar *dst, const UChar *src);/** * Concatenate two ustrings. * Appends at most <code>n</code> characters from <code>src</code> to <code>dst</code>. * Adds a terminating NUL. * If src is too long, then only <code>n-1</code> characters will be copied * before the terminating NUL. * If <code>n<=0</code> then dst is not modified. * * @param dst The destination string. * @param src The source string. * @param n The maximum number of characters to compare. * @return A pointer to <code>dst</code>. * @stable ICU 2.0 */U_STABLE UChar* U_EXPORT2u_strncat(UChar *dst, const UChar *src, int32_t n);/** * Find the first occurrence of a substring in a string. * The substring is found at code point boundaries. * That means that if the substring begins with * a trail surrogate or ends with a lead surrogate, * then it is found only if these surrogates stand alone in the text. * Otherwise, the substring edge units would be matched against * halves of surrogate pairs. * * @param s The string to search (NUL-terminated). * @param substring The substring to find (NUL-terminated). * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>, * or <code>s</code> itself if the <code>substring</code> is empty, * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. * @stable ICU 2.0 * * @see u_strrstr * @see u_strFindFirst * @see u_strFindLast */U_STABLE UChar * U_EXPORT2u_strstr(const UChar *s, const UChar *substring);/** * Find the first occurrence of a substring in a string. * The substring is found at code point boundaries. * That means that if the substring begins with * a trail surrogate or ends with a lead surrogate, * then it is found only if these surrogates stand alone in the text. * Otherwise, the substring edge units would be matched against * halves of surrogate pairs. * * @param s The string to search. * @param length The length of s (number of UChars), or -1 if it is NUL-terminated. * @param substring The substring to find (NUL-terminated). * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated. * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>, * or <code>s</code> itself if the <code>substring</code> is empty, * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. * @stable ICU 2.4 * * @see u_strstr * @see u_strFindLast */U_STABLE UChar * U_EXPORT2u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);/** * Find the first occurrence of a BMP code point in a string. * A surrogate code point is found only if its match in the text is not * part of a surrogate pair. * A NUL character is found at the string terminator. * * @param s The string to search (NUL-terminated). * @param c The BMP code point to find. * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. * @stable ICU 2.0 * * @see u_strchr32 * @see u_memchr * @see u_strstr * @see u_strFindFirst */U_STABLE UChar * U_EXPORT2u_strchr(const UChar *s, UChar c);/** * Find the first occurrence of a code point in a string. * A surrogate code point is found only if its match in the text is not * part of a surrogate pair. * A NUL character is found at the string terminator. * * @param s The string to search (NUL-terminated). * @param c The code point to find. * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. * @stable ICU 2.0 * * @see u_strchr * @see u_memchr32 * @see u_strstr * @see u_strFindFirst */U_STABLE UChar * U_EXPORT2u_strchr32(const UChar *s, UChar32 c);/** * Find the last occurrence of a substring in a string. * The substring is found at code point boundaries. * That means that if the substring begins with * a trail surrogate or ends with a lead surrogate, * then it is found only if these surrogates stand alone in the text. * Otherwise, the substring edge units would be matched against * halves of surrogate pairs. * * @param s The string to search (NUL-terminated). * @param substring The substring to find (NUL-terminated). * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>, * or <code>s</code> itself if the <code>substring</code> is empty, * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. * @stable ICU 2.4 * * @see u_strstr * @see u_strFindFirst * @see u_strFindLast */U_STABLE UChar * U_EXPORT2u_strrstr(const UChar *s, const UChar *substring);/** * Find the last occurrence of a substring in a string. * The substring is found at code point boundaries. * That means that if the substring begins with * a trail surrogate or ends with a lead surrogate, * then it is found only if these surrogates stand alone in the text. * Otherwise, the substring edge units would be matched against * halves of surrogate pairs. * * @param s The string to search. * @param length The length of s (number of UChars), or -1 if it is NUL-terminated. * @param substring The substring to find (NUL-terminated). * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated. * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>, * or <code>s</code> itself if the <code>substring</code> is empty, * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. * @stable ICU 2.4 * * @see u_strstr * @see u_strFindLast */U_STABLE UChar * U_EXPORT2u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);/** * Find the last occurrence of a BMP code point in a string. * A surrogate code point is found only if its match in the text is not * part of a surrogate pair. * A NUL character is found at the string terminator. * * @param s The string to search (NUL-terminated). * @param c The BMP code point to find. * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. * @stable ICU 2.4 * * @see u_strrchr32 * @see u_memrchr * @see u_strrstr * @see u_strFindLast */U_STABLE UChar * U_EXPORT2u_strrchr(const UChar *s, UChar c);/** * Find the last occurrence of a code point in a string. * A surrogate code point is found only if its match in the text is not * part of a surrogate pair. * A NUL character is found at the string terminator. * * @param s The string to search (NUL-terminated). * @param c The code point to find. * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. * @stable ICU 2.4 * * @see u_strrchr * @see u_memchr32 * @see u_strrstr * @see u_strFindLast */U_STABLE UChar * U_EXPORT2u_strrchr32(const UChar *s, UChar32 c);/** * Locates the first occurrence in the string <code>string</code> of any of the characters * in the string <code>matchSet</code>. * Works just like C's strpbrk but with Unicode. *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -