📄 uset.h
字号:
/********************************************************************************** Copyright (C) 2002-2004, International Business Machines* Corporation and others. All Rights Reserved.********************************************************************************* file name: uset.h* encoding: US-ASCII* tab size: 8 (not used)* indentation:4** created on: 2002mar07* created by: Markus W. Scherer** C version of UnicodeSet.*//** * \file * \brief C API: Unicode Set * * <p>This is a C wrapper around the C++ UnicodeSet class.</p> */#ifndef __USET_H__#define __USET_H__#include "unicode/utypes.h"#include "unicode/uchar.h"#ifndef UCNV_Hstruct USet;/** * A UnicodeSet. Use the uset_* API to manipulate. Create with * uset_open*, and destroy with uset_close. * @stable ICU 2.4 */typedef struct USet USet;#endif/** * Bitmask values to be passed to uset_openPatternOptions() or * uset_applyPattern() taking an option parameter. * @stable ICU 2.4 */enum { /** * Ignore white space within patterns unless quoted or escaped. * @stable ICU 2.4 */ USET_IGNORE_SPACE = 1, /** * Enable case insensitive matching. E.g., "[ab]" with this flag * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will * match all except 'a', 'A', 'b', and 'B'. This performs a full * closure over case mappings, e.g. U+017F for s. * @stable ICU 2.4 */ USET_CASE_INSENSITIVE = 2, /** * Bitmask for UnicodeSet::closeOver() indicating letter case. * This may be ORed together with other selectors. * @internal */ USET_CASE = 2, /** * Enable case insensitive matching. E.g., "[ab]" with this flag * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will * match all except 'a', 'A', 'b', and 'B'. This adds the lower-, * title-, and uppercase mappings as well as the case folding * of each existing element in the set. * @draft ICU 3.2 */ USET_ADD_CASE_MAPPINGS = 4, /** * Enough for any single-code point set * @internal */ USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8};/** * A serialized form of a Unicode set. Limited manipulations are * possible directly on a serialized set. See below. * @stable ICU 2.4 */typedef struct USerializedSet { /** * The serialized Unicode Set. * @stable ICU 2.4 */ const uint16_t *array; /** * The length of the array that contains BMP characters. * @stable ICU 2.4 */ int32_t bmpLength; /** * The total length of the array. * @stable ICU 2.4 */ int32_t length; /** * A small buffer for the array to reduce memory allocations. * @stable ICU 2.4 */ uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];} USerializedSet;/********************************************************************* * USet API *********************************************************************//** * Creates a USet object that contains the range of characters * start..end, inclusive. * @param start first character of the range, inclusive * @param end last character of the range, inclusive * @return a newly created USet. The caller must call uset_close() on * it when done. * @stable ICU 2.4 */U_STABLE USet* U_EXPORT2uset_open(UChar32 start, UChar32 end);/** * Creates a set from the given pattern. See the UnicodeSet class * description for the syntax of the pattern language. * @param pattern a string specifying what characters are in the set * @param patternLength the length of the pattern, or -1 if null * terminated * @param ec the error code * @stable ICU 2.4 */U_STABLE USet* U_EXPORT2uset_openPattern(const UChar* pattern, int32_t patternLength, UErrorCode* ec);/** * Creates a set from the given pattern. See the UnicodeSet class * description for the syntax of the pattern language. * @param pattern a string specifying what characters are in the set * @param patternLength the length of the pattern, or -1 if null * terminated * @param options bitmask for options to apply to the pattern. * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. * @param ec the error code * @stable ICU 2.4 */U_STABLE USet* U_EXPORT2uset_openPatternOptions(const UChar* pattern, int32_t patternLength, uint32_t options, UErrorCode* ec);/** * Disposes of the storage used by a USet object. This function should * be called exactly once for objects returned by uset_open(). * @param set the object to dispose of * @stable ICU 2.4 */U_STABLE void U_EXPORT2uset_close(USet* set);/** * Causes the USet object to represent the range <code>start - end</code>. * If <code>start > end</code> then this USet is set to an empty range. * @param set the object to set to the given range * @param start first character in the set, inclusive * @param end last character in the set, inclusive * @draft ICU 3.2 */U_DRAFT void U_EXPORT2uset_set(USet* set, UChar32 start, UChar32 end);/** * Modifies the set to represent the set specified by the given * pattern. See the UnicodeSet class description for the syntax of * the pattern language. See also the User Guide chapter about UnicodeSet. * <em>Empties the set passed before applying the pattern.</em> * @param set The set to which the pattern is to be applied. * @param pattern A pointer to UChar string specifying what characters are in the set. * The character at pattern[0] must be a '['. * @param patternLength The length of the UChar string. -1 if NUL terminated. * @param options A bitmask for options to apply to the pattern. * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. * @param status Returns an error if the pattern cannot be parsed. * @return Upon successful parse, the value is either * the index of the character after the closing ']' * of the parsed pattern. * If the status code indicates failure, then the return value * is the index of the error in the source. * * @draft ICU 2.8 */U_DRAFT int32_t U_EXPORT2 uset_applyPattern(USet *set, const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *status);/** * Modifies the set to contain those code points which have the given value * for the given binary or enumerated property, as returned by * u_getIntPropertyValue. Prior contents of this set are lost. * * @param set the object to contain the code points defined by the property * * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1 * or UCHAR_INT_START..UCHAR_INT_LIMIT-1 * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1. * * @param value a value in the range u_getIntPropertyMinValue(prop).. * u_getIntPropertyMaxValue(prop), with one exception. If prop is * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but * rather a mask value produced by U_GET_GC_MASK(). This allows grouped * categories such as [:L:] to be represented. * * @param ec error code input/output parameter * * @draft ICU 3.2 */U_DRAFT void U_EXPORT2uset_applyIntPropertyValue(USet* set, UProperty prop, int32_t value, UErrorCode* ec);/** * Modifies the set to contain those code points which have the * given value for the given property. Prior contents of this * set are lost. * * @param set the object to contain the code points defined by the given * property and value alias * * @param prop a string specifying a property alias, either short or long. * The name is matched loosely. See PropertyAliases.txt for names and a * description of loose matching. If the value string is empty, then this * string is interpreted as either a General_Category value alias, a Script * value alias, a binary property alias, or a special ID. Special IDs are * matched loosely and correspond to the following sets: * * "ANY" = [\\u0000-\\U0010FFFF], * "ASCII" = [\\u0000-\\u007F]. * * @param propLength the length of the prop, or -1 if NULL * * @param value a string specifying a value alias, either short or long. * The name is matched loosely. See PropertyValueAliases.txt for names * and a description of loose matching. In addition to aliases listed, * numeric values and canonical combining classes may be expressed * numerically, e.g., ("nv", "0.5") or ("ccc", "220"). The value string * may also be empty. * * @param valueLength the length of the value, or -1 if NULL * * @param ec error code input/output parameter * * @draft ICU 3.2 */U_DRAFT void U_EXPORT2uset_applyPropertyAlias(USet* set, const UChar *prop, int32_t propLength, const UChar *value, int32_t valueLength, UErrorCode* ec);/** * Return true if the given position, in the given pattern, appears * to be the start of a UnicodeSet pattern. * * @param pattern a string specifying the pattern * @param patternLength the length of the pattern, or -1 if NULL * @param pos the given position * @draft ICU 3.2 */U_DRAFT UBool U_EXPORT2uset_resemblesPattern(const UChar *pattern, int32_t patternLength, int32_t pos);/** * Returns a string representation of this set. If the result of * calling this function is passed to a uset_openPattern(), it * will produce another set that is equal to this one. * @param set the set * @param result the string to receive the rules, may be NULL * @param resultCapacity the capacity of result, may be 0 if result is NULL * @param escapeUnprintable if TRUE then convert unprintable * character to their hex escape representations, \\uxxxx or * \\Uxxxxxxxx. Unprintable characters are those other than * U+000A, U+0020..U+007E. * @param ec error code. * @return length of string, possibly larger than resultCapacity * @stable ICU 2.4 */U_STABLE int32_t U_EXPORT2uset_toPattern(const USet* set, UChar* result, int32_t resultCapacity, UBool escapeUnprintable, UErrorCode* ec);/** * Adds the given character to the given USet. After this call, * uset_contains(set, c) will return TRUE. * @param set the object to which to add the character * @param c the character to add * @stable ICU 2.4 */U_STABLE void U_EXPORT2uset_add(USet* set, UChar32 c);/** * Adds all of the elements in the specified set to this set if * they're not already present. This operation effectively * modifies this set so that its value is the <i>union</i> of the two * sets. The behavior of this operation is unspecified if the specified * collection is modified while the operation is in progress. * * @param set the object to which to add the set * @param additionalSet the source set whose elements are to be added to this set. * @stable ICU 2.6 */U_STABLE void U_EXPORT2uset_addAll(USet* set, const USet *additionalSet);/** * Adds the given range of characters to the given USet. After this call, * uset_contains(set, start, end) will return TRUE. * @param set the object to which to add the character * @param start the first character of the range to add, inclusive * @param end the last character of the range to add, inclusive * @stable ICU 2.2 */U_STABLE void U_EXPORT2uset_addRange(USet* set, UChar32 start, UChar32 end);/** * Adds the given string to the given USet. After this call, * uset_containsString(set, str, strLen) will return TRUE. * @param set the object to which to add the character * @param str the string to add * @param strLen the length of the string or -1 if null terminated. * @stable ICU 2.4 */U_STABLE void U_EXPORT2uset_addString(USet* set, const UChar* str, int32_t strLen);/** * Removes the given character from the given USet. After this call, * uset_contains(set, c) will return FALSE. * @param set the object from which to remove the character * @param c the character to remove * @stable ICU 2.4 */U_STABLE void U_EXPORT2uset_remove(USet* set, UChar32 c);/** * Removes the given range of characters from the given USet. After this call, * uset_contains(set, start, end) will return FALSE. * @param set the object to which to add the character * @param start the first character of the range to remove, inclusive * @param end the last character of the range to remove, inclusive * @stable ICU 2.2 */U_STABLE void U_EXPORT2uset_removeRange(USet* set, UChar32 start, UChar32 end);/**
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -