📄 ubrk.h
字号:
* hard separator (CR, LF, PS, etc.) */ UBRK_SENTENCE_TERM = 0, /** Upper bound for tags for sentences ended by sentence terminators. */ UBRK_SENTENCE_TERM_LIMIT = 100, /** Tag value for for sentences that do not contain an ending * sentence terminator ('.', '?', '!', etc.) character, but * are ended only by a hard separator (CR, LF, PS, etc.) or end of input. */ UBRK_SENTENCE_SEP = 100, /** Upper bound for tags for sentences ended by a separator. */ UBRK_SENTENCE_SEP_LIMIT = 200 /** Tag value for a hard, or mandatory line break */} USentenceBreakTag;/** * Open a new UBreakIterator for locating text boundaries for a specified locale. * A UBreakIterator may be used for detecting character, line, word, * and sentence breaks in text. * @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD, * UBRK_LINE, UBRK_SENTENCE * @param locale The locale specifying the text-breaking conventions. * @param text The text to be iterated over. * @param textLength The number of characters in text, or -1 if null-terminated. * @param status A UErrorCode to receive any errors. * @return A UBreakIterator for the specified locale. * @see ubrk_openRules * @stable ICU 2.0 */U_STABLE UBreakIterator* U_EXPORT2ubrk_open(UBreakIteratorType type, const char *locale, const UChar *text, int32_t textLength, UErrorCode *status);/** * Open a new UBreakIterator for locating text boundaries using specified breaking rules. * The rule syntax is ... (TBD) * @param rules A set of rules specifying the text breaking conventions. * @param rulesLength The number of characters in rules, or -1 if null-terminated. * @param text The text to be iterated over. May be null, in which case ubrk_setText() is * used to specify the text to be iterated. * @param textLength The number of characters in text, or -1 if null-terminated. * @param parseErr Receives position and context information for any syntax errors * detected while parsing the rules. * @param status A UErrorCode to receive any errors. * @return A UBreakIterator for the specified rules. * @see ubrk_open * @stable ICU 2.2 */U_STABLE UBreakIterator* U_EXPORT2ubrk_openRules(const UChar *rules, int32_t rulesLength, const UChar *text, int32_t textLength, UParseError *parseErr, UErrorCode *status);/** * Thread safe cloning operation * @param bi iterator to be cloned * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated. * If buffer is not large enough, new memory will be allocated. * Clients can use the U_BRK_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations. * @param pBufferSize pointer to size of allocated space. * If *pBufferSize == 0, a sufficient size for use in cloning will * be returned ('pre-flighting') * If *pBufferSize is not enough for a stack-based safe clone, * new memory will be allocated. * @param status to indicate whether the operation went on smoothly or there were errors * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary. * @return pointer to the new clone * @stable ICU 2.0 */U_STABLE UBreakIterator * U_EXPORT2ubrk_safeClone( const UBreakIterator *bi, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status);/** * A recommended size (in bytes) for the memory buffer to be passed to ubrk_saveClone(). * @stable ICU 2.0 */#define U_BRK_SAFECLONE_BUFFERSIZE 512/*** Close a UBreakIterator.* Once closed, a UBreakIterator may no longer be used.* @param bi The break iterator to close. * @stable ICU 2.0*/U_STABLE void U_EXPORT2ubrk_close(UBreakIterator *bi);/** * Sets an existing iterator to point to a new piece of text * @param bi The iterator to use * @param text The text to be set * @param textLength The length of the text * @param status The error code * @stable ICU 2.0 */U_STABLE void U_EXPORT2ubrk_setText(UBreakIterator* bi, const UChar* text, int32_t textLength, UErrorCode* status);/** * Determine the most recently-returned text boundary. * * @param bi The break iterator to use. * @return The character index most recently returned by \ref ubrk_next, \ref ubrk_previous, * \ref ubrk_first, or \ref ubrk_last. * @stable ICU 2.0 */U_STABLE int32_t U_EXPORT2ubrk_current(const UBreakIterator *bi);/** * Determine the text boundary following the current text boundary. * * @param bi The break iterator to use. * @return The character index of the next text boundary, or UBRK_DONE * if all text boundaries have been returned. * @see ubrk_previous * @stable ICU 2.0 */U_STABLE int32_t U_EXPORT2ubrk_next(UBreakIterator *bi);/** * Determine the text boundary preceding the current text boundary. * * @param bi The break iterator to use. * @return The character index of the preceding text boundary, or UBRK_DONE * if all text boundaries have been returned. * @see ubrk_next * @stable ICU 2.0 */U_STABLE int32_t U_EXPORT2ubrk_previous(UBreakIterator *bi);/** * Determine the index of the first character in the text being scanned. * This is not always the same as index 0 of the text. * @param bi The break iterator to use. * @return The character index of the first character in the text being scanned. * @see ubrk_last * @stable ICU 2.0 */U_STABLE int32_t U_EXPORT2ubrk_first(UBreakIterator *bi);/** * Determine the index immediately <EM>beyond</EM> the last character in the text being * scanned. * This is not the same as the last character. * @param bi The break iterator to use. * @return The character offset immediately <EM>beyond</EM> the last character in the * text being scanned. * @see ubrk_first * @stable ICU 2.0 */U_STABLE int32_t U_EXPORT2ubrk_last(UBreakIterator *bi);/** * Determine the text boundary preceding the specified offset. * The value returned is always smaller than offset, or UBRK_DONE. * @param bi The break iterator to use. * @param offset The offset to begin scanning. * @return The text boundary preceding offset, or UBRK_DONE. * @see ubrk_following * @stable ICU 2.0 */U_STABLE int32_t U_EXPORT2ubrk_preceding(UBreakIterator *bi, int32_t offset);/** * Determine the text boundary following the specified offset. * The value returned is always greater than offset, or UBRK_DONE. * @param bi The break iterator to use. * @param offset The offset to begin scanning. * @return The text boundary following offset, or UBRK_DONE. * @see ubrk_preceding * @stable ICU 2.0 */U_STABLE int32_t U_EXPORT2ubrk_following(UBreakIterator *bi, int32_t offset);/*** Get a locale for which text breaking information is available.* A UBreakIterator in a locale returned by this function will perform the correct* text breaking for the locale.* @param index The index of the desired locale.* @return A locale for which number text breaking information is available, or 0 if none.* @see ubrk_countAvailable* @stable ICU 2.0*/U_STABLE const char* U_EXPORT2ubrk_getAvailable(int32_t index);/*** Determine how many locales have text breaking information available.* This function is most useful as determining the loop ending condition for* calls to \ref ubrk_getAvailable.* @return The number of locales for which text breaking information is available.* @see ubrk_getAvailable* @stable ICU 2.0*/U_STABLE int32_t U_EXPORT2ubrk_countAvailable(void);/*** Returns true if the specfied position is a boundary position. As a side* effect, leaves the iterator pointing to the first boundary position at* or after "offset".* @param bi The break iterator to use.* @param offset the offset to check.* @return True if "offset" is a boundary position.* @stable ICU 2.0*/U_STABLE UBool U_EXPORT2ubrk_isBoundary(UBreakIterator *bi, int32_t offset);/** * Return the status from the break rule that determined the most recently * returned break position. The values appear in the rule source * within brackets, {123}, for example. For rules that do not specify a * status, a default value of 0 is returned. * <p> * For word break iterators, the possible values are defined in enum UWordBreak. * @stable ICU 2.2 */U_STABLE int32_t U_EXPORT2ubrk_getRuleStatus(UBreakIterator *bi);/** * Get the statuses from the break rules that determined the most recently * returned break position. The values appear in the rule source * within brackets, {123}, for example. The default status value for rules * that do not explicitly provide one is zero. * <p> * For word break iterators, the possible values are defined in enum UWordBreak. * @param bi The break iterator to use * @param fillInVec an array to be filled in with the status values. * @param capacity the length of the supplied vector. A length of zero causes * the function to return the number of status values, in the * normal way, without attemtping to store any values. * @param status receives error codes. * @return The number of rule status values from rules that determined * the most recent boundary returned by the break iterator. * @draft ICU 3.0 */U_DRAFT int32_t U_EXPORT2ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status);/** * Return the locale of the break iterator. You can choose between the valid and * the actual locale. * @param bi break iterator * @param type locale type (valid or actual) * @param status error code * @return locale string * @draft ICU 2.8 likely to change in ICU 3.0, based on feedback */U_DRAFT const char* U_EXPORT2ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode* status);#endif /* #if !UCONFIG_NO_BREAK_ITERATION */#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -