⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ubrk.h

📁 linux下开源浏览器WebKit的源码,市面上的很多商用浏览器都是移植自WebKit
💻 H
📖 第 1 页 / 共 2 页
字号:
/** Copyright (C) 1996-2004, International Business Machines Corporation and others. All Rights Reserved.******************************************************************************************/#ifndef UBRK_H#define UBRK_H#include "unicode/utypes.h"#include "unicode/uloc.h"/** * A text-break iterator. *  For usage in C programs. */#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR#   define UBRK_TYPEDEF_UBREAK_ITERATOR    /**     *  Opaque type representing an ICU Break iterator object.     *  @stable ICU 2.0     */    typedef void UBreakIterator;#endif#if !UCONFIG_NO_BREAK_ITERATION#include "unicode/parseerr.h"/** * \file * \brief C API: BreakIterator * * <h2> BreakIterator C API </h2> * * The BreakIterator C API defines methods for finding the location * of boundaries in text. A UBreakIterator maintains a * current position and scans over text, returning the index of characters * where boundaries occur. * <P> * Line boundary analysis determines where a text string can be broken * when line-wrapping. The mechanism correctly handles punctuation and * hyphenated words. * <P> * Sentence boundary analysis allows selection with correct * interpretation of periods within numbers and abbreviations, and * trailing punctuation marks such as quotation marks and parentheses. * <P> * Word boundary analysis is used by search and replace functions, as * well as within text editing applications that allow the user to * select words with a double click. Word selection provides correct * interpretation of punctuation marks within and following * words. Characters that are not part of a word, such as symbols or * punctuation marks, have word-breaks on both sides. * <P> * Character boundary analysis allows users to interact with * characters as they expect to, for example, when moving the cursor * through a text string. Character boundary analysis provides correct * navigation through character strings, regardless of how the * character is stored.  
For example, an accented character might be * stored as a base character and a diacritical mark. What users * consider to be a character can differ between languages. * <P> * Title boundary analysis locates all positions, * typically starts of words, that should be set to Title Case * when title casing the text. * <P> * * This is the interface for all text boundaries. * <P> * Examples: * <P> * Helper function to output text * <pre> * \code *    void printTextRange(UChar* str, int32_t start, int32_t end ) { *         UChar* result; *         UChar* temp; *         char* res; *         temp=(UChar*)malloc(sizeof(UChar) * ((u_strlen(str)-start)+1)); *         result=(UChar*)malloc(sizeof(UChar) * ((end-start)+1)); *         u_strcpy(temp, &str[start]); *         u_strncpy(result, temp, end-start); *         result[end-start] = 0; *         res=(char*)malloc(sizeof(char) * (u_strlen(result)+1)); *         u_austrcpy(res, result); *         printf("%s\n", res); *         free(temp); *         free(result); *         free(res); *    } * \endcode * </pre> * Print each element in order: * <pre> * \code *    void printEachForward( UBreakIterator* boundary, UChar* str) { *       int32_t end; *       int32_t start = ubrk_first(boundary); *       for (end = ubrk_next(boundary); end != UBRK_DONE; start = end, end = ubrk_next(boundary)) { *             printTextRange(str, start, end ); *         } *    } * \endcode * </pre> * Print each element in reverse order: * <pre> * \code *    void printEachBackward( UBreakIterator* boundary, UChar* str) { *       int32_t start; *       int32_t end = ubrk_last(boundary); *       for (start = ubrk_previous(boundary); start != UBRK_DONE;  end = start, start =ubrk_previous(boundary)) { *             printTextRange( str, start, end ); *         } *    } * \endcode * </pre> * Print first element * <pre> * \code *    void printFirst(UBreakIterator* boundary, UChar* str) { *        int32_t end; *        int32_t start = ubrk_first(boundary); *        end = ubrk_next(boundary); *        printTextRange( str, start, end ); *    } * \endcode 
* </pre> * Print last element * <pre> * \code *    void printLast(UBreakIterator* boundary, UChar* str) { *        int32_t start; *        int32_t end = ubrk_last(boundary); *        start = ubrk_previous(boundary); *        printTextRange(str, start, end ); *    } * \endcode * </pre> * Print the element at a specified position * <pre> * \code *    void printAt(UBreakIterator* boundary, int32_t pos , UChar* str) { *        int32_t start; *        int32_t end = ubrk_following(boundary, pos); *        start = ubrk_previous(boundary); *        printTextRange(str, start, end ); *    } * \endcode * </pre> * Creating and using text boundaries * <pre> * \code *       void BreakIterator_Example( void ) { *           UBreakIterator* boundary; *           UChar *stringToExamine; *           UErrorCode status = U_ZERO_ERROR; *           stringToExamine=(UChar*)malloc(sizeof(UChar) * (strlen("Aaa bbb ccc. Ddd eee fff.")+1) ); *           u_uastrcpy(stringToExamine, "Aaa bbb ccc. Ddd eee fff."); *           printf("Examining: Aaa bbb ccc. Ddd eee fff.\n"); * *           //print each sentence in forward and reverse order *           boundary = ubrk_open(UBRK_SENTENCE, "en_us", stringToExamine, u_strlen(stringToExamine), &status); *           printf("----- forward: -----------\n"); *           printEachForward(boundary, stringToExamine); *           printf("----- backward: ----------\n"); *           printEachBackward(boundary, stringToExamine); *           ubrk_close(boundary); * *           //print each word in order *           boundary = ubrk_open(UBRK_WORD, "en_us", stringToExamine, u_strlen(stringToExamine), &status); *           printf("----- forward: -----------\n"); *           printEachForward(boundary, stringToExamine); *           printf("----- backward: ----------\n"); *           printEachBackward(boundary, stringToExamine); *           //print first element *           printf("----- first: -------------\n"); *           printFirst(boundary, stringToExamine); *           //print last element *           
printf("----- last: --------------\n"); *           printLast(boundary, stringToExamine); *           //print word at charpos 10 *           printf("----- at pos 10: ---------\n"); *           printAt(boundary, 10 , stringToExamine); * *           ubrk_close(boundary); *       } * \endcode * </pre> *//** The possible types of text boundaries.  @stable ICU 2.0 */typedef enum UBreakIteratorType {  /** Character breaks  @stable ICU 2.0 */  UBRK_CHARACTER,  /** Word breaks @stable ICU 2.0 */  UBRK_WORD,  /** Line breaks @stable ICU 2.0 */  UBRK_LINE,  /** Sentence breaks @stable ICU 2.0 */  UBRK_SENTENCE,/* NOTE(review): if U_HIDE_DEPRECATED_API is defined, UBRK_SENTENCE becomes the last enumerator and keeps its trailing comma; C89 and C++98 compilers reject that -- confirm the supported compilers. */#ifndef U_HIDE_DEPRECATED_API  /**    * Title Case breaks    * The iterator created using this type locates title boundaries as described for    * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,   * please use Word Boundary iterator.   *   * @deprecated ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later.   */  UBRK_TITLE#endif /* U_HIDE_DEPRECATED_API */} UBreakIteratorType;/** Value indicating all text boundaries have been returned. *  @stable ICU 2.0  */#define UBRK_DONE ((int32_t) -1)/** *  Enum constants for the word break tags returned by *  getRuleStatus().  A range of values is defined for each category of *  word, to allow for further subdivisions of a category in future releases. *  Applications should check for tag values falling within the range, rather *  than for single individual values. *  @stable ICU 2.2*/typedef enum UWordBreak {    /** Tag value for "words" that do not fit into any of the other categories.      *  Includes spaces and most punctuation. */    UBRK_WORD_NONE           = 0,    /** Upper bound for tags for uncategorized words. */    UBRK_WORD_NONE_LIMIT     = 100,    /** Tag value for words that appear to be numbers, lower limit.    */    UBRK_WORD_NUMBER         = 100,    /** Tag value for words that appear to be numbers, upper limit.    
*/    UBRK_WORD_NUMBER_LIMIT   = 200,    /** Tag value for words that contain letters, excluding     *  hiragana, katakana or ideographic characters, lower limit.    */    UBRK_WORD_LETTER         = 200,    /** Tag value for words containing letters, upper limit  */    UBRK_WORD_LETTER_LIMIT   = 300,    /** Tag value for words containing kana characters, lower limit */    UBRK_WORD_KANA           = 300,    /** Tag value for words containing kana characters, upper limit */    UBRK_WORD_KANA_LIMIT     = 400,    /** Tag value for words containing ideographic characters, lower limit */    UBRK_WORD_IDEO           = 400,    /** Tag value for words containing ideographic characters, upper limit */    UBRK_WORD_IDEO_LIMIT     = 500} UWordBreak;/** *  Enum constants for the line break tags returned by getRuleStatus(). *  A range of values is defined for each category of *  line break, to allow for further subdivisions of a category in future releases. *  Applications should check for tag values falling within the range, rather *  than for single individual values. *  @draft ICU 2.8*/typedef enum ULineBreakTag {    /** Tag value for soft line breaks, positions at which a line break      *  is acceptable but not required                */    UBRK_LINE_SOFT            = 0,    /** Upper bound for soft line breaks.              */    UBRK_LINE_SOFT_LIMIT      = 100,    /** Tag value for a hard, or mandatory line break  */    UBRK_LINE_HARD            = 100,    /** Upper bound for hard line breaks.              */    UBRK_LINE_HARD_LIMIT      = 200} ULineBreakTag;/** *  Enum constants for the sentence break tags returned by getRuleStatus(). *  A range of values is defined for each category of *  sentence, to allow for further subdivisions of a category in future releases. *  Applications should check for tag values falling within the range, rather *  than for single individual values. 
*  @draft ICU 2.8*/typedef enum USentenceBreakTag {    /** Tag value for sentences ending with a sentence terminator      * ('.', '?', '!', etc.) character, possibly followed by a

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -