📄 break.c
字号:
case STATE_SENTENCE_DOT: switch (type) { case G_UNICODE_CLOSE_PUNCTUATION: sentence_state = STATE_SENTENCE_POST_DOT_CLOSE; break; case G_UNICODE_SPACE_SEPARATOR: possible_sentence_end = i; sentence_state = STATE_SENTENCE_POST_DOT_SPACE; break; default: /* If we broke on a control/format char, end the * sentence; else this was not a sentence end, since * we didn't enter the POST_DOT_SPACE state. */ if (attrs[i].is_sentence_boundary) { attrs[i].is_sentence_end = TRUE; MAYBE_START_NEW_SENTENCE; } else sentence_state = STATE_SENTENCE_BODY; break; } break; case STATE_SENTENCE_POST_DOT_CLOSE: switch (type) { case G_UNICODE_SPACE_SEPARATOR: possible_sentence_end = i; sentence_state = STATE_SENTENCE_POST_DOT_SPACE; break; default: /* If we broke on a control/format char, end the * sentence; else this was not a sentence end, since * we didn't enter the POST_DOT_SPACE state. */ if (attrs[i].is_sentence_boundary) { attrs[i].is_sentence_end = TRUE; MAYBE_START_NEW_SENTENCE; } else sentence_state = STATE_SENTENCE_BODY; break; } break; case STATE_SENTENCE_POST_DOT_SPACE: possible_sentence_boundary = i; switch (type) { case G_UNICODE_SPACE_SEPARATOR: /* remain in current state */ break; case G_UNICODE_OPEN_PUNCTUATION: sentence_state = STATE_SENTENCE_POST_DOT_OPEN; break; case G_UNICODE_LOWERCASE_LETTER: /* wasn't a sentence-ending period; so re-enter the sentence * body */ sentence_state = STATE_SENTENCE_BODY; break; default: /* End the sentence, break, maybe start a new one */ g_assert (possible_sentence_end >= 0); g_assert (possible_sentence_boundary >= 0); attrs[possible_sentence_boundary].is_sentence_boundary = TRUE; attrs[possible_sentence_end].is_sentence_end = TRUE; possible_sentence_end = -1; possible_sentence_boundary = -1; MAYBE_START_NEW_SENTENCE; break; } break; case STATE_SENTENCE_POST_DOT_OPEN: switch (type) { case G_UNICODE_OPEN_PUNCTUATION: /* continue in current state */ break; case G_UNICODE_LOWERCASE_LETTER: /* wasn't a sentence-ending period; so re-enter the sentence * body */ sentence_state = STATE_SENTENCE_BODY; break; default: /* End the sentence, break, maybe start a new one */ g_assert (possible_sentence_end >= 0); g_assert (possible_sentence_boundary >= 0); attrs[possible_sentence_boundary].is_sentence_boundary = TRUE; attrs[possible_sentence_end].is_sentence_end = TRUE; possible_sentence_end = -1; possible_sentence_boundary = -1; MAYBE_START_NEW_SENTENCE; break; } break; case STATE_SENTENCE_POST_DOT_SEP: /* Break is forced at this point, unless we're a newline * after a CR, then we will break after the newline on the * next iteration. Only a single Sep can be in the * sentence. */ if (!(prev_wc == '\r' && wc == '\n')) attrs[i].is_sentence_boundary = TRUE; g_assert (possible_sentence_end >= 0); g_assert (possible_sentence_boundary >= 0); attrs[possible_sentence_end].is_sentence_end = TRUE; possible_sentence_end = -1; possible_sentence_boundary = -1; MAYBE_START_NEW_SENTENCE; break; default: g_assert_not_reached (); break; } prev_type = type; prev_wc = wc; /* wc might not be a valid Unicode base character, but really all we * need to know is the last non-combining character */ if (type != G_UNICODE_COMBINING_MARK && type != G_UNICODE_ENCLOSING_MARK && type != G_UNICODE_NON_SPACING_MARK) base_character = wc; }}static gbooleantailor_break (const gchar *text, gint length, PangoAnalysis *analysis, PangoLogAttr *attrs, int attrs_len){ if (analysis->lang_engine && PANGO_ENGINE_LANG_GET_CLASS (analysis->lang_engine)->script_break) { if (length < 0) length = strlen (text); else if (text == NULL) text = ""; PANGO_ENGINE_LANG_GET_CLASS (analysis->lang_engine)->script_break (analysis->lang_engine, text, length, analysis, attrs, attrs_len); return TRUE; } return FALSE;}/** * pango_break: * @text: the text to process * @length: length of @text in bytes (may be -1 if @text is nul-terminated) * @analysis: #PangoAnalysis structure from pango_itemize() * @attrs: an array to store character information in * @attrs_len: size of the array passed as @attrs * * Determines possible line, word, and character breaks * for a string of Unicode text with a single analysis. For most * purposes you may want to use pango_get_log_attrs(). */voidpango_break (const gchar *text, gint length, PangoAnalysis *analysis, PangoLogAttr *attrs, int attrs_len){ g_return_if_fail (analysis != NULL); g_return_if_fail (attrs != NULL); pango_default_break (text, length, analysis, attrs, attrs_len); tailor_break (text, length, analysis, attrs, attrs_len);}/** * pango_find_paragraph_boundary: * @text: UTF-8 text * @length: length of @text in bytes, or -1 if nul-terminated * @paragraph_delimiter_index: return location for index of delimiter * @next_paragraph_start: return location for start of next paragraph * * Locates a paragraph boundary in @text. A boundary is caused by * delimiter characters, such as a newline, carriage return, carriage * return-newline pair, or Unicode paragraph separator character. The * index of the run of delimiters is returned in * @paragraph_delimiter_index. The index of the start of the paragraph * (index after all delimiters) is stored in @next_paragraph_start. * * If no delimiters are found, both @paragraph_delimiter_index and * @next_paragraph_start are filled with the length of @text (an index one * off the end). **/voidpango_find_paragraph_boundary (const gchar *text, gint length, gint *paragraph_delimiter_index, gint *next_paragraph_start){ const gchar *p = text; const gchar *end; const gchar *start = NULL; const gchar *delimiter = NULL; /* Only one character has type G_UNICODE_PARAGRAPH_SEPARATOR in * Unicode 5.0; update the following code if that changes. */ /* prev_sep is the first byte of the previous separator. Since * the valid separators are \r, \n, and PARAGRAPH_SEPARATOR, the * first byte is enough to identify it. */ gchar prev_sep; if (length < 0) length = strlen (text); end = text + length; if (paragraph_delimiter_index) *paragraph_delimiter_index = length; if (next_paragraph_start) *next_paragraph_start = length; if (length == 0) return; prev_sep = 0; while (p != end) { if (prev_sep == '\n' || prev_sep == PARAGRAPH_SEPARATOR_STRING[0]) { g_assert (delimiter); start = p; break; } else if (prev_sep == '\r') { /* don't break between \r and \n */ if (*p != '\n') { g_assert (delimiter); start = p; break; } } if (*p == '\n' || *p == '\r' || !strncmp(p, PARAGRAPH_SEPARATOR_STRING, strlen(PARAGRAPH_SEPARATOR_STRING))) { if (delimiter == NULL) delimiter = p; prev_sep = *p; } else prev_sep = 0; p = g_utf8_next_char (p); } if (delimiter && paragraph_delimiter_index) *paragraph_delimiter_index = delimiter - text; if (start && next_paragraph_start) *next_paragraph_start = start - text;}static inttailor_segment (const char *range_start, const char *range_end, PangoEngineLang *range_engine, int chars_broken, PangoAnalysis *analysis, PangoLogAttr *log_attrs){ int chars_in_range; PangoLogAttr attr_before = log_attrs[0]; analysis->lang_engine = range_engine; chars_in_range = g_utf8_strlen (range_start, range_end - range_start); if (tailor_break (range_start, range_end - range_start, analysis, log_attrs + chars_broken, chars_in_range + 1)) { /* if tailored, we enforce some of the attrs from before tailoring at * the boundary */ log_attrs[0].backspace_deletes_character = attr_before.backspace_deletes_character; log_attrs[0].is_line_break |= attr_before.is_line_break; log_attrs[0].is_mandatory_break |= attr_before.is_mandatory_break; log_attrs[0].is_cursor_position |= attr_before.is_cursor_position; } return chars_in_range;}/** * pango_get_log_attrs: * @text: text to process * @length: length in bytes of @text * @level: embedding level, or -1 if unknown * @language: language tag * @log_attrs: array with one #PangoLogAttr per character in @text, plus one extra, to be filled in * @attrs_len: length of @log_attrs array * * Computes a #PangoLogAttr for each character in @text. The @log_attrs * array must have one #PangoLogAttr for each position in @text; if * @text contains N characters, it has N+1 positions, including the * last position at the end of the text. @text should be an entire * paragraph; logical attributes can't be computed without context * (for example you need to see spaces on either side of a word to know * the word is a word). */voidpango_get_log_attrs (const char *text, int length, int level, PangoLanguage *language, PangoLogAttr *log_attrs, int attrs_len){ PangoMap *lang_map; int chars_broken; const char *range_start, *range_end; PangoScript script; PangoEngineLang *range_engine; static guint engine_type_id = 0; static guint render_type_id = 0; PangoAnalysis analysis = { NULL }; PangoScriptIter *iter; g_return_if_fail (length == 0 || text != NULL); g_return_if_fail (log_attrs != NULL); analysis.level = level; pango_default_break (text, length, &analysis, log_attrs, attrs_len); if (engine_type_id == 0) { engine_type_id = g_quark_from_static_string (PANGO_ENGINE_TYPE_LANG); render_type_id = g_quark_from_static_string (PANGO_RENDER_TYPE_NONE); } lang_map = pango_find_map (language, engine_type_id, render_type_id); chars_broken = 0; iter = pango_script_iter_new (text, length); pango_script_iter_get_range (iter, &range_start, &range_end, &script); range_engine = (PangoEngineLang*) pango_map_get_engine (lang_map, script); g_assert (range_start == text); while (pango_script_iter_next (iter)) { const char *run_start, *run_end; PangoEngineLang* run_engine; pango_script_iter_get_range (iter, &run_start, &run_end, &script); run_engine = (PangoEngineLang*) pango_map_get_engine (lang_map, script); g_assert (range_end == run_start); if (range_engine != run_engine) { /* Engine has changed; do the tailoring for the current range, * then start a new range. */ chars_broken += tailor_segment (range_start, range_end, range_engine, chars_broken, &analysis, log_attrs); range_start = run_start; range_engine = run_engine; } range_end = run_end; } pango_script_iter_free (iter); g_assert (length < 0 || range_end == text + length); chars_broken += tailor_segment (range_start, range_end, range_engine, chars_broken, &analysis, log_attrs); if (chars_broken + 1 > attrs_len) g_warning ("pango_get_log_attrs: attrs_len should have been at least %d, but was %d. Expect corrupted memory.", chars_broken + 1, attrs_len);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -