📄 break.c

📁 Pango is a library for layout and rendering of text, with an emphasis on internationalization. Pang
💻 C
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 34
	case STATE_SENTENCE_DOT:	  switch (type)	    {	    case G_UNICODE_CLOSE_PUNCTUATION:	      sentence_state = STATE_SENTENCE_POST_DOT_CLOSE;	      break;	    case G_UNICODE_SPACE_SEPARATOR:	      possible_sentence_end = i;	      sentence_state = STATE_SENTENCE_POST_DOT_SPACE;	      break;	    default:	      /* If we broke on a control/format char, end the	       * sentence; else this was not a sentence end, since	       * we didn't enter the POST_DOT_SPACE state.	       */	      if (attrs[i].is_sentence_boundary)		{		  attrs[i].is_sentence_end = TRUE;		  MAYBE_START_NEW_SENTENCE;		}	      else		sentence_state = STATE_SENTENCE_BODY;	      break;	    }	  break;	case STATE_SENTENCE_POST_DOT_CLOSE:	  switch (type)	    {	    case G_UNICODE_SPACE_SEPARATOR:	      possible_sentence_end = i;	      sentence_state = STATE_SENTENCE_POST_DOT_SPACE;	      break;	    default:	      /* If we broke on a control/format char, end the	       * sentence; else this was not a sentence end, since	       * we didn't enter the POST_DOT_SPACE state.	       
*/	      if (attrs[i].is_sentence_boundary)		{		  attrs[i].is_sentence_end = TRUE;		  MAYBE_START_NEW_SENTENCE;		}	      else		sentence_state = STATE_SENTENCE_BODY;	      break;	    }	  break;	case STATE_SENTENCE_POST_DOT_SPACE:	  possible_sentence_boundary = i;	  switch (type)	    {	    case G_UNICODE_SPACE_SEPARATOR:	      /* remain in current state */	      break;	    case G_UNICODE_OPEN_PUNCTUATION:	      sentence_state = STATE_SENTENCE_POST_DOT_OPEN;	      break;	    case G_UNICODE_LOWERCASE_LETTER:	      /* wasn't a sentence-ending period; so re-enter the sentence	       * body	       */	      sentence_state = STATE_SENTENCE_BODY;	      break;	    default:	      /* End the sentence, break, maybe start a new one */	      g_assert (possible_sentence_end >= 0);	      g_assert (possible_sentence_boundary >= 0);	      attrs[possible_sentence_boundary].is_sentence_boundary = TRUE;	      attrs[possible_sentence_end].is_sentence_end = TRUE;	      possible_sentence_end = -1;	      possible_sentence_boundary = -1;	      MAYBE_START_NEW_SENTENCE;	      break;	    }	  break;	case STATE_SENTENCE_POST_DOT_OPEN:	  switch (type)	    {	    case G_UNICODE_OPEN_PUNCTUATION:	      /* continue in current state */	      break;	    case G_UNICODE_LOWERCASE_LETTER:	      /* wasn't a sentence-ending period; so re-enter the sentence	       * body	       */	      sentence_state = STATE_SENTENCE_BODY;	      break;	    default:	      /* End the sentence, break, maybe start a new one */	      g_assert (possible_sentence_end >= 0);	      g_assert (possible_sentence_boundary >= 0);	      attrs[possible_sentence_boundary].is_sentence_boundary = TRUE;	      attrs[possible_sentence_end].is_sentence_end = TRUE;	      possible_sentence_end = -1;	      possible_sentence_boundary = -1;	      MAYBE_START_NEW_SENTENCE;	      break;	    }	  break;	case STATE_SENTENCE_POST_DOT_SEP:	  /* Break is forced at this point, unless we're a newline	   * after a CR, then we will break after the newline on the	  
 * next iteration. Only a single Sep can be in the	   * sentence.	   */	  if (!(prev_wc == '\r' && wc == '\n'))	    attrs[i].is_sentence_boundary = TRUE;	  g_assert (possible_sentence_end >= 0);	  g_assert (possible_sentence_boundary >= 0);	  attrs[possible_sentence_end].is_sentence_end = TRUE;	  possible_sentence_end = -1;	  possible_sentence_boundary = -1;	  MAYBE_START_NEW_SENTENCE;	  break;	default:	  g_assert_not_reached ();	  break;	}      prev_type = type;      prev_wc = wc;      /* wc might not be a valid Unicode base character, but really all we       * need to know is the last non-combining character */      if (type != G_UNICODE_COMBINING_MARK &&	  type != G_UNICODE_ENCLOSING_MARK &&	  type != G_UNICODE_NON_SPACING_MARK)	base_character = wc;    }}static gbooleantailor_break (const gchar   *text,	     gint           length,	     PangoAnalysis *analysis,	     PangoLogAttr  *attrs,	     int            attrs_len){  if (analysis->lang_engine && PANGO_ENGINE_LANG_GET_CLASS (analysis->lang_engine)->script_break)    {      if (length < 0)	length = strlen (text);      else if (text == NULL)	text = "";      PANGO_ENGINE_LANG_GET_CLASS (analysis->lang_engine)->script_break (analysis->lang_engine, text, length, analysis, attrs, attrs_len);      return TRUE;    }  return FALSE;}/** * pango_break: * @text:      the text to process * @length:    length of @text in bytes (may be -1 if @text is nul-terminated) * @analysis:  #PangoAnalysis structure from pango_itemize() * @attrs:     an array to store character information in * @attrs_len: size of the array passed as @attrs * * Determines possible line, word, and character breaks * for a string of Unicode text with a single analysis.  For most * purposes you may want to use pango_get_log_attrs(). 
*/
void
pango_break (const gchar   *text,
             gint           length,
             PangoAnalysis *analysis,
             PangoLogAttr  *attrs,
             int            attrs_len)
{
  /* Both pointers are dereferenced or written below; reject NULL up front. */
  g_return_if_fail (analysis != NULL);
  g_return_if_fail (attrs != NULL);

  /* Run the default break algorithm first, then offer the result to the
   * language engine for tailoring.  tailor_break() does nothing unless
   * @analysis carries a language engine that provides script_break.
   */
  pango_default_break (text, length, analysis, attrs, attrs_len);
  tailor_break        (text, length, analysis, attrs, attrs_len);
}

/**
 * pango_find_paragraph_boundary:
 * @text: UTF-8 text
 * @length: length of @text in bytes, or -1 if nul-terminated
 * @paragraph_delimiter_index: return location for index of delimiter
 * @next_paragraph_start: return location for start of next paragraph
 *
 * Locates a paragraph boundary in @text. A boundary is caused by
 * delimiter characters, such as a newline, carriage return, carriage
 * return-newline pair, or Unicode paragraph separator character.  The
 * index of the run of delimiters is returned in
 * @paragraph_delimiter_index. The index of the start of the paragraph
 * (index after all delimiters) is stored in @next_paragraph_start.
 *
 * If no delimiters are found, both @paragraph_delimiter_index and
 * @next_paragraph_start are filled with the length of @text (an index one
 * off the end).
 *
 * Either return location may be %NULL, in which case that value is
 * simply not stored.  All indices are byte offsets from @text.
 **/
void
pango_find_paragraph_boundary (const gchar *text,
                               gint         length,
                               gint        *paragraph_delimiter_index,
                               gint        *next_paragraph_start)
{
  /* p: current scan position, advanced one UTF-8 character at a time */
  const gchar *p = text;
  /* end: text + length, i.e. one past the last byte to examine */
  const gchar *end;
  /* start: first byte following the delimiter, once one has been passed */
  const gchar *start = NULL;
  /* delimiter: first delimiter byte encountered, or NULL if none yet */
  const gchar *delimiter = NULL;

  /* Only one character has type G_UNICODE_PARAGRAPH_SEPARATOR in
   * Unicode 5.0; update the following code if that changes.
   */

  /* prev_sep is the first byte of the previous separator.  Since
   * the valid separators are \r, \n, and PARAGRAPH_SEPARATOR, the
   * first byte is enough to identify it.
*/  gchar prev_sep;  if (length < 0)    length = strlen (text);  end = text + length;  if (paragraph_delimiter_index)    *paragraph_delimiter_index = length;  if (next_paragraph_start)    *next_paragraph_start = length;  if (length == 0)    return;  prev_sep = 0;  while (p != end)    {      if (prev_sep == '\n' ||	  prev_sep == PARAGRAPH_SEPARATOR_STRING[0])	{	  g_assert (delimiter);	  start = p;	  break;	}      else if (prev_sep == '\r')	{	  /* don't break between \r and \n */	  if (*p != '\n')	    {	      g_assert (delimiter);	      start = p;	      break;	    }	}      if (*p == '\n' ||	   *p == '\r' ||	   !strncmp(p, PARAGRAPH_SEPARATOR_STRING,		    strlen(PARAGRAPH_SEPARATOR_STRING)))	{	  if (delimiter == NULL)	    delimiter = p;	  prev_sep = *p;	}      else	prev_sep = 0;      p = g_utf8_next_char (p);    }  if (delimiter && paragraph_delimiter_index)    *paragraph_delimiter_index = delimiter - text;  if (start && next_paragraph_start)    *next_paragraph_start = start - text;}static inttailor_segment (const char      *range_start,		const char      *range_end,		PangoEngineLang *range_engine,		int              chars_broken,		PangoAnalysis   *analysis,		PangoLogAttr    *log_attrs){  int chars_in_range;  PangoLogAttr attr_before = log_attrs[0];  analysis->lang_engine = range_engine;  chars_in_range = g_utf8_strlen (range_start, range_end - range_start);  if (tailor_break (range_start,		    range_end - range_start,		    analysis,		    log_attrs + chars_broken,		    chars_in_range + 1))    {      /* if tailored, we enforce some of the attrs from before tailoring at       * the boundary       */     log_attrs[0].backspace_deletes_character  = attr_before.backspace_deletes_character;     log_attrs[0].is_line_break      |= attr_before.is_line_break;     log_attrs[0].is_mandatory_break |= attr_before.is_mandatory_break;     log_attrs[0].is_cursor_position |= attr_before.is_cursor_position;    }  return chars_in_range;}/** * pango_get_log_attrs: * @text: text to process * 
@length: length in bytes of @text * @level: embedding level, or -1 if unknown * @language: language tag * @log_attrs: array with one #PangoLogAttr per character in @text, plus one extra, to be filled in * @attrs_len: length of @log_attrs array * * Computes a #PangoLogAttr for each character in @text. The @log_attrs * array must have one #PangoLogAttr for each position in @text; if * @text contains N characters, it has N+1 positions, including the * last position at the end of the text. @text should be an entire * paragraph; logical attributes can't be computed without context * (for example you need to see spaces on either side of a word to know * the word is a word). */voidpango_get_log_attrs (const char    *text,		     int            length,		     int            level,		     PangoLanguage *language,		     PangoLogAttr  *log_attrs,		     int            attrs_len){  PangoMap *lang_map;  int chars_broken;  const char *range_start, *range_end;  PangoScript script;  PangoEngineLang *range_engine;  static guint engine_type_id = 0;  static guint render_type_id = 0;  PangoAnalysis analysis = { NULL };  PangoScriptIter *iter;  g_return_if_fail (length == 0 || text != NULL);  g_return_if_fail (log_attrs != NULL);  analysis.level = level;  pango_default_break (text, length, &analysis, log_attrs, attrs_len);  if (engine_type_id == 0)    {      engine_type_id = g_quark_from_static_string (PANGO_ENGINE_TYPE_LANG);      render_type_id = g_quark_from_static_string (PANGO_RENDER_TYPE_NONE);    }  lang_map = pango_find_map (language, engine_type_id, render_type_id);  chars_broken = 0;  iter = pango_script_iter_new (text, length);  pango_script_iter_get_range (iter, &range_start, &range_end, &script);  range_engine = (PangoEngineLang*) pango_map_get_engine (lang_map, script);  g_assert (range_start == text);  while (pango_script_iter_next (iter))    {      const char *run_start, *run_end;      PangoEngineLang* run_engine;      pango_script_iter_get_range (iter, &run_start, 
&run_end, &script);      run_engine = (PangoEngineLang*) pango_map_get_engine (lang_map, script);      g_assert (range_end == run_start);      if (range_engine != run_engine)	{	  /* Engine has changed; do the tailoring for the current range,	   * then start a new range.	   */	  chars_broken += tailor_segment (range_start, range_end, range_engine, chars_broken, &analysis, log_attrs);	  range_start = run_start;	  range_engine = run_engine;	}      range_end = run_end;    }  pango_script_iter_free (iter);  g_assert (length < 0 || range_end == text + length);  chars_broken += tailor_segment (range_start, range_end, range_engine, chars_broken, &analysis, log_attrs);  if (chars_broken + 1 > attrs_len)    g_warning ("pango_get_log_attrs: attrs_len should have been at least %d, but was %d.  Expect corrupted memory.",	       chars_broken + 1,	       attrs_len);}
上一页 1 2 34
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -