📄 gutf8.c

📁 嵌入式下基于MiniGUI的Web Browser
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
/* unicode_strchr *//** * g_unichar_to_utf8: * @c: a ISO10646 character code * @outbuf: output buffer, must have at least 6 bytes of space. *       If %NULL, the length will be computed and returned *       and nothing will be written to @outbuf. *  * Converts a single character to UTF-8. *  * Return value: number of bytes written **/intg_unichar_to_utf8 (gunichar c,		   gchar   *outbuf){  guint len = 0;      int first;  int i;  if (c < 0x80)    {      first = 0;      len = 1;    }  else if (c < 0x800)    {      first = 0xc0;      len = 2;    }  else if (c < 0x10000)    {      first = 0xe0;      len = 3;    }   else if (c < 0x200000)    {      first = 0xf0;      len = 4;    }  else if (c < 0x4000000)    {      first = 0xf8;      len = 5;    }  else    {      first = 0xfc;      len = 6;    }  if (outbuf)    {      for (i = len - 1; i > 0; --i)	{	  outbuf[i] = (c & 0x3f) | 0x80;	  c >>= 6;	}      outbuf[0] = c | first;    }  return len;}/** * g_utf8_strchr: * @p: a nul-terminated UTF-8 encoded string * @len: the maximum length of @p * @c: a ISO10646 character *  * Finds the leftmost occurrence of the given ISO10646 character * in a UTF-8 encoded string, while limiting the search to @len bytes. * If @len is -1, allow unbounded search. *  * Return value: %NULL if the string does not contain the character,  *   otherwise, a pointer to the start of the leftmost occurrence of  *   the character in the string. **/gchar *g_utf8_strchr (const char *p,	       gssize      len,	       gunichar    c){  gchar ch[10];  gint charlen = g_unichar_to_utf8 (c, ch);  ch[charlen] = '\0';    return g_strstr_len (p, len, ch);}/** * g_utf8_strrchr: * @p: a nul-terminated UTF-8 encoded string * @len: the maximum length of @p * @c: a ISO10646 character *  * Find the rightmost occurrence of the given ISO10646 character * in a UTF-8 encoded string, while limiting the search to @len bytes. * If @len is -1, allow unbounded search. *  * Return value: %NULL if the string does not contain the character,  *   otherwise, a pointer to the start of the rightmost occurrence of the  *   character in the string. **/gchar *g_utf8_strrchr (const char *p,		gssize      len,		gunichar    c){  gchar ch[10];  gint charlen = g_unichar_to_utf8 (c, ch);  ch[charlen] = '\0';    return g_strrstr_len (p, len, ch);}/* Like g_utf8_get_char, but take a maximum length * and return (gunichar)-2 on incomplete trailing character */static inline gunicharg_utf8_get_char_extended (const  gchar *p,			  gssize max_len)  {  guint i, len;  gunichar wc = (guchar) *p;  if (wc < 0x80)    {      return wc;    }  else if (wc < 0xc0)    {      return (gunichar)-1;    }  else if (wc < 0xe0)    {      len = 2;      wc &= 0x1f;    }  else if (wc < 0xf0)    {      len = 3;      wc &= 0x0f;    }  else if (wc < 0xf8)    {      len = 4;      wc &= 0x07;    }  else if (wc < 0xfc)    {      len = 5;      wc &= 0x03;    }  else if (wc < 0xfe)    {      len = 6;      wc &= 0x01;    }  else    {      return (gunichar)-1;    }    if (max_len >= 0 && len > max_len)    {      for (i = 1; i < max_len; i++)	{	  if ((((guchar *)p)[i] & 0xc0) != 0x80)	    return (gunichar)-1;	}      return (gunichar)-2;    }  for (i = 1; i < len; ++i)    {      gunichar ch = ((guchar *)p)[i];            if ((ch & 0xc0) != 0x80)	{	  if (ch)	    return (gunichar)-1;	  else	    return (gunichar)-2;	}      wc <<= 6;      wc |= (ch & 0x3f);    }  if (UTF8_LENGTH(wc) != len)    return (gunichar)-1;    return wc;}/** * g_utf8_get_char_validated: * @p: a pointer to Unicode character encoded as UTF-8 * @max_len: the maximum number of bytes to read, or -1, for no maximum. *  * Convert a sequence of bytes encoded as UTF-8 to a Unicode character. * This function checks for incomplete characters, for invalid characters * such as characters that are out of the range of Unicode, and for * overlong encodings of valid characters. *  * Return value: the resulting character. If @p points to a partial *    sequence at the end of a string that could begin a valid character, *    returns (gunichar)-2; otherwise, if @p does not point to a valid *    UTF-8 encoded Unicode character, returns (gunichar)-1. **/gunicharg_utf8_get_char_validated (const  gchar *p,			   gssize max_len){  gunichar result = g_utf8_get_char_extended (p, max_len);  if (result & 0x80000000)    return result;  else if (!UNICODE_VALID (result))    return (gunichar)-1;  else    return result;}/** * g_utf8_to_ucs4_fast: * @str: a UTF-8 encoded string * @len: the maximum length of @str to use. If @len < 0, then *       the string is nul-terminated. * @items_written: location to store the number of characters in the *                 result, or %NULL. * * Convert a string from UTF-8 to a 32-bit fixed width * representation as UCS-4, assuming valid UTF-8 input. * This function is roughly twice as fast as g_utf8_to_ucs4() * but does no error checking on the input. *  * Return value: a pointer to a newly allocated UCS-4 string. *               This value must be freed with g_free(). **/gunichar *g_utf8_to_ucs4_fast (const gchar *str,		     glong        len,              		     glong       *items_written)    {  gint j, charlen;  gunichar *result;  gint n_chars, i;  const gchar *p;  g_return_val_if_fail (str != NULL, NULL);  p = str;  n_chars = 0;  if (len < 0)    {      while (*p)	{	  p = g_utf8_next_char (p);	  ++n_chars;	}    }  else    {      while (p < str + len && *p)	{	  p = g_utf8_next_char (p);	  ++n_chars;	}    }    result = g_new (gunichar, n_chars + 1);    p = str;  for (i=0; i < n_chars; i++)    {      gunichar wc = ((unsigned char *)p)[0];      if (wc < 0x80)	{	  result[i] = wc;	  p++;	}      else	{ 	  if (wc < 0xe0)	    {	      charlen = 2;	      wc &= 0x1f;	    }	  else if (wc < 0xf0)	    {	      charlen = 3;	      wc &= 0x0f;	    }	  else if (wc < 0xf8)	    {	      charlen = 4;	      wc &= 0x07;	    }	  else if (wc < 0xfc)	    {	      charlen = 5;	      wc &= 0x03;	    }	  else	    {	      charlen = 6;	      wc &= 0x01;	    }	  for (j = 1; j < charlen; j++)	    {	      wc <<= 6;	      wc |= ((unsigned char *)p)[j] & 0x3f;	    }	  result[i] = wc;	  p += charlen;	}    }  result[i] = 0;  if (items_written)    *items_written = i;  return result;}/** * g_unichar_validate: * @ch: a Unicode character *  * Checks whether @ch is a valid Unicode character. Some possible * integer values of @ch will not be valid. 0 is considered a valid * character, though it's normally a string terminator. *  * Return value: %TRUE if @ch is a valid Unicode character **/gbooleang_unichar_validate (gunichar ch){  return UNICODE_VALID (ch);}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -