📄 gutf8.c

📁 嵌入式下基于MiniGUI的Web Browser
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* gutf8.c - Operations on UTF-8 strings.
 *
 * Copyright (C) 1999 Tom Tromey
 * Copyright (C) 2000 Red Hat, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include "glibconfig.h"

#include <stdlib.h>
#ifdef HAVE_CODESET
#include <langinfo.h>
#endif
#include <string.h>

#include "glib.h"

#ifdef G_PLATFORM_WIN32
#include <stdio.h>
#define STRICT
#include <windows.h>
#undef STRICT
#endif

#include "libcharset.h"

#include "glibintl.h"

/* Classify the lead byte Char of a UTF-8 sequence: set Len to the total
 * number of bytes in the sequence (1..6) and Mask to the bit mask that
 * extracts the payload bits of the lead byte.  Len is set to -1 (and Mask
 * is left untouched) when Char cannot start a sequence.
 */
#define UTF8_COMPUTE(Char, Mask, Len)					      \
  if (Char < 128)							      \
    {									      \
      Len = 1;								      \
      Mask = 0x7f;							      \
    }									      \
  else if ((Char & 0xe0) == 0xc0)					      \
    {									      \
      Len = 2;								      \
      Mask = 0x1f;							      \
    }									      \
  else if ((Char & 0xf0) == 0xe0)					      \
    {									      \
      Len = 3;								      \
      Mask = 0x0f;							      \
    }									      \
  else if ((Char & 0xf8) == 0xf0)					      \
    {									      \
      Len = 4;								      \
      Mask = 0x07;							      \
    }									      \
  else if ((Char & 0xfc) == 0xf8)					      \
    {									      \
      Len = 5;								      \
      Mask = 0x03;							      \
    }									      \
  else if ((Char & 0xfe) == 0xfc)					      \
    {									      \
      Len = 6;								      \
      Mask = 0x01;							      \
    }									      \
  else									      \
    Len = -1;

/* Number of bytes needed to encode the code point Char. */
#define UTF8_LENGTH(Char)              \
  ((Char) < 0x80 ? 1 :                 \
   ((Char) < 0x800 ? 2 :               \
    ((Char) < 0x10000 ? 3 :            \
     ((Char) < 0x200000 ? 4 :          \
      ((Char) < 0x4000000 ? 5 : 6)))))

/* Decode the Len-byte sequence at Chars into Result, using Count as the
 * loop index and Mask (from UTF8_COMPUTE) for the lead byte.  Result is
 * set to -1 as soon as a byte that is not a continuation byte (10xxxxxx)
 * is encountered.
 */
#define UTF8_GET(Result, Chars, Count, Mask, Len)			      \
  (Result) = (Chars)[0] & (Mask);					      \
  for ((Count) = 1; (Count) < (Len); ++(Count))				      \
    {									      \
      if (((Chars)[(Count)] & 0xc0) != 0x80)				      \
	{								      \
	  (Result) = -1;						      \
	  break;							      \
	}								      \
      (Result) <<= 6;							      \
      (Result) |= ((Chars)[(Count)] & 0x3f);				      \
    }

/* True when Char is below 0x110000, outside the range 0xD800..0xDFFF,
 * and is neither 0xFFFE nor 0xFFFF.
 */
#define UNICODE_VALID(Char)                   \
    ((Char) < 0x110000 &&                     \
     ((Char) < 0xD800 || (Char) >= 0xE000) && \
     (Char) != 0xFFFE && (Char) != 0xFFFF)

/* Sequence length indexed by lead byte.  Bytes that cannot start a
 * sequence (0x80..0xBF, 0xFE, 0xFF) map to 1 so that skipping always
 * makes forward progress.
 */
static const gchar utf8_skip_data[256] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
};

const gchar * const g_utf8_skip = utf8_skip_data;

/**
 * g_utf8_find_prev_char:
 * @str: pointer to the beginning of a UTF-8 encoded string
 * @p: pointer to some position within @str
 *
 * Given a position @p within a UTF-8 encoded string @str, find the start
 * of the previous UTF-8 character starting before @p. Returns %NULL if no
 * UTF-8 characters are present in @str before @p.
 *
 * @p does not have to be at the beginning of a UTF-8 character. No check
 * is made to see if the character found is actually valid other than
 * it starts with an appropriate byte.
 *
 * Return value: a pointer to the found character or %NULL.
 **/
gchar *
g_utf8_find_prev_char (const char *str,
		       const char *p)
{
  /* Walk backwards until a byte that is not a continuation byte. */
  for (--p; p >= str; --p)
    {
      if ((*p & 0xc0) != 0x80)
	return (gchar *)p;
    }
  return NULL;
}

/**
 * g_utf8_find_next_char:
 * @p: a pointer to a position within a UTF-8 encoded string
 * @end: a pointer to the end of the string, or %NULL to indicate
 *        that the string is nul-terminated, in which case
 *        the returned value will never be %NULL
 *
 * Finds the start of the next UTF-8 character in the string after @p.
 * If @p already points at a nul byte it is not advanced.
 *
 * @p does not have to be at the beginning of a UTF-8 character. No check
 * is made to see if the character found is actually valid other than
 * it starts with an appropriate byte.
 *
 * Return value: a pointer to the found character, or %NULL if @end was
 *   given and no character starts before it
 **/
gchar *
g_utf8_find_next_char (const gchar *p,
		       const gchar *end)
{
  /* Nothing can follow a position at or beyond @end; bail out before
   * reading a byte that lies outside the caller's range.
   */
  if (end != NULL && p >= end)
    return NULL;

  if (*p)
    {
      if (end)
	for (++p; p < end && (*p & 0xc0) == 0x80; ++p)
	  ;
      else
	for (++p; (*p & 0xc0) == 0x80; ++p)
	  ;
    }

  return (p == end) ? NULL : (gchar *)p;
}

/**
 * g_utf8_prev_char:
 * @p: a pointer to a position within a UTF-8 encoded string
 *
 * Finds the previous UTF-8 character in the string before @p.
 *
 * @p does not have to be at the beginning of a UTF-8 character. No check
 * is made to see if the character found is actually valid other than
 * it starts with an appropriate byte. If @p might be the first
 * character of the string, you must use g_utf8_find_prev_char() instead,
 * because this function has no lower bound and keeps stepping backwards
 * until it sees a non-continuation byte.
 *
 * Return value: a pointer to the found character.
 **/
gchar *
g_utf8_prev_char (const gchar *p)
{
  while (TRUE)
    {
      p--;
      if ((*p & 0xc0) != 0x80)
	return (gchar *)p;
    }
}

/**
 * g_utf8_strlen:
 * @p: pointer to the start of a UTF-8 encoded string.
 * @max: the maximum number of bytes to examine. If @max
 *       is less than 0, then the string is assumed to be
 *       nul-terminated.
 *
 * Returns the length of the string in characters. Counting stops at
 * the first nul byte or after @max bytes, whichever comes first; a
 * character that extends beyond the first @max bytes is not counted.
 *
 * Return value: the length of the string in characters
 **/
glong
g_utf8_strlen (const gchar *p,
               gssize       max)
{
  glong len = 0;
  const gchar *start = p;

  if (max < 0)
    {
      while (*p)
        {
          p = g_utf8_next_char (p);
          ++len;
        }
    }
  else
    {
      if (max == 0 || !*p)
        return 0;

      /* p always points just past the character that has been scanned
       * but not yet counted.
       */
      p = g_utf8_next_char (p);

      while (p - start < max && *p)
        {
          ++len;
          p = g_utf8_next_char (p);
        }

      /* only do the last len increment if we got a complete
       * char (don't count partial chars).  The comparison must be
       * "<=" rather than "==": when the loop stopped at the nul
       * terminator before reaching max bytes, the last character
       * scanned is complete and has to be counted as well.
       */
      if (p - start <= max)
        ++len;
    }

  return len;
}

/**
 * g_utf8_get_char:
 * @p: a pointer to Unicode character encoded as UTF-8
 *
 * Converts a sequence of bytes encoded as UTF-8 to a Unicode character.
 * If @p does not point to a valid UTF-8 encoded character, results are
 * undefined. If you are not sure that the bytes are complete
 * valid Unicode characters, you should use g_utf8_get_char_validated()
 * instead.
 *
 * Return value: the resulting character, or (gunichar)-1 if the first
 *   byte cannot start a sequence or a continuation byte is malformed
 **/
gunichar
g_utf8_get_char (const gchar *p)
{
  int i, mask = 0, len;
  gunichar result;
  unsigned char c = (unsigned char) *p;

  UTF8_COMPUTE (c, mask, len);
  if (len == -1)
    return (gunichar)-1;
  UTF8_GET (result, p, i, mask, len);

  return result;
}

/**
 * g_utf8_offset_to_pointer:
 * @str: a UTF-8 encoded string
 * @offset: a character offset within @str; must not be negative, and
 *          must not exceed the number of characters in @str, since no
 *          check for the terminating nul is made
 *
 * Converts from an integer character offset to a pointer to a position
 * within the string.
 *
 * Return value: the resulting pointer
 **/
gchar *
g_utf8_offset_to_pointer  (const gchar *str,
			   glong        offset)
{
  const gchar *s = str;

  while (offset--)
    s = g_utf8_next_char (s);

  return (gchar *)s;
}

/**
 * g_utf8_pointer_to_offset:
 * @str: a UTF-8 encoded string
 * @pos: a pointer to a position within @str
 *
 * Converts from a pointer to position within a string to a integer
 * character offset. A @pos that lies inside a character counts that
 * character as well.
 *
 * Return value: the resulting character offset
 **/
glong
g_utf8_pointer_to_offset (const gchar *str,
			  const gchar *pos)
{
  const gchar *s = str;
  glong offset = 0;

  while (s < pos)
    {
      s = g_utf8_next_char (s);
      offset++;
    }

  return offset;
}

/**
 * g_utf8_strncpy:
 * @dest: buffer to fill with characters from @src
 * @src: UTF-8 encoded string
 * @n: character count
 *
 * Like the standard C <function>strncpy()</function> function, but
 * copies a given number of characters instead of a given number of
 * bytes. The @src string must be valid UTF-8 encoded text.
 * (Use g_utf8_validate() on all text before trying to use UTF-8
 * utility functions with it.)
 *
 * Unlike <function>strncpy()</function>, the result is always
 * nul-terminated, so @dest must have room for the copied bytes plus
 * one terminating nul byte.
 *
 * Return value: @dest
 **/
gchar *
g_utf8_strncpy (gchar       *dest,
		const gchar *src,
		gsize        n)
{
  const gchar *s = src;

  /* Find the byte position just past the first n characters. */
  while (n && *s)
    {
      s = g_utf8_next_char(s);
      n--;
    }
  strncpy(dest, src, s - src);
  dest[s - src] = 0;
  return dest;
}
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -