⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 nxml_utf.c

📁 libnxml-no-curl-简化版2007-07-01,一个别很不错的XML生成和解析程序。
💻 C
字号:
/* nXml - Copyright (C) 2005 bakunin - Andrea Marchesini  *                                <bakunin@autistici.org> * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. *  * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU * Lesser General Public License for more details. *  * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */#ifdef HAVE_CONFIG_H#include <config.h>#else# error Use configure; make; make install#endif#include "nxml.h"#include "nxml_internal.h"/* * UTF-8 * 7bits:	0xxxxxxx * 11bits:	110xxxxx 10xxxxxx * 16bits:	1110xxxx 10xxxxxx 10xxxxxx * 21bits:	11110xxx 10xxxxxx 10xxxxxx 10xxxxxx * 26bits:	111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx * 31bits:	1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */int64_t__nxml_utf8 (unsigned char **buffer, size_t * size, int *byte){  int64_t ret;  unsigned char c0 = **buffer, c1, c2, c3, c4;  if (c0 < 0x80 || *size < 2)    {      *byte = 1;      ret = (int64_t) c0;      return ret;    }  c1 = *(*buffer + 1);  if ((c0 & 0xe0) == 0xc0 || *size < 3)    {      *byte = 2;      ret = (((c0 & 0x1f) << 6) | (c1 & 0x3f));      return ret;    }  c2 = *(*buffer + 2);  if ((c0 & 0xf0) == 0xe0 || *size < 4)    {      *byte = 3;      ret = (((c0 & 0x0f) << 12) | ((c1 & 0x3f) << 6) | (c2 & 0x3f));      return ret;    }  c3 = *(*buffer + 3);  if ((c0 & 0xf8) == 0xf0 || *size < 5)    {      *byte = 4;      ret =	(((c0 & 0x7) << 18) | ((c1 & 0x3f) << 12) | ((c2 & 0x3f) << 6) |	 (c3 & 0x3f));      return ret;    }  c4 = *(*buffer + 4);  if ((c0 & 0xfc) == 0xf8)    {      *byte = 5;      ret =	(((c0 & 0x3) << 24) | ((c1 & 0x3f) << 18) | ((c2 & 0x3f) << 12) |	 ((c3 & 0x3f) << 6) | (c4 & 0x3f));      return ret;    }  *byte = 1;  ret = (int64_t) c0;  return ret;}#define __NXML_XTO8( x , b ) \	    if(byte>=1023-b) { \		    if(!(ret=realloc(ret, (j+b)*sizeof(char)))) return -1; \		    byte=0; \	    } \            memcpy(&ret[j], x, b); \	    j+=b; \	    byte+=b;static size_t__nxml_utf16to8 (int le, unsigned char *buffer, size_t size,		 unsigned char **ret_buffer){  int64_t ch;  int j = 0;  int byte = 0;  unsigned char *ret;  if (!(ret = (unsigned char *) malloc (sizeof (unsigned char) * 1024)))    return -1;  while (size > 0)    {      if (le)	{	  if ((*buffer & 0xfc) == 0xd8 && (*(buffer + 2) & 0xfc) == 0xdc)	    {	      ch = ((*buffer & 0x03) << 18) + (*(buffer + 1) << 10) +		((*(buffer + 2) & 0x03) << 8) + *(buffer + 3);	      buffer += 4;	      size -= 4;	    }	  else	    {	      ch = (*buffer << 8) + *(buffer + 1);	      buffer += 2;	      size -= 2;	    }	}      else if ((*(buffer + 1) & 0xfc) == 0xd8	       && (*(buffer + 3) & 0xfc) == 0xdc)	{	  ch = ((*(buffer + 1) & 0x03) << 18) + (*buffer << 10) +	    ((*(buffer + 3) & 0x03) << 8) + *(buffer + 2);	  buffer += 4;	  size -= 4;	}      else	{	  ch = (*(buffer + 1) << 8) + *buffer;	  buffer += 2;	  size -= 2;	}      /* 8bit:  1000000 */      if (ch < 0x80)	{	  __NXML_XTO8 ((void *) &ch, 1);	}      /* 11bit:  xx100000 xx000000       *         1000 0000 0000        *      0x 8    0    0       */      else if (ch < 0x800)	{	  /* 11bits:    110xxxxx 10xxxxxx */	  char a[2];	  a[0] = (ch >> 6) | 0xc0;	  a[1] = (ch & 0x2f) | 0x80;	  __NXML_XTO8 ((void *) a, 2);	}      /* 16bit:  xxx10000 xx000000 xx000000       *         1 0000 0000 0000 0000       *      0x 1    0    0    0    0       */      else if (ch < 0x10000)	{	  /* 16bits:  1110xxxx 10xxxxxx 10xxxxxx */	  char a[3];	  a[0] = (ch >> 12) | 0xe0;	  a[1] = ((ch >> 6) & 0x2f) | 0x80;	  a[2] = (ch & 0x2f) | 0x80;	  __NXML_XTO8 ((void *) a, 3);	}      /* 21bit:  xxxx1000 xx000000 xx000000 xx000000       *         10 0000 0000 0000 0000 0000       *      0x 2  0    0    0    0    0       */      else if (ch < 0x200000)	{	  /* 21bits:  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */	  char a[4];	  a[0] = (ch >> 18) | 0xf0;	  a[1] = ((ch >> 12) & 0x2f);	  a[2] = ((ch >> 6) & 0x2f);	  a[3] = (ch & 0x2f);	  __NXML_XTO8 ((void *) a, 4);	}      /* 26bit:  xxxxx100 xx000000 xx000000 xx000000 xx000000       *         100 0000 0000 0000 0000 0000 0000       *      0x 4   0    0    0    0    0    0       */      else if (ch < 0x4000000)	{	  /* 21bits:  111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */	  char a[5];	  a[0] = (ch >> 24) | 0xf8;	  a[1] = ((ch >> 18) & 0x2f);	  a[2] = ((ch >> 12) & 0x2f);	  a[3] = ((ch >> 6) & 0x2f);	  a[4] = (ch & 0x2f);	  __NXML_XTO8 ((void *) a, 5);	}    }  ret[j] = 0;  (*ret_buffer) = ret;  return (size_t) j;}static size_t__nxml_ucs4to8 (int type, unsigned char *buffer, size_t size,		unsigned char **ret_buffer){  int64_t ch;  int j = 0;  int byte = 0;  unsigned char *ret;  if (!(ret = (unsigned char *) malloc (sizeof (unsigned char) * 1024)))    return -1;  while (size > 0)    {      switch (type)	{	case 0:		/* 1234 */	  ch =	    (*buffer << 18) + (*(buffer + 1) << 12) + (*(buffer + 2) << 6) +	    (*(buffer + 3));	  break;	case 1:		/* 4321 */	  ch =	    (*buffer) + (*(buffer + 1) << 6) + (*(buffer + 2) << 12) +	    (*(buffer + 3) << 18);	  break;	case 2:		/* 2143 */	  ch =	    ((*buffer) << 12) + (*(buffer + 1) << 18) + (*(buffer + 2)) +	    (*(buffer + 3) << 6);	  break;	case 3:		/* 3412 */	  ch =	    ((*buffer) << 6) + (*(buffer + 1)) + (*(buffer + 2) << 18) +	    (*(buffer + 3) << 12);	  break;	}      buffer += 4;      size -= 4;      /* 8bit:  1000000 */      if (ch < 0x80)	{	  __NXML_XTO8 ((void *) &ch, 1);	}      /* 11bit:  xx100000 xx000000       *         1000 0000 0000        *      0x 8    0    0       */      else if (ch < 0x800)	{	  /* 11bits:    110xxxxx 10xxxxxx */	  char a[2];	  a[0] = (ch >> 6) | 0xc0;	  a[1] = (ch & 0x2f) | 0x80;	  __NXML_XTO8 ((void *) a, 2);	}      /* 16bit:  xxx10000 xx000000 xx000000       *         1 0000 0000 0000 0000       *      0x 1    0    0    0    0       */      else if (ch < 0x10000)	{	  /* 16bits:  1110xxxx 10xxxxxx 10xxxxxx */	  char a[3];	  a[0] = (ch >> 12) | 0xe0;	  a[1] = ((ch >> 6) & 0x2f) | 0x80;	  a[2] = (ch & 0x2f) | 0x80;	  __NXML_XTO8 ((void *) a, 3);	}      /* 21bit:  xxxx1000 xx000000 xx000000 xx000000       *         10 0000 0000 0000 0000 0000       *      0x 2  0    0    0    0    0       */      else if (ch < 0x200000)	{	  /* 21bits:  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */	  char a[4];	  a[0] = (ch >> 18) | 0xf0;	  a[1] = ((ch >> 12) & 0x2f);	  a[2] = ((ch >> 6) & 0x2f);	  a[3] = (ch & 0x2f);	  __NXML_XTO8 ((void *) a, 4);	}      /* 26bit:  xxxxx100 xx000000 xx000000 xx000000 xx000000       *         100 0000 0000 0000 0000 0000 0000       *      0x 4   0    0    0    0    0    0       */      else if (ch < 0x4000000)	{	  /* 21bits:  111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */	  char a[5];	  a[0] = (ch >> 24) | 0xf8;	  a[1] = ((ch >> 18) & 0x2f);	  a[2] = ((ch >> 12) & 0x2f);	  a[3] = ((ch >> 6) & 0x2f);	  a[4] = (ch & 0x2f);	  __NXML_XTO8 ((void *) a, 5);	}    }  ret[j] = 0;  (*ret_buffer) = ret;  return (size_t) j;}int__nxml_utf_detection (char *r_buffer, size_t r_size, char **buffer,		      size_t * size, nxml_charset_t * charset){  /* Utf-8: 0x3c 0x3f 0x78 0x6d */  if (strncmp (r_buffer, "<?xml", 5))    {      *charset = NXML_CHARSET_UTF8;      return 0;    }  /* Utf-16LE: 0x00 0x3c 0x00 0x3f */  if (*r_buffer == 0x00 && *(r_buffer + 1) == 0x3c && *(r_buffer + 2) == 0x00      && *(r_buffer + 3) == 0x3f)    {      (*size) =	__nxml_utf16to8 (1, (unsigned char *) r_buffer, r_size,			 (unsigned char **) buffer);      if (*size < 0)	return -1;      *charset = NXML_CHARSET_UTF16LE;      return 1;    }  /* Utf-16BE: 0x3c 0x00 0x3f 0x00 */  if (*r_buffer == 0x3c && *(r_buffer + 1) == 0x00 && *(r_buffer + 2) == 0x3f      && *(r_buffer + 3) == 0x00)    {      (*size) =	__nxml_utf16to8 (0, (unsigned char *) r_buffer, r_size,			 (unsigned char **) buffer);      if (*size < 0)	return -1;      *charset = NXML_CHARSET_UTF16BE;      return 1;    }  /* UCS-4 (1234): 0x00 0x00 0x00 0x3c */  if (*r_buffer == 0x00 && *(r_buffer + 1) == 0x00 && *(r_buffer + 2) == 0x00      && *(r_buffer + 3) == 0x3c)    {      (*size) =	__nxml_ucs4to8 (0, (unsigned char *) r_buffer, r_size,			(unsigned char **) buffer);      if (*size < 0)	return -1;      *charset = NXML_CHARSET_UCS4_1234;      return 1;    }  /* UCS-4 (4321): 0x3c 0x00 0x00 0x00 */  if (*r_buffer == 0x3c && *(r_buffer + 1) == 0x00 && *(r_buffer + 2) == 0x00      && *(r_buffer + 3) == 0x00)    {      (*size) =	__nxml_ucs4to8 (1, (unsigned char *) r_buffer, r_size,			(unsigned char **) buffer);      if (*size < 0)	return -1;      *charset = NXML_CHARSET_UCS4_4321;      return 1;    }  /* UCS-4 (2143): 0x00 0x00 0x3c 0x00 */  if (*r_buffer == 0x00 && *(r_buffer + 1) == 0x00 && *(r_buffer + 2) == 0x3c      && *(r_buffer + 3) == 0x00)    {      (*size) =	__nxml_ucs4to8 (2, (unsigned char *) r_buffer, r_size,			(unsigned char **) buffer);      if (*size < 0)	return -1;      *charset = NXML_CHARSET_UCS4_2143;      return 1;    }  /* UCS-4 (3412): 0x00 0x3c 0x00 0x00 */  if (*r_buffer == 0x00 && *(r_buffer + 1) == 0x3c && *(r_buffer + 2) == 0x00      && *(r_buffer + 3) == 0x00)    {      (*size) =	__nxml_ucs4to8 (3, (unsigned char *) r_buffer, r_size,			(unsigned char **) buffer);      if (*size < 0)	return -1;      *charset = NXML_CHARSET_UCS4_3412;      return 1;    }  *charset = NXML_CHARSET_UNKNOWN;  return 0;}int64_t__nxml_int_charset (int ch, unsigned char *str, char *charset){  if (!charset || strcasecmp (charset, "utf-8"))    {      str[0] = ch;      return 1;    }  /* 8bit:  1000000 */  if (ch < 0x80)    {      str[0] = ch;      return 1;    }  /* 11bit:  xx100000 xx000000   *         1000 0000 0000    *      0x 8    0    0   */  else if (ch < 0x800)    {      /* 11bits:    110xxxxx 10xxxxxx */      str[0] = (ch >> 6) | 0xc0;      str[1] = (ch & 0x2f) | 0x80;      return 2;    }  /* 16bit:  xxx10000 xx000000 xx000000   *         1 0000 0000 0000 0000   *      0x 1    0    0    0    0   */  else if (ch < 0x10000)    {      /* 16bits:  1110xxxx 10xxxxxx 10xxxxxx */      str[0] = (ch >> 12) | 0xe0;      str[1] = ((ch >> 6) & 0x2f) | 0x80;      str[2] = (ch & 0x2f) | 0x80;      return 3;    }  /* 21bit:  xxxx1000 xx000000 xx000000 xx000000   *         10 0000 0000 0000 0000 0000   *      0x 2  0    0    0    0    0   */  else if (ch < 0x200000)    {      /* 21bits:  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */      str[0] = (ch >> 18) | 0xf0;      str[1] = ((ch >> 12) & 0x2f);      str[2] = ((ch >> 6) & 0x2f);      str[3] = (ch & 0x2f);      return 4;    }  /* 26bit:  xxxxx100 xx000000 xx000000 xx000000 xx000000   *         100 0000 0000 0000 0000 0000 0000   *      0x 4   0    0    0    0    0    0   */  else if (ch < 0x4000000)    {      /* 21bits:  111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */      str[0] = (ch >> 24) | 0xf8;      str[1] = ((ch >> 18) & 0x2f);      str[2] = ((ch >> 12) & 0x2f);      str[3] = ((ch >> 6) & 0x2f);      str[4] = (ch & 0x2f);      return 5;    }  return 0;}/* EOF */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -