⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 test-iconv.c

📁 《Linux程序设计权威指南》源代码
💻 C
字号:
// File: test-iconv.c
//
// Converts the string given on the command line from the named charset to
// UTF-8, one character at a time, using iconv().  For every character that
// converts it prints three tab-separated columns: the raw character, its
// original byte sequence in hex, and the code point decoded from the UTF-8
// output.
//
// Usage: test-iconv charset string

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iconv.h>
#include <errno.h>

enum
{
  MAX_CHAR_BYTES = 4,   // longest input byte sequence tried per character
  MAX_UTF8_BYTES = 6    // size of the UTF-8 output buffer handed to iconv()
};

// Convert a single character with iconv(): input charset -> UTF-8
static int try (iconv_t cd, unsigned char buf[], unsigned int buflen,
                unsigned char *out);

// UTF-8 -> Unicode
static unsigned int utf8_decode (const unsigned char *out,
                                 unsigned int outlen);

// Hex dump helper shared by the normal output and the diagnostics
static void print_hex (FILE *fp, const unsigned char *buf, unsigned int len);

int
main (int argc, char **argv)
{
  const char *charset;
  const unsigned char *str;
  unsigned char buf[MAX_CHAR_BYTES];
  unsigned char out[MAX_UTF8_BYTES];
  size_t inlength;
  size_t i;
  iconv_t cd;

  if (argc != 3)
    {
      fprintf (stderr, "Usage: %s charset string\n", argv[0]);
      exit (1);
    }
  charset = argv[1];
  // Bytes are handled as unsigned so that the %02X output never sign-extends.
  str = (const unsigned char *) argv[2];

  // Open the charset -> UTF-8 conversion descriptor
  cd = iconv_open ("UTF-8", charset);
  if (cd == (iconv_t) (-1))
    {
      perror ("iconv_open");
      exit (1);
    }

  inlength = strlen (argv[2]);

  // Convert the input string one character at a time
  i = 0;
  while (i < inlength)
    {
      unsigned int len = 0;
      unsigned int k;
      // Must be signed: try() reports an invalid sequence as -1.
      int result = 0;

      // Grow the candidate sequence one byte at a time while iconv() says
      // it is still incomplete, never reading past the end of the input.
      while (result == 0 && len < MAX_CHAR_BYTES && i + len < inlength)
        {
          buf[len] = str[i + len];
          len++;
          result = try (cd, buf, len, out);
        }

      if (result == 0)
        {
          // Conversion failed: still incomplete after MAX_CHAR_BYTES bytes,
          // or the input ended in the middle of a character.
          print_hex (stderr, buf, len);
          fprintf (stderr, ": incomplete byte sequence\n");
          exit (1);
        }

      if (result > 0)
        {
          // Converted a character of len bytes
          for (k = 0; k < len; k++)
            printf ("%c", buf[k]);
          printf ("\t");
          print_hex (stdout, buf, len);
          printf ("\t0x%04X\n", utf8_decode (out, (unsigned int) result));
        }
      // result < 0: invalid byte sequence; it is skipped without output.

      i += len;
    }

  // Close the conversion descriptor
  if (iconv_close (cd) < 0)
    {
      perror ("iconv_close");
      exit (1);
    }
  return (0);
}

// Convert a single character with iconv(): input charset -> UTF-8.
//
//   cd      conversion descriptor obtained from iconv_open()
//   buf     candidate input byte sequence
//   buflen  number of bytes in buf
//   out     receives the UTF-8 bytes; must hold MAX_UTF8_BYTES bytes
//
// Returns the number of UTF-8 bytes written to out, 0 if iconv() reported
// the input as incomplete (EINVAL), or -1 if it reported it as invalid
// (EILSEQ).  Any other iconv() failure, or a successful call that leaves
// input unconsumed, prints a diagnostic to stderr and exits with status 1.
static int
try (iconv_t cd, unsigned char buf[], unsigned int buflen, unsigned char *out)
{
  char *inbuf = (char *) buf;
  size_t inbytesleft = buflen;
  char *outbuf = (char *) out;
  size_t outbytesleft = MAX_UTF8_BYTES;
  size_t result = iconv (cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);

  if (result == (size_t) (-1))
    {
      // Capture errno before any stdio call has a chance to overwrite it.
      int saved_errno = errno;

      if (saved_errno == EILSEQ)
        return -1;
      if (saved_errno == EINVAL)
        return 0;

      print_hex (stderr, buf, buflen);
      fprintf (stderr, ": iconv error: ");
      errno = saved_errno;
      perror ("");
      exit (1);
    }

  if (inbytesleft != 0)
    {
      print_hex (stderr, buf, buflen);
      fprintf (stderr, ": inbytes = %ld, outbytes = %ld\n",
               (long) (buflen - inbytesleft),
               (long) (MAX_UTF8_BYTES - outbytesleft));
      exit (1);
    }

  return (int) (MAX_UTF8_BYTES - outbytesleft);
}

// UTF-8 -> Unicode.
//
// Decodes the outlen-byte UTF-8 sequence at out into its code point.
// Lengths 1 through 6 are accepted; any other length yields 0xfffd.
// The bytes themselves are not validated.
static unsigned int
utf8_decode (const unsigned char *out, unsigned int outlen)
{
  unsigned int code;
  unsigned int k;

  if (outlen == 1)
    return out[0];
  if (outlen < 1 || outlen > MAX_UTF8_BYTES)
    return 0xfffd;

  // The lead byte of an n-byte sequence carries 7 - n payload bits
  // (masks 0x1f, 0x0f, 0x07, 0x03, 0x01 for n = 2..6).
  code = out[0] & (0xffu >> (outlen + 1));
  // Every continuation byte contributes its low 6 bits.
  for (k = 1; k < outlen; k++)
    code = (code << 6) | (out[k] & 0x3fu);
  return code;
}

// Write "0x" followed by the len bytes of buf as two-digit uppercase hex.
static void
print_hex (FILE *fp, const unsigned char *buf, unsigned int len)
{
  unsigned int k;

  fprintf (fp, "0x");
  for (k = 0; k < len; k++)
    fprintf (fp, "%02X", buf[k]);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -