⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 2utf.c

📁 一个 UTF 编码转换程序的源代码,可供参考
💻 C
📖 第 1 页 / 共 5 页
字号:
  enum  {    UCS2, UCS4, UCSwchar_t, UTF8f  }  output_format = UTF8f;  int index, create_aliases = FALSE, file_arg = 0, html_doc = FALSE, list_charmaps = FALSE, spit = FALSE, swap, switch_to_unicode = FALSE;  register int c;  long int l;  union  {    u_int8_t byte[4];    u_int16_t b16[2];    u_int32_t b32;  }  u_int;  wchar_t range, spit_min = 0xF000, spit_max = 0xF1FF, wchar;  UTF8_charset.pipe = USASCII_charset.pipe = unknown_charset.pipe = stdout;  /* standard UCS byte order is big endian */#if (__BYTE_ORDER != __BIG_ENDIAN)  swap = TRUE;#else  swap = FALSE;#endif  if (argc <= 1)  {  }  else  {    /*while ((c = getopt_long (argc, argv, optstring, long_options, NULL)) != -1)*/    Long2short_options (argc, argv, long2short_options);    while ((c = getopt (argc, argv, optstring)) != -1)    {      if (verbose)	fprintf (stderr, " -%c", c);      switch (c)      {	case '?':	case ':':	  Short_help(3);	case '2':	  output_format = UCS2;	  break;	case '4':	case 'w':	  output_format = UCS4;	  break;	case '8':	  output_format = UTF8f;	  break;	case 'C':	  create_aliases = 1;	  break;	case 'V':	  fprintf (stderr, "%s%s", version, blurb);	  exit (0);	case 'c':	  charmap_filename = xstrdup (optarg);	  if (verbose)	    fprintf (stderr, "'%s'", charmap_filename);	  break;	case 'd':	  Debug = 1;	  if (optarg)	    Debug = atoi (optarg);	  if (verbose)	    fprintf (stderr, "%i", Debug);	  verbose = TRUE;	  break;	case 'e':	  encode = TRUE;	  break;	case 'f':	  if (optarg)	  {	    charmap_format[DEFAULT] = xstrdup (optarg);	  }	  else	    charmap_format[DEFAULT] = default_charmap_format;	  if (verbose)	    fprintf (stderr, "'%s'", charmap_format[DEFAULT]);	  break;	case 'H':	  html_doc = TRUE;	  break;	case 'h':	  Help (default_unknown_char);	  exit (0);	case 'i':	  iconv_only = TRUE;	  if (optarg) {	    if (Strcasecmp(optarg, "only") == 0)	      iconv_only = TRUE;	    else if (Strcasecmp(optarg, "first") == 0) {	      iconv_only = FALSE;	      iconv_first = TRUE;	    } else if 
(Strcasecmp(optarg, "last") == 0) {	      iconv_only = FALSE;	      iconv_first = FALSE;	    } else	      Short_help(5);	  }	  break;	case 'l':	  list_charmaps = TRUE;	  break;	case 'o':	  reverse = FALSE;	  break;	case 'p':	  Read_config_file ();	  Print_paths ();	  exit (0);	case 'r':	  reverse = TRUE;	  break;	case 'S':	  spit = 1;	  if (optarg)	  {	    if (strcspn (optarg + strspn (optarg, "\t\n "), "-,;") == 0)	    {	      spit_min = 0;	      if ((next_token = strtok (optarg, "\t\n -,;")) == NULL)		spit = 2;	    }	    else	    {	      if (strpbrk (optarg + strspn (optarg, "\t\n -,;"), "-,;"))		spit = 2;	      if ((next_token = strtok (optarg, "\t\n -,;")) == NULL)		Short_help(5);	      errno = 0;	      range = strtoul (next_token, (char **) &stop_char_ptr, 16);	      if (errno != ERANGE && *stop_char_ptr == '\0' &&range >= 0 && range <= 0x7FFFFFFF)		spit_min = range;	      else		Short_help(5);	      next_token = strtok (NULL, "\t\n -,;");	    }	    if (next_token)	    {	      errno = 0;	      range = strtoul (next_token, (char **) &stop_char_ptr, 16);	      if (errno != ERANGE && *stop_char_ptr == '\0' &&range >= 0 && range <= 0x7FFFFFFF)		spit_max = range;	      else		Short_help(5);	    }	    else if (spit == 2)	      spit_max = (spit_min > 0x7FFFFFFF - 511) ? 
0x7FFFFFFF : spit_min / 32 * 32 + 511;	    else	      spit_max = spit_min / 32 * 32 + 31;	    if (spit_max < spit_min)	    {	      range = spit_min;	      spit_min = spit_max;	      spit_max = range;	    }	    if (strtok (NULL, "\t\n -,;"))	      Short_help(5);	  }	  /*fprintf(stderr,"min = %lx\n",spit_min); */	  if (verbose)	    fprintf (stderr, "0x%.4lX-0x%.4lX\n", (long unsigned int) spit_min, (long unsigned int) spit_max);	  break;	case 's':	  switch_to_unicode = TRUE;	  break;	case 'u':	  if (optarg)	    if (strlen (optarg) <= 1)	      unknown_char = optarg[0];	    else	    {	      errno = 0;	      l = strtol (optarg, (char **) &stop_char_ptr, 0);	      if (errno != ERANGE && *stop_char_ptr == '\0' && l <= 0xFF)		unknown_char = l;	      else		Short_help(5);	    }	  else	    unknown_char = default_unknown_char;	  if (verbose)	    fprintf (stderr, "0x%.2X", unknown_char);	  break;	case 'v':	  verbose = TRUE;	  fprintf (stderr, " -v");	  break;	case 'W':	  show_charmap = TRUE;	  break;	  /*          case 'w':		      output_format = UCSwchar_t;		      break; */	default:	  fprintf (stderr, "getopt returned character code 0x%X ??\n", c);      }    }    if (verbose)      putc ('\n', stderr);  }  if (encode && !iconv_only)    Error (add_iconv_only);  if (!(ptr = strrchr (argv[0], '/')))    ptr = argv[0];  else    ptr++;  if (Strcasecmp (ptr, "fromUTF") == 0)    reverse = !reverse;  if (Debug > 7)    fprintf (stderr, "2UTF: argv[0]='%s', filename='%s'. 
\n", argv[0], ptr);  if (Debug)    if (reverse)      fprintf (stderr, "2UTF: from UTF \n");    else      fprintf (stderr, "2UTF: to UTF \n");  if (switch_to_unicode)    fprintf (stderr, "\033%%G");  if (iconv_only) {    paths_number = 0;  } else {    Read_config_file ();  }  if (atexit (&Close_pipe) != 0)    Error (internal_err);  if (create_aliases)    if (!(c = Create_aliases ()))    {      fprintf (stderr, "2UTF: %s '%s' \n", can_not_create, aliases_pathname);      exit (7);    }  if (list_charmaps)    exit (List_charmaps ()? 0 : 8);  if (spit)  {    Spit (spit_min, spit_max, NULL, unknown_char);    exit (0);  }  if (argc > 1 && optind < argc) {    charmap_filename = argv[file_arg = argc - 1];  }  else if (charmap_filename == NULL)    if (create_aliases)      exit (0);    else    {      /* Assuming mail message */      if (output_format == UCS2 || output_format == UCS4 || reverse)	Error (unimplemented);      Mail = TRUE;      c = parse_message (0);      if (charset_p->type == BUF_PIPE || charset_p->type == NON_BUF_PIPE)	pclose (charset_p->pipe);      Validate_charset ("'unknown'", 0);      /*charset_p = &unknown_charset;*/      Pipe_to_UTF8 (stdin);      if (c != END_BOUNDARY)      {	if (Debug)	  fprintf (stderr, "parse of message failed: %d\n", c);	exit (1);      }      else if (Debug)	fprintf (stderr, "parse of message complete\n");      exit (0);    }  /* For the output of interractive programms */  setvbuf (stdout, NULL, _IONBF, 0);  if (show_charmap)  {    reverse = FALSE;    unknown_wchar = (wchar_t) '?';  }  if (!Validate_charset (charmap_filename, 0))  {				/* (charset_p->type == UNKNOWN) */    if (strchr (charmap_filename, '/') == NULL)    {      fprintf (stderr, "%s '%s' \n", can_not_find_alias, charmap_filename);      for (index = 0; index < paths_number; index++)	fprintf (stderr, " %s*%s* \n", paths[index], charmap_filename);    }    else      fprintf (stderr, " %s \n", charmap_filename);    Error (can_not_open_any);  }  else if (charset_p->type == 
UTF8)  {    if (verbose)      fprintf (stderr, "2UTF: %s '%s' \n", using, "Unicode UTF-8");  }  else if (charset_p->type == USASCII)  {    if (verbose)      fprintf (stderr, "2UTF: %s '%s' \n", using, "US-ASCII");    charset_p->charmap = xmalloc (256 * sizeof (wchar_t));    charset_p->type = KNOWN;    charset_p->pipe = stdout;    charset_p->USASCII_is_subset = IS;    for (wchar = 0; wchar <= 0x7F; wchar++)      charset_p->charmap[wchar] = wchar;    for (wchar = 0x7F; wchar <= 0xFF; wchar++)      charset_p->charmap[wchar] = unknown_wchar;  }  if (charmap_filename != argv[file_arg])    free (charmap_filename);  if (show_charmap)  {    unknown_wchar = (wchar_t) '?';    if (charset_p->type != UTF8)      Spit (0, 255, charset_p->charmap, unknown_char);    else      Spit (0, 0x7FFFFFFF, NULL, unknown_char);    exit (0);  }  if (charset_p->type != KNOWN && (output_format == UCS2 || output_format == UCS4))    Error (unimplemented);  if (reverse)  {    if (output_format == UCS2)    {      while (fread (u_int.byte, 1, 2, stdin) == 2)      {	/* check Byte Order Mark */	if (0xFFFE == u_int.b16[0])	  swap = TRUE;	else if (0xFEFF == u_int.b16[0])	  swap = FALSE;	else	{	  if (swap)	  {	    c = u_int.byte[0];	    u_int.byte[0] = u_int.byte[1];	    u_int.byte[1] = c;	  }	  wchar = u_int.b16[0];	  Print_approx_Macro (1, wchar, unknown_char)	}      }    }    else if (output_format == UCS4)    {      while (fread (u_int.byte, 1, 4, stdin) == 4)      {#if (__BYTE_ORDER == __PDP_ENDIAN)#error	c = u_int.b16[0];	u_int.b16[0] = u_int.b16[1];	u_int.b16[1] = c;#endif	/* check Byte Order Mark */	if (0xFFFE0000 == u_int.b32)	  swap = TRUE;	else if (0x0000FEFF == u_int.b32)	  swap = FALSE;	else	{	  if (swap)	  {	    c = u_int.byte[0];	    u_int.byte[0] = u_int.byte[3];	    u_int.byte[3] = c;	    c = u_int.byte[1];	    u_int.byte[1] = u_int.byte[2];	    u_int.byte[2] = c;	  }	  wchar = u_int.b32;	  Print_approx_Macro (1, wchar, unknown_char)	}      }    }    else if (output_format == UTF8f)    { 
     if (charset_p->type == ICONV) {	while ((c = getc (stdin)) != EOF)	  Put_to_iconv_buf (c, charset_p);	Put_to_iconv_buf (-2, charset_p);      } else if (charset_p->type == KNOWN) {	while ((c = Freadmb (&wchar, stdin)) != EOF)	  Print_approx_Macro (c, wchar, unknown_char)      } else if (charset_p->type == UTF8 || charset_p->type == BUF_PIPE || charset_p->type == NON_BUF_PIPE || charset_p->type == USASCII) {	while ((c = getc (stdin)) != EOF) {	  if (putc (c, charset_p->pipe) == EOF)	    Error (IO_err);	}      } else	Error (internal_err);      if (ferror (stdin))	Error (IO_err);    }    else      Error (internal_err);  }  else    /* not reverse */  {    if (output_format == UCS2)    {      putchar (0xFE);      putchar (0xFF);      while ((c = getchar ()) != EOF)      {	u_int.b16[0] = (u_int16_t) charset_p->charmap[c];#if (__BYTE_ORDER != __BIG_ENDIAN)	c = u_int.byte[0];	u_int.byte[0] = u_int.byte[1];	u_int.byte[1] = c;#endif	if (fwrite (u_int.byte, 1, 2, stdout) != 2)	  Error (IO_err);      }    }    else if (output_format == UCS4)    {      putchar (0);      putchar (0);      putchar (0xFE);      putchar (0xFF);      while ((c = getchar ()) != EOF)      {	u_int.b32 = (u_int32_t) charset_p->charmap[c];#if (__BYTE_ORDER == __LITTLE_ENDIAN)	c = u_int.byte[0];	u_int.byte[0] = u_int.byte[3];	u_int.byte[3] = c;	c = u_int.byte[1];	u_int.byte[1] = u_int.byte[2];	u_int.byte[2] = c;#elif (__BYTE_ORDER == __PDP_ENDIAN)	c = u_int.byte[0];	u_int.byte[0] = u_int.byte[1];	u_int.byte[1] = c;	c = u_int.byte[2];	u_int.byte[2] = u_int.byte[3];	u_int.byte[3] = c;#elif (__BYTE_ORDER == __BIG_ENDIAN)#else#error Unknown byte order.#endif	if (fwrite (u_int.byte, 1, 4, stdout) != 4)	  Error (IO_err);      }    }    /*      else if (output_format == UCSwchar_t)	    {	    while ((c = getchar ()) != EOF)	    if (fwrite (&charset_p->charmap[c], sizeof (wchar_t), 1, stdout) != 1)	    Error (IO_err);	    }     */	    else if (output_format == UTF8f)	      Pipe_to_UTF8 (stdin);	    else	      
Error (internal_err);  }  if (feof (stdin))    exit (0);  else    Error (IO_err);  exit (101);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -