📄 2utf.c
字号:
enum { UCS2, UCS4, UCSwchar_t, UTF8f } output_format = UTF8f; int index, create_aliases = FALSE, file_arg = 0, html_doc = FALSE, list_charmaps = FALSE, spit = FALSE, swap, switch_to_unicode = FALSE; register int c; long int l; union { u_int8_t byte[4]; u_int16_t b16[2]; u_int32_t b32; } u_int; wchar_t range, spit_min = 0xF000, spit_max = 0xF1FF, wchar; UTF8_charset.pipe = USASCII_charset.pipe = unknown_charset.pipe = stdout; /* standard UCS byte order is big endian */#if (__BYTE_ORDER != __BIG_ENDIAN) swap = TRUE;#else swap = FALSE;#endif if (argc <= 1) { } else { /*while ((c = getopt_long (argc, argv, optstring, long_options, NULL)) != -1)*/ Long2short_options (argc, argv, long2short_options); while ((c = getopt (argc, argv, optstring)) != -1) { if (verbose) fprintf (stderr, " -%c", c); switch (c) { case '?': case ':': Short_help(3); case '2': output_format = UCS2; break; case '4': case 'w': output_format = UCS4; break; case '8': output_format = UTF8f; break; case 'C': create_aliases = 1; break; case 'V': fprintf (stderr, "%s%s", version, blurb); exit (0); case 'c': charmap_filename = xstrdup (optarg); if (verbose) fprintf (stderr, "'%s'", charmap_filename); break; case 'd': Debug = 1; if (optarg) Debug = atoi (optarg); if (verbose) fprintf (stderr, "%i", Debug); verbose = TRUE; break; case 'e': encode = TRUE; break; case 'f': if (optarg) { charmap_format[DEFAULT] = xstrdup (optarg); } else charmap_format[DEFAULT] = default_charmap_format; if (verbose) fprintf (stderr, "'%s'", charmap_format[DEFAULT]); break; case 'H': html_doc = TRUE; break; case 'h': Help (default_unknown_char); exit (0); case 'i': iconv_only = TRUE; if (optarg) { if (Strcasecmp(optarg, "only") == 0) iconv_only = TRUE; else if (Strcasecmp(optarg, "first") == 0) { iconv_only = FALSE; iconv_first = TRUE; } else if (Strcasecmp(optarg, "last") == 0) { iconv_only = FALSE; iconv_first = FALSE; } else Short_help(5); } break; case 'l': list_charmaps = TRUE; break; case 'o': reverse = FALSE; break; case 'p': Read_config_file (); Print_paths (); exit (0); case 'r': reverse = TRUE; break; case 'S': spit = 1; if (optarg) { if (strcspn (optarg + strspn (optarg, "\t\n "), "-,;") == 0) { spit_min = 0; if ((next_token = strtok (optarg, "\t\n -,;")) == NULL) spit = 2; } else { if (strpbrk (optarg + strspn (optarg, "\t\n -,;"), "-,;")) spit = 2; if ((next_token = strtok (optarg, "\t\n -,;")) == NULL) Short_help(5); errno = 0; range = strtoul (next_token, (char **) &stop_char_ptr, 16); if (errno != ERANGE && *stop_char_ptr == '\0' &&range >= 0 && range <= 0x7FFFFFFF) spit_min = range; else Short_help(5); next_token = strtok (NULL, "\t\n -,;"); } if (next_token) { errno = 0; range = strtoul (next_token, (char **) &stop_char_ptr, 16); if (errno != ERANGE && *stop_char_ptr == '\0' &&range >= 0 && range <= 0x7FFFFFFF) spit_max = range; else Short_help(5); } else if (spit == 2) spit_max = (spit_min > 0x7FFFFFFF - 511) ? 0x7FFFFFFF : spit_min / 32 * 32 + 511; else spit_max = spit_min / 32 * 32 + 31; if (spit_max < spit_min) { range = spit_min; spit_min = spit_max; spit_max = range; } if (strtok (NULL, "\t\n -,;")) Short_help(5); } /*fprintf(stderr,"min = %lx\n",spit_min); */ if (verbose) fprintf (stderr, "0x%.4lX-0x%.4lX\n", (long unsigned int) spit_min, (long unsigned int) spit_max); break; case 's': switch_to_unicode = TRUE; break; case 'u': if (optarg) if (strlen (optarg) <= 1) unknown_char = optarg[0]; else { errno = 0; l = strtol (optarg, (char **) &stop_char_ptr, 0); if (errno != ERANGE && *stop_char_ptr == '\0' && l <= 0xFF) unknown_char = l; else Short_help(5); } else unknown_char = default_unknown_char; if (verbose) fprintf (stderr, "0x%.2X", unknown_char); break; case 'v': verbose = TRUE; fprintf (stderr, " -v"); break; case 'W': show_charmap = TRUE; break; /* case 'w': output_format = UCSwchar_t; break; */ default: fprintf (stderr, "getopt returned character code 0x%X ??\n", c); } } if (verbose) putc ('\n', stderr); } if (encode && !iconv_only) Error (add_iconv_only); if (!(ptr = strrchr (argv[0], '/'))) ptr = argv[0]; else ptr++; if (Strcasecmp (ptr, "fromUTF") == 0) reverse = !reverse; if (Debug > 7) fprintf (stderr, "2UTF: argv[0]='%s', filename='%s'. \n", argv[0], ptr); if (Debug) if (reverse) fprintf (stderr, "2UTF: from UTF \n"); else fprintf (stderr, "2UTF: to UTF \n"); if (switch_to_unicode) fprintf (stderr, "\033%%G"); if (iconv_only) { paths_number = 0; } else { Read_config_file (); } if (atexit (&Close_pipe) != 0) Error (internal_err); if (create_aliases) if (!(c = Create_aliases ())) { fprintf (stderr, "2UTF: %s '%s' \n", can_not_create, aliases_pathname); exit (7); } if (list_charmaps) exit (List_charmaps ()? 0 : 8); if (spit) { Spit (spit_min, spit_max, NULL, unknown_char); exit (0); } if (argc > 1 && optind < argc) { charmap_filename = argv[file_arg = argc - 1]; } else if (charmap_filename == NULL) if (create_aliases) exit (0); else { /* Assuming mail message */ if (output_format == UCS2 || output_format == UCS4 || reverse) Error (unimplemented); Mail = TRUE; c = parse_message (0); if (charset_p->type == BUF_PIPE || charset_p->type == NON_BUF_PIPE) pclose (charset_p->pipe); Validate_charset ("'unknown'", 0); /*charset_p = &unknown_charset;*/ Pipe_to_UTF8 (stdin); if (c != END_BOUNDARY) { if (Debug) fprintf (stderr, "parse of message failed: %d\n", c); exit (1); } else if (Debug) fprintf (stderr, "parse of message complete\n"); exit (0); } /* For the output of interractive programms */ setvbuf (stdout, NULL, _IONBF, 0); if (show_charmap) { reverse = FALSE; unknown_wchar = (wchar_t) '?'; } if (!Validate_charset (charmap_filename, 0)) { /* (charset_p->type == UNKNOWN) */ if (strchr (charmap_filename, '/') == NULL) { fprintf (stderr, "%s '%s' \n", can_not_find_alias, charmap_filename); for (index = 0; index < paths_number; index++) fprintf (stderr, " %s*%s* \n", paths[index], charmap_filename); } else fprintf (stderr, " %s \n", charmap_filename); Error (can_not_open_any); } else if (charset_p->type == UTF8) { if (verbose) fprintf (stderr, "2UTF: %s '%s' \n", using, "Unicode UTF-8"); } else if (charset_p->type == USASCII) { if (verbose) fprintf (stderr, "2UTF: %s '%s' \n", using, "US-ASCII"); charset_p->charmap = xmalloc (256 * sizeof (wchar_t)); charset_p->type = KNOWN; charset_p->pipe = stdout; charset_p->USASCII_is_subset = IS; for (wchar = 0; wchar <= 0x7F; wchar++) charset_p->charmap[wchar] = wchar; for (wchar = 0x7F; wchar <= 0xFF; wchar++) charset_p->charmap[wchar] = unknown_wchar; } if (charmap_filename != argv[file_arg]) free (charmap_filename); if (show_charmap) { unknown_wchar = (wchar_t) '?'; if (charset_p->type != UTF8) Spit (0, 255, charset_p->charmap, unknown_char); else Spit (0, 0x7FFFFFFF, NULL, unknown_char); exit (0); } if (charset_p->type != KNOWN && (output_format == UCS2 || output_format == UCS4)) Error (unimplemented); if (reverse) { if (output_format == UCS2) { while (fread (u_int.byte, 1, 2, stdin) == 2) { /* check Byte Order Mark */ if (0xFFFE == u_int.b16[0]) swap = TRUE; else if (0xFEFF == u_int.b16[0]) swap = FALSE; else { if (swap) { c = u_int.byte[0]; u_int.byte[0] = u_int.byte[1]; u_int.byte[1] = c; } wchar = u_int.b16[0]; Print_approx_Macro (1, wchar, unknown_char) } } } else if (output_format == UCS4) { while (fread (u_int.byte, 1, 4, stdin) == 4) {#if (__BYTE_ORDER == __PDP_ENDIAN)#error c = u_int.b16[0]; u_int.b16[0] = u_int.b16[1]; u_int.b16[1] = c;#endif /* check Byte Order Mark */ if (0xFFFE0000 == u_int.b32) swap = TRUE; else if (0x0000FEFF == u_int.b32) swap = FALSE; else { if (swap) { c = u_int.byte[0]; u_int.byte[0] = u_int.byte[3]; u_int.byte[3] = c; c = u_int.byte[1]; u_int.byte[1] = u_int.byte[2]; u_int.byte[2] = c; } wchar = u_int.b32; Print_approx_Macro (1, wchar, unknown_char) } } } else if (output_format == UTF8f) { if (charset_p->type == ICONV) { while ((c = getc (stdin)) != EOF) Put_to_iconv_buf (c, charset_p); Put_to_iconv_buf (-2, charset_p); } else if (charset_p->type == KNOWN) { while ((c = Freadmb (&wchar, stdin)) != EOF) Print_approx_Macro (c, wchar, unknown_char) } else if (charset_p->type == UTF8 || charset_p->type == BUF_PIPE || charset_p->type == NON_BUF_PIPE || charset_p->type == USASCII) { while ((c = getc (stdin)) != EOF) { if (putc (c, charset_p->pipe) == EOF) Error (IO_err); } } else Error (internal_err); if (ferror (stdin)) Error (IO_err); } else Error (internal_err); } else /* not reverse */ { if (output_format == UCS2) { putchar (0xFE); putchar (0xFF); while ((c = getchar ()) != EOF) { u_int.b16[0] = (u_int16_t) charset_p->charmap[c];#if (__BYTE_ORDER != __BIG_ENDIAN) c = u_int.byte[0]; u_int.byte[0] = u_int.byte[1]; u_int.byte[1] = c;#endif if (fwrite (u_int.byte, 1, 2, stdout) != 2) Error (IO_err); } } else if (output_format == UCS4) { putchar (0); putchar (0); putchar (0xFE); putchar (0xFF); while ((c = getchar ()) != EOF) { u_int.b32 = (u_int32_t) charset_p->charmap[c];#if (__BYTE_ORDER == __LITTLE_ENDIAN) c = u_int.byte[0]; u_int.byte[0] = u_int.byte[3]; u_int.byte[3] = c; c = u_int.byte[1]; u_int.byte[1] = u_int.byte[2]; u_int.byte[2] = c;#elif (__BYTE_ORDER == __PDP_ENDIAN) c = u_int.byte[0]; u_int.byte[0] = u_int.byte[1]; u_int.byte[1] = c; c = u_int.byte[2]; u_int.byte[2] = u_int.byte[3]; u_int.byte[3] = c;#elif (__BYTE_ORDER == __BIG_ENDIAN)#else#error Unknown byte order.#endif if (fwrite (u_int.byte, 1, 4, stdout) != 4) Error (IO_err); } } /* else if (output_format == UCSwchar_t) { while ((c = getchar ()) != EOF) if (fwrite (&charset_p->charmap[c], sizeof (wchar_t), 1, stdout) != 1) Error (IO_err); } */ else if (output_format == UTF8f) Pipe_to_UTF8 (stdin); else Error (internal_err); } if (feof (stdin)) exit (0); else Error (IO_err); exit (101);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -