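/* File: 2utf.c  (this header and the "font size:" label that followed it are code-viewer chrome, not part of the program text) */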
#define _POSIX_C_SOURCE 2#include <ctype.h>#include <errno.h>#include <glob.h>/*#include <printf.h>*/#include <stdlib.h>#include <stdio.h>#include <string.h>#include <sys/types.h>#include <unistd.h>#include <values.h>#include <errno.h>#ifndef __BYTE_ORDER#ifdef BYTE_ORDER#define __BYTE_ORDER BYTE_ORDER#define __LITTLE_ENDIAN LITTLE_ENDIAN#define __BIG_ENDIAN BIG_ENDIAN#define __PDP_ENDIAN PDP_ENDIAN#else#error __BYTE_ORDER undefined.#endif#endif#include "2UTF.h"static char version[] = " 2UTF V" VERSION " \n";static char blurb[] =" © Copyright 1997, 1998, 2000 by Ričardas Čepas <rch@richard.eu.org> and others. \n"" Copying policy: BSD style. \n"" See file 'copyright' provided with the 2UTF distribution. \n"" No warranty. Use at your own risk. \n";#ifndef USE_LIBC#define mbtowc our_mbtowc#define wctomb our_wctomb#endif#ifndef CONFIG_PATHNAMES#define CONFIG_PATHNAMES "/usr/local/etc/2UTF.config /usr/etc/2UTF.config /etc/2UTF.config"#endif#define max_string_length 1178#ifndef ALIASES#define ALIASES "/var/local/lib/2UTF.aliases"#endif#ifndef PATH#define PATH "/usr/local/share/i18n/charmaps/"#endif#ifndef PATH2#define PATH2 "/usr/share/i18n/charmaps/"#endif#ifndef PATH3#define PATH3 "/usr/share/i18n/charmap/"#endif#ifndef MAX_PATHNAMES#define MAX_PATHNAMES 24#endif#ifndef MAX_EXT_CHARSETS#define MAX_EXT_CHARSETS 50#endifchar aliases_pathname[] = ALIASES;char default_charmap_format[] = " %*s /x%2x <U%X> ";char *charmap_format[] ={default_charmap_format, " 0x%x 0x%X ", " 0x%x 0x%X "};unsigned char *compiled_paths[] ={PATH, PATH2, PATH3, ""};const int compiled_paths_number = 3;int paths_number = 3;unsigned char *read_paths[MAX_PATHNAMES];unsigned char **paths = compiled_paths;int ext_charsets_number = 0;const char FILENAME[] = "FILENAME";const char IO_err[] = "I/O error.";const char IO_err_reading_config[] = "I/O error reading configuration file.";const char add_iconv_only[] = "This option can only be used with --iconv=only.";const char ambig_opt[] = "ambiguous 
option.";const char avail_charmaps[] = "charmaps and aliases I can handle beyond iconv(3) and available in cache:";const char avail_ext_charsets[] = "charsets I can handle via external filters:";const char bad_charmap_format[] = "charmap file format error ?";const char bad_line[] = "BAD LINE in aliases file:";const char buffer_overflow[] = "buffer overflow by";/*const char bad_sscanf_format_string[]= "bad sscanf(3) format string"; */const char can_not_exec[] = "can't execute such command or fork subprocess:";const char can_not_find_alias[] = "can't find such alias:";const char can_not_open[] = "can't open for reading file";const char can_not_open_any[] = " -- can't open matching files.";const char can_not_create[] = "can't create file";const char for_[] = "for";const char help[] = "%s"" Converts char-sets to and from Unicode. Decodes MIME text messages. \n""\n"" Usage: \n""2UTF [-short_options] [--long_option ...] [charmap_file_or_alias] <input >output \n""fromUTF ... \n"" If exact match for <charmap_file_or_alias> (converted to uppercase, \n" \"`-' and '_' ignored) isn't found *<charmap_file_or_alias>* glob pattern \n" \"is used. Without <charmap_file_or_alias> mail message is assumed.\n"" Options: \n"" -- stops option checking for the rest of the command line \n"" -2 --UCS-2 --ucs-2 2 byte wide characters \n"" -4 --UCS-4 --ucs-4 4 byte wide characters \n"/*" -w --UCS-wchar_t --ucs-wchar_t sizeof(wchar_t) byte wide characters \n" */" -8 --UTF-8 --utf-8 (default) multibyte characters \n"" -C --create-aliases (re)creates aliases database \n"" -c FILENAME --charmap-file=FILENAME \n"" -d[N] --debug[=N] debug level (1-9), default 1 \n"" -f[FORMAT] --format[=FORMAT] sscanf(3) format string for reading charmap \n""file. Lines beginning with %% or # are ignored. Default is \"%s\" \n"" -e --encode-headers reencode MIME encoded headers \n"" -o --forward (default for 2UTF) converts to Unicode \n"" -H --html &<>\" appeared after approximations are escaped\n"" -h -? --? 
-help --help this help \n"" -i only --iconv=only don't read configuration file and use iconv() only \n"" -i first --iconv=last attempt to use iconv before or after charmap files \n"" -l --list-charmaps lists charmaps & aliases and exits \n"" -p --pathnames outputs various pathnames and directories \n"" -r --reverse (default for fromUTF) tries convert back to the legacy encodings \n"" -W --show-charmap shows glyphs in charmap order \n"" -S --spit-glyphs shows glyphs in console font (F000-F1FF) \n"" -S... --spit-glyphs=[min][-][max] shows glyphs at given hex range. \n""Allowed range is from 0 to 7FFFFFFF. \n"" -s --switch-to-UTF-8 outputs <ESC>%%G to stderr for switching \n""current virtual terminal to UTF-8 mode \n"" -u[X] --unknown-char[=X] substitute X for unknown characters. \n""Default is '%c' (0x%.2X). \n"" -v --verbose \n"" -V --version --blurb shows version and copyright info. \n"" Rightmost option takes precedence. Long options may be abbreviated. \n";const char incomplete_charmap[] = "warning: incomplete charmap definiton";const char internal_err[] = "internal error ?";const char long_file[] = "warning: long charmap file.";const char another_match[] = "warning: another match for this alias";const char more_help[] = "``2UTF -h'' gives more information. 
\n";const char multiply_matches[] = "warning: multiply matches for glob pattern";const char needs_update[] = "aliases database needs update.";const char no_charmaps[] = "no charmap files found.";const char no_pathnames[] = "no pathnames found in configuration file.";const char out_of_mem[] = "out of memory !";const char paths_help_config_pathname[] = " Looks for configuration file as: \n";const char paths_help_compiled[] = " If no directories in configuration file " \"are found looks for charmap files in: \n";const char paths_help_used[] = " Currently looks for charmap files in: \n";const char paths_help_aliases[] =" Aliases are cached in: \n""``" ALIASES "'' \n"" \n";const char short_help[] = \"Usage: 2UTF|fromUTF [-short_options] [--long_option ...] [charset] <in >out \n";const char too_many_pathnames[] = "too many pathnames in configuration file.";const char too_many_ext_charsets[] = \"too many charset definitions in configuration file.";const char unexp_EOF[] = "unexpected end of file.";const char unimplemented[] = "Sorry, this is not implemented.";const char unknown_opt[] = "unknown option.";const char using[] = "using";const char will_use[] = "will use";int Debug = FALSE, Mail = FALSE, encode = FALSE, iconv_first = TRUE, iconv_only = FALSE, reverse = FALSE, show_charmap = FALSE, verbose = FALSE;/* stdout may be variable */struct charset_typeunknown_charset = {NULL, "", NO, UNKNOWN, NULL, NULL},USASCII_charset = {NULL, "us-ascii", IS, USASCII, NULL, NULL},UTF8_charset = {NULL, "UTF-8", IS, UTF8, NULL, NULL};struct charset_type *charset_p = &unknown_charset;struct line_buf_type line ={NULL, 0};struct{ unsigned char *names, *to_UTF, *from_UTF; enum USASCII_is_subset_type USASCII_is_subset;}ext_charsets[MAX_EXT_CHARSETS + 1];wchar_t unknown_wchar = 0xFFFD;inline int our_wctomb (char *s, unsigned long wc);inline int our_mbtowc (wchar_t *p, char *s, unsigned n); inline enum charmap_file_format_typeCharmap_file_format_type (char *pathname){ char *ptr; if (pathname 
&& (ptr = strrchr (pathname, '.'))) { if (strcmp (".TXT", ptr) == 0 || Strcasecmp (".x", ptr) == 0) return (TXT); } return (DEFAULT);} voidClose_pipe (void){ if (charset_p->type == BUF_PIPE || charset_p->type == NON_BUF_PIPE) pclose (charset_p->pipe);} char *Convert (char *to, char *from){ register int length; if (charset_p->type == KNOWN) { while (*from) { length = wctomb (to, charset_p->charmap[(int) (unsigned char) *from]); if (length != -1) to += length; else if (*(to += wctomb (to, unknown_wchar)) == -1) Error ("Bad unknown_wchar value"); from++; } return (to); } else return (Stpcpy (to, from));} char *Encode_MIME_word (char *from, size_t sz){ unsigned char *f, *t, *to; const char b64[64] = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' }; /* =?UTF-8?B?..?= */ to = xmalloc(24 + 4*strlen(f=from) / 3); strcpy(to, "=?UTF-8?B?"); t = to + 10; while (sz) { /* 765432 10:7654 3210:76 543210 543210 54:3210 5432:10 543210 */ *t++ = b64[*f>>2 & (1<<6)-1]; if (sz == 1) { *t++ = b64[(*f & (1<<2)-1) << 4]; *t++ = '='; *t++ = '='; sz=0; } else { *t++ = b64[(*f & (1<<2)-1) << 4 | f[1] >> 4]; if (sz == 2) { *t++ = b64[(f[1] & (1<<4)-1) << 2]; *t++ = '='; sz=0; } else { *t++ = b64[(f[1] & (1<<4)-1) << 2 | f[2] >> 6]; *t++ = b64[f[2] & (1<<6)-1]; sz -= 3; f += 3; } } } strcpy(t, "?="); return (to);} intCreate_aliases (void){ FILE *aliases_file; struct charmap_file_type charmap_file; char alias[max_string_length], *charmap_filename, *ptr, *pathname, s[max_string_length]; glob_t glob_buffer = {0, NULL, 0, 0}; int glob_flags = 0, length; register int c, index; wchar_t charmap[256], unknown_wchar = 0; if ((aliases_file = fopen (aliases_pathname, "wt")) == NULL) { /* fprintf (stderr, "2UTF: %s '%s' \n", 
can_not_create, aliases_pathname); */ return (0); } for (length = strlen (paths[0]), index = 1; index < paths_number; index++) if (strlen (paths[index]) > strlen (paths[index - 1])) length = strlen (paths[index]); pathname = xmalloc (length + 5); if (Debug >= 9) fprintf (stderr, "2UTF: malloc (%i) \n", (length + 5)); for (index = 0; index < paths_number; index++) { strcat (strcpy (pathname, paths[index]), "*"); c = glob (pathname, glob_flags, NULL, &glob_buffer); glob_flags |= GLOB_APPEND; switch (c) { case GLOB_NOSPACE: Error (out_of_mem); default: if (Debug >= 5) fprintf (stderr, "2UTF: glob() returned %i \n", c); } } free (pathname); if (glob_buffer.gl_pathc < 1) Error (no_charmaps); for (index = 0; index < glob_buffer.gl_pathc; index++) { if ((charmap_file.stream = fopen (glob_buffer.gl_pathv[index], "rt")) == NULL) { if (verbose) fprintf (stderr, "2UTF: %s '%s' \n", can_not_open, glob_buffer.gl_pathv[index]); } else { charmap_file.format = Charmap_file_format_type (glob_buffer.gl_pathv[index]); c = Get_charmap (charmap, &charmap_file, unknown_wchar); switch (c) { case err_short: if (verbose) fprintf (stderr, "2UTF: %s: \n '%s' \n", incomplete_charmap, glob_buffer.gl_pathv[index]); case err_long: if (Debug >= 8 && c == err_long) fprintf (stderr, "2UTF: %s '%s' \n", long_file, glob_buffer.gl_pathv[index]); case OK: if (verbose) fprintf (stderr, "2UTF: %s '%s' \n", using, glob_buffer.gl_pathv[index]); if ((ptr = strrchr (glob_buffer.gl_pathv[index], '/')) == NULL) ptr = glob_buffer.gl_pathv[index]; else ptr++; charmap_filename = xstrdup (ptr); if (charmap_file.format != DEFAULT &&strrchr (charmap_filename, '.')) *strrchr (charmap_filename, '.') = '\0'; fprintf (aliases_file, "%s %s ", glob_buffer.gl_pathv[index], Strtoupper (charmap_filename)); rewind (charmap_file.stream); while (fgets (s, max_string_length, charmap_file.stream) != NULL) if (s[0] == '%' && sscanf ((s + 1), " alias %s ", alias) >= 1) fprintf (aliases_file, "%s ", Strtoupper (alias)); fputs (" 
\n", aliases_file); if (!feof (charmap_file.stream)) fprintf (stderr, "\a2UTF: %s '%s' \n", IO_err, glob_buffer.gl_pathv[index]); break; case err_few_chars: if (verbose) fprintf (stderr, "2UTF: %s '%s' \n", bad_charmap_format, glob_buffer.gl_pathv[index]); break; case err_IO: fprintf (stderr, "\a2UTF: %s '%s' \n", IO_err, glob_buffer.gl_pathv[index]); break; case err_internal: fprintf (stderr, "\a2UTF: %s '%s' \n", internal_err, glob_buffer.gl_pathv[index]); break; } if (EOF == fclose (charmap_file.stream)) Error (IO_err); } } if (EOF == fclose (aliases_file)) Error (IO_err); globfree (&glob_buffer); return (1);} voidError (const char *message){ register int c; fprintf (stderr, "2UTF: %s \n", message); if (Mail) while ((c = getc (stdin)) != EOF) putchar (c); exit (1);}inline FILE *Fopen_charmap (struct charset_type *charset_p, struct charmap_file_type *charmap_file_p){ FILE *aliases_file; char *charmap_name, *pathname, *ptr, *s = NULL; glob_t glob_buffer = {0, NULL, 0, 0}; int found_alias = FALSE, can_not_create_aliases_file = FALSE; register int c, index, length; size_t s_length = 0; charmap_file_p->stream = NULL; charmap_file_p->format = DEFAULT; if (strchr (charset_p->name, '/') != NULL) { charmap_file_p->stream = fopen (charset_p->name, "rt"); charmap_file_p->format = Charmap_file_format_type (charset_p->name); } else { if ((aliases_file = fopen (aliases_pathname, "rt")) == NULL) { fprintf (stderr, "2UTF: %s %s \n", can_not_open, aliases_pathname); if (!Create_aliases ()) { can_not_create_aliases_file = 1; fprintf (stderr, "2UTF: %s '%s' \n", can_not_create, aliases_pathname); } else if ((aliases_file = fopen (aliases_pathname, "rt")) == NULL) Error (internal_err); } charmap_name = xmalloc (strlen (charset_p->name) + 5); if (Debug >= 9) fprintf (stderr, "2UTF: malloc (%i) \n", (int)(strlen (charset_p->name) + 5)); Strtoupper (strcat (strcat (strcpy (charmap_name, " "), charset_p->name), " ")); if (aliases_file != NULL) { for (index = 0; index < 2; index++) { if 
(Debug >= 3) fprintf (stderr, "2UTF: Looking for alias '%s' \n", charmap_name); while (Getline ((unsigned char **) &s, &s_length, aliases_file) != (size_t) -1) if ((strchr (s, ' ') != NULL) && (Str_str (strchr (s, ' '), charmap_name) != NULL)) { strtok (s, "\n"); if (!found_alias) { if (verbose) fprintf (stderr, "2UTF: %s - %s: \n '%s' \n", charmap_name, will_use, s); charmap_file_p->stream = fopen (ptr = strtok (s, " "), "rt"); charmap_file_p->format = Charmap_file_format_type (ptr); } else if (verbose) fprintf (stderr, "2UTF: %s - %s: \n '%s' \n", charmap_name, another_match, s); else fprintf (stderr, "2UTF: %s. \n", another_match); found_alias = 1; } if (!feof (aliases_file)) if (ferror (aliases_file)) Error (IO_err); else Error (out_of_mem); if (!found_alias) { if (index == 0) { clearerr (aliases_file); rewind (aliases_file); memmove (charmap_name, &charmap_name[1], strlen (charmap_name) - 1); strtok (charmap_name, " "); } } else break; } free (s); if (EOF == fclose (aliases_file)) Error (IO_err); } if (!found_alias || charmap_file_p->stream == NULL) { if (charmap_name[0] == ' ') { memmove (charmap_name, &charmap_name[1], strlen (charmap_name) - 1); strtok (charmap_name, " "); } if (Debug >= 3) fprintf (stderr, "2UTF: Looking for file '%s' \n", charmap_name); for (length = strlen (paths[0]), index = 1; index < paths_number; index++)
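/*
 * Code-viewer chrome captured with the listing (not part of 2utf.c):
 *
 * Keyboard shortcuts
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Switch theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */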