⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 2utf.c

📁 一个UTF的源代码,可以提供参考
💻 C
📖 第 1 页 / 共 5 页
字号:
#define _POSIX_C_SOURCE 2#include <ctype.h>#include <errno.h>#include <glob.h>/*#include <printf.h>*/#include <stdlib.h>#include <stdio.h>#include <string.h>#include <sys/types.h>#include <unistd.h>#include <values.h>#include <errno.h>#ifndef __BYTE_ORDER#ifdef BYTE_ORDER#define __BYTE_ORDER    BYTE_ORDER#define	__LITTLE_ENDIAN	LITTLE_ENDIAN#define	__BIG_ENDIAN	BIG_ENDIAN#define	__PDP_ENDIAN	PDP_ENDIAN#else#error __BYTE_ORDER undefined.#endif#endif#include "2UTF.h"static char version[] = "   2UTF  V" VERSION " \n";static char blurb[] =" © Copyright 1997, 1998, 2000 by Ričardas Čepas <rch@richard.eu.org> and others. \n"" Copying policy: BSD style. \n"" See file 'copyright' provided with the 2UTF distribution. \n"" No warranty. Use at your own risk. \n";#ifndef USE_LIBC#define mbtowc our_mbtowc#define wctomb our_wctomb#endif#ifndef CONFIG_PATHNAMES#define CONFIG_PATHNAMES "/usr/local/etc/2UTF.config /usr/etc/2UTF.config /etc/2UTF.config"#endif#define max_string_length 1178#ifndef ALIASES#define ALIASES "/var/local/lib/2UTF.aliases"#endif#ifndef PATH#define PATH "/usr/local/share/i18n/charmaps/"#endif#ifndef PATH2#define PATH2 "/usr/share/i18n/charmaps/"#endif#ifndef PATH3#define PATH3 "/usr/share/i18n/charmap/"#endif#ifndef MAX_PATHNAMES#define MAX_PATHNAMES 24#endif#ifndef MAX_EXT_CHARSETS#define MAX_EXT_CHARSETS 50#endifchar aliases_pathname[] = ALIASES;char default_charmap_format[] = " %*s /x%2x <U%X> ";char *charmap_format[] ={default_charmap_format, " 0x%x 0x%X ", " 0x%x 0x%X "};unsigned char *compiled_paths[] ={PATH, PATH2, PATH3, ""};const int compiled_paths_number = 3;int paths_number = 3;unsigned char *read_paths[MAX_PATHNAMES];unsigned char **paths = compiled_paths;int ext_charsets_number = 0;const char FILENAME[] = "FILENAME";const char IO_err[] = "I/O error.";const char IO_err_reading_config[] = "I/O error reading configuration file.";const char add_iconv_only[] = "This option can only be used with --iconv=only.";const char ambig_opt[] = "ambiguous option.";const char avail_charmaps[] = "charmaps and aliases I can handle beyond iconv(3) and available in cache:";const char avail_ext_charsets[] = "charsets I can handle via external filters:";const char bad_charmap_format[] = "charmap file format error ?";const char bad_line[] = "BAD LINE in aliases file:";const char buffer_overflow[] = "buffer overflow by";/*const char bad_sscanf_format_string[]= "bad sscanf(3) format string"; */const char can_not_exec[] = "can't execute such command or fork subprocess:";const char can_not_find_alias[] = "can't find such alias:";const char can_not_open[] = "can't open for reading file";const char can_not_open_any[] = "    -- can't open matching files.";const char can_not_create[] = "can't create file";const char for_[] = "for";const char help[] = "%s""  Converts char-sets to and from Unicode. Decodes MIME text messages. \n""\n""  Usage: \n""2UTF [-short_options] [--long_option ...] [charmap_file_or_alias] <input >output \n""fromUTF ... \n""  If exact match for <charmap_file_or_alias> (converted to uppercase, \n" \"`-' and '_' ignored) isn't found *<charmap_file_or_alias>* glob pattern \n" \"is used. Without <charmap_file_or_alias> mail message is assumed.\n""  Options: \n"" --   stops option checking for the rest of the command line \n"" -2 --UCS-2 --ucs-2              2 byte wide characters \n"" -4 --UCS-4 --ucs-4              4 byte wide characters \n"/*" -w --UCS-wchar_t --ucs-wchar_t  sizeof(wchar_t) byte wide characters \n" */" -8 --UTF-8 --utf-8    (default) multibyte characters \n"" -C --create-aliases             (re)creates aliases database \n"" -c FILENAME  --charmap-file=FILENAME \n"" -d[N] --debug[=N]               debug level (1-9), default 1 \n"" -f[FORMAT] --format[=FORMAT]    sscanf(3) format string for reading charmap \n""file. Lines beginning with %% or # are ignored. Default is \"%s\" \n"" -e --encode-headers             reencode MIME encoded headers \n"" -o --forward      (default for 2UTF) converts to Unicode \n"" -H --html                       &<>\" appeared after approximations are escaped\n"" -h -? --? -help --help          this help \n"" -i only --iconv=only            don't read configuration file and use iconv() only \n"" -i first --iconv=last           attempt to use iconv before or after charmap files \n"" -l --list-charmaps              lists charmaps & aliases and exits \n"" -p --pathnames                  outputs various pathnames and directories \n"" -r --reverse      (default for fromUTF) tries convert back to the legacy encodings \n"" -W --show-charmap               shows glyphs in charmap order \n"" -S --spit-glyphs                shows glyphs in console font (F000-F1FF) \n"" -S... --spit-glyphs=[min][-][max]  shows glyphs at given hex range. \n""Allowed range is from 0 to 7FFFFFFF. \n"" -s --switch-to-UTF-8            outputs <ESC>%%G to stderr for switching \n""current virtual terminal to UTF-8 mode \n"" -u[X]   --unknown-char[=X]      substitute X for unknown characters. \n""Default is '%c' (0x%.2X). \n"" -v --verbose  \n"" -V --version --blurb            shows version and copyright info. \n""  Rightmost option takes precedence. Long options may be abbreviated. \n";const char incomplete_charmap[] = "warning: incomplete charmap definiton";const char internal_err[] = "internal error ?";const char long_file[] = "warning: long charmap file.";const char another_match[] = "warning: another match for this alias";const char more_help[] = "``2UTF -h'' gives more information. \n";const char multiply_matches[] = "warning: multiply matches for glob pattern";const char needs_update[] = "aliases database needs update.";const char no_charmaps[] = "no charmap files found.";const char no_pathnames[] = "no pathnames found in configuration file.";const char out_of_mem[] = "out of memory !";const char paths_help_config_pathname[] = " Looks for configuration file as: \n";const char paths_help_compiled[] = " If no directories in configuration file " \"are found looks for charmap files in: \n";const char paths_help_used[] = " Currently looks for charmap files in: \n";const char paths_help_aliases[] ="  Aliases are cached in: \n""``" ALIASES "'' \n""  \n";const char short_help[] = \"Usage: 2UTF|fromUTF [-short_options] [--long_option ...] [charset] <in >out \n";const char too_many_pathnames[] = "too many pathnames in configuration file.";const char too_many_ext_charsets[] = \"too many charset definitions in configuration file.";const char unexp_EOF[] = "unexpected end of file.";const char unimplemented[] = "Sorry, this is not implemented.";const char unknown_opt[] = "unknown option.";const char using[] = "using";const char will_use[] = "will use";int Debug = FALSE, Mail = FALSE, encode = FALSE, iconv_first = TRUE, iconv_only = FALSE, reverse = FALSE, show_charmap = FALSE, verbose = FALSE;/* stdout may be variable */struct charset_typeunknown_charset = {NULL, "", NO, UNKNOWN, NULL, NULL},USASCII_charset = {NULL, "us-ascii", IS, USASCII, NULL, NULL},UTF8_charset = {NULL, "UTF-8", IS, UTF8, NULL, NULL};struct charset_type *charset_p = &unknown_charset;struct line_buf_type line ={NULL, 0};struct{  unsigned char *names, *to_UTF, *from_UTF;  enum USASCII_is_subset_type USASCII_is_subset;}ext_charsets[MAX_EXT_CHARSETS + 1];wchar_t unknown_wchar = 0xFFFD;inline int our_wctomb (char *s, unsigned long wc);inline int our_mbtowc (wchar_t *p, char *s, unsigned n);  inline enum charmap_file_format_typeCharmap_file_format_type (char *pathname){  char *ptr;  if (pathname && (ptr = strrchr (pathname, '.')))  {    if (strcmp (".TXT", ptr) == 0 || Strcasecmp (".x", ptr) == 0)      return (TXT);  }  return (DEFAULT);}  voidClose_pipe (void){  if (charset_p->type == BUF_PIPE || charset_p->type == NON_BUF_PIPE)    pclose (charset_p->pipe);}  char *Convert (char *to, char *from){  register int length;  if (charset_p->type == KNOWN)  {    while (*from)    {      length = wctomb (to, charset_p->charmap[(int) (unsigned char) *from]);      if (length != -1)	to += length;      else if (*(to += wctomb (to, unknown_wchar)) == -1)	Error ("Bad unknown_wchar value");      from++;    }    return (to);  }  else    return (Stpcpy (to, from));}  char *Encode_MIME_word (char *from, size_t sz){  unsigned char *f, *t, *to;  const char b64[64] = {    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'  };  /* =?UTF-8?B?..?= */  to = xmalloc(24 + 4*strlen(f=from) / 3);  strcpy(to, "=?UTF-8?B?");  t = to + 10;  while (sz)  {    /* 765432 10:7654 3210:76 543210       543210 54:3210 5432:10 543210     */    *t++ = b64[*f>>2 & (1<<6)-1];    if (sz == 1) {      *t++ = b64[(*f & (1<<2)-1) << 4];      *t++ = '=';      *t++ = '=';      sz=0;    } else {      *t++ = b64[(*f & (1<<2)-1) << 4 | f[1] >> 4];      if (sz == 2) {	*t++ = b64[(f[1] & (1<<4)-1) << 2];	*t++ = '=';	sz=0;      } else {	*t++ = b64[(f[1] & (1<<4)-1) << 2 | f[2] >> 6];	*t++ = b64[f[2] & (1<<6)-1];	sz -= 3;	f += 3;      }    }  }  strcpy(t, "?=");  return (to);}  intCreate_aliases (void){  FILE *aliases_file;  struct charmap_file_type charmap_file;  char alias[max_string_length], *charmap_filename, *ptr, *pathname, s[max_string_length];  glob_t glob_buffer =  {0, NULL, 0, 0};  int glob_flags = 0, length;  register int c, index;  wchar_t charmap[256], unknown_wchar = 0;  if ((aliases_file = fopen (aliases_pathname, "wt")) == NULL)  {    /*      fprintf (stderr, "2UTF: %s '%s' \n", can_not_create, aliases_pathname); */    return (0);  }  for (length = strlen (paths[0]), index = 1; index < paths_number; index++)    if (strlen (paths[index]) > strlen (paths[index - 1]))      length = strlen (paths[index]);  pathname = xmalloc (length + 5);  if (Debug >= 9)    fprintf (stderr, "2UTF: malloc (%i) \n", (length + 5));  for (index = 0; index < paths_number; index++)  {    strcat (strcpy (pathname, paths[index]), "*");    c = glob (pathname, glob_flags, NULL, &glob_buffer);    glob_flags |= GLOB_APPEND;    switch (c)    {      case GLOB_NOSPACE:	Error (out_of_mem);      default:	if (Debug >= 5)	  fprintf (stderr, "2UTF: glob() returned %i \n", c);    }  }  free (pathname);  if (glob_buffer.gl_pathc < 1)    Error (no_charmaps);  for (index = 0; index < glob_buffer.gl_pathc; index++)  {    if ((charmap_file.stream = fopen (glob_buffer.gl_pathv[index], "rt")) == NULL)    {      if (verbose)	fprintf (stderr, "2UTF: %s '%s' \n", can_not_open, glob_buffer.gl_pathv[index]);    }    else    {      charmap_file.format = Charmap_file_format_type (glob_buffer.gl_pathv[index]);      c = Get_charmap (charmap, &charmap_file, unknown_wchar);      switch (c)      {	case err_short:	  if (verbose)	    fprintf (stderr, "2UTF: %s: \n       '%s' \n", incomplete_charmap, glob_buffer.gl_pathv[index]);	case err_long:	  if (Debug >= 8 && c == err_long)	    fprintf (stderr, "2UTF: %s '%s' \n", long_file, glob_buffer.gl_pathv[index]);	case OK:	  if (verbose)	    fprintf (stderr, "2UTF: %s '%s' \n", using, glob_buffer.gl_pathv[index]);	  if ((ptr = strrchr (glob_buffer.gl_pathv[index], '/')) == NULL)	    ptr = glob_buffer.gl_pathv[index];	  else	    ptr++;	  charmap_filename = xstrdup (ptr);	  if (charmap_file.format != DEFAULT &&strrchr (charmap_filename, '.'))	    *strrchr (charmap_filename, '.') = '\0';	  fprintf (aliases_file, "%s %s ", glob_buffer.gl_pathv[index], Strtoupper (charmap_filename));	  rewind (charmap_file.stream);	  while (fgets (s, max_string_length, charmap_file.stream) != NULL)	    if (s[0] == '%' && sscanf ((s + 1), " alias %s ", alias) >= 1)	      fprintf (aliases_file, "%s ", Strtoupper (alias));	  fputs (" \n", aliases_file);	  if (!feof (charmap_file.stream))	    fprintf (stderr, "\a2UTF: %s '%s' \n", IO_err, glob_buffer.gl_pathv[index]);	  break;	case err_few_chars:	  if (verbose)	    fprintf (stderr, "2UTF: %s '%s' \n", bad_charmap_format, glob_buffer.gl_pathv[index]);	  break;	case err_IO:	  fprintf (stderr, "\a2UTF: %s '%s' \n", IO_err, glob_buffer.gl_pathv[index]);	  break;	case err_internal:	  fprintf (stderr, "\a2UTF: %s '%s' \n", internal_err, glob_buffer.gl_pathv[index]);	  break;      }      if (EOF == fclose (charmap_file.stream))	Error (IO_err);    }  }  if (EOF == fclose (aliases_file))    Error (IO_err);  globfree (&glob_buffer);  return (1);}  voidError (const char *message){  register int c;  fprintf (stderr, "2UTF: %s \n", message);  if (Mail)    while ((c = getc (stdin)) != EOF)      putchar (c);  exit (1);}inline  FILE *Fopen_charmap (struct charset_type *charset_p, struct charmap_file_type *charmap_file_p){  FILE *aliases_file;  char *charmap_name, *pathname, *ptr, *s = NULL;  glob_t glob_buffer =  {0, NULL, 0, 0};  int found_alias = FALSE, can_not_create_aliases_file = FALSE;  register int c, index, length;  size_t s_length = 0;  charmap_file_p->stream = NULL;  charmap_file_p->format = DEFAULT;  if (strchr (charset_p->name, '/') != NULL)  {    charmap_file_p->stream = fopen (charset_p->name, "rt");    charmap_file_p->format = Charmap_file_format_type (charset_p->name);  } else {    if ((aliases_file = fopen (aliases_pathname, "rt")) == NULL)    {      fprintf (stderr, "2UTF: %s %s \n", can_not_open, aliases_pathname);      if (!Create_aliases ())      {	can_not_create_aliases_file = 1;	fprintf (stderr, "2UTF: %s '%s' \n", can_not_create, aliases_pathname);      }      else if ((aliases_file = fopen (aliases_pathname, "rt")) == NULL)	Error (internal_err);    }    charmap_name = xmalloc (strlen (charset_p->name) + 5);    if (Debug >= 9)      fprintf (stderr, "2UTF: malloc (%i) \n", (int)(strlen (charset_p->name) + 5));    Strtoupper (strcat (strcat (strcpy (charmap_name, " "), charset_p->name), " "));    if (aliases_file != NULL)    {      for (index = 0; index < 2; index++)      {	if (Debug >= 3)	  fprintf (stderr, "2UTF: Looking for alias '%s' \n", charmap_name);	while (Getline ((unsigned char **) &s, &s_length, aliases_file) != (size_t) -1)	  if ((strchr (s, ' ') != NULL) && (Str_str (strchr (s, ' '), charmap_name) != NULL))	  {	    strtok (s, "\n");	    if (!found_alias)	    {	      if (verbose)		fprintf (stderr, "2UTF: %s - %s: \n   '%s' \n",		    charmap_name, will_use, s);	      charmap_file_p->stream = fopen (ptr = strtok (s, " "), "rt");	      charmap_file_p->format = Charmap_file_format_type (ptr);	    }	    else if (verbose)	      fprintf (stderr, "2UTF: %s - %s: \n   '%s' \n",		  charmap_name, another_match, s);	    else	      fprintf (stderr, "2UTF: %s. \n", another_match);	    found_alias = 1;	  }	if (!feof (aliases_file))	  if (ferror (aliases_file))	    Error (IO_err);	  else	    Error (out_of_mem);	if (!found_alias)	{	  if (index == 0)	  {	    clearerr (aliases_file);	    rewind (aliases_file);	    memmove (charmap_name, &charmap_name[1], strlen (charmap_name) - 1);	    strtok (charmap_name, " ");	  }	}	else	  break;      }      free (s);      if (EOF == fclose (aliases_file))	Error (IO_err);    }    if (!found_alias || charmap_file_p->stream == NULL)    {      if (charmap_name[0] == ' ')      {	memmove (charmap_name, &charmap_name[1], strlen (charmap_name) - 1);	strtok (charmap_name, " ");      }      if (Debug >= 3)	fprintf (stderr, "2UTF: Looking for file '%s' \n", charmap_name);      for (length = strlen (paths[0]), index = 1; index < paths_number; index++)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -