📄 2utf.c
字号:
} if ((encp = encoding)) { PUTC_IN_UTF8 ('?'); while (*encp) { PUTC_IN_UTF8 (*encp); encp++; } }} inline intParse_string (char *charmap_format_string, wchar_t * charmap, char *s){ unsigned int index; unsigned long unichar; if (s[0] == '#' || s[0] == '%') return (0); if (sscanf (s, charmap_format_string, &index, &unichar) == 2) { charmap[index] = (wchar_t) unichar; if (Debug > 8) fprintf (stderr, "2UTF: '%s' --> 0x%.2X is U+%.4lX.\n" ,s, index, (long unsigned int) charmap[index]); return (1); } else return (0);} inline voidPipe_to_UTF8 (FILE * in_stream){ register int c; if (charset_p->type == ICONV) while ((c = getc (in_stream)) != EOF) Put_to_iconv_buf (c, charset_p); else if (charset_p->type == KNOWN) while ((c = getc (in_stream)) != EOF) Putc_in_UTF8 (c, charset_p); else while ((c = getc (in_stream)) != EOF) if (putc (c, charset_p->pipe) == EOF) Error (IO_err); if (charset_p->type == ICONV) Put_to_iconv_buf (-1, charset_p); if (ferror (in_stream)) Error (IO_err);} inline intPutc_in_UTF8 (int c, struct charset_type * charset_p){ char s[12]; register int length; length = wctomb (s, charset_p->charmap[c]); if (length != -1) { if (fwrite (s, 1, length, charset_p->pipe) != (size_t) length) Error (IO_err); } else if (putc (0x80, charset_p->pipe) == EOF) Error (IO_err); return (c);} inline intPut_to_iconv_buf (int c, struct charset_type * charset_p){ /* c == -1 means flush buffer */ if (c>=0) { *charset_p->iconv_p->inendptr++ = c; charset_p->iconv_p->inleft++; } if (c<0 || charset_p->iconv_p->inendptr - charset_p->iconv_p->in_buf > ICONV_IN_BUF_SIZE - 10) { /* buffer is full - do conversion */ size_t outleft, conv, sz; char * inptr; /* inleft = charset_p->iconv_p->in_buf + ICONV_IN_BUF_SIZE - charset_p->iconv_p->inptr; */ outleft = charset_p->iconv_p->out_buf + ICONV_OUT_BUF_SIZE - charset_p->iconv_p->outptr; inptr = charset_p->iconv_p->in_buf; while (charset_p->iconv_p->inleft > 0) { errno = 0; conv = iconv (charset_p->iconv_p->iconv_handle, &inptr, &charset_p->iconv_p->inleft, &charset_p->iconv_p->outptr, &outleft); if (conv == (size_t) -1 || errno) { if (errno == EINVAL) { if (c<0 || Debug > 4) fprintf (stderr, "2UTF: iconv error because of incomplete multibyte character; input is '%.2s'\n", inptr); memmove (charset_p->iconv_p->in_buf, inptr, charset_p->iconv_p->inleft); break; } else if (errno == EILSEQ) { if (Debug>3) fprintf (stderr, "2UTF: iconv EILSEQ error; input is '%.2s'\n", inptr); charset_p->iconv_p->inleft--; outleft--; *charset_p->iconv_p->outptr++ = *inptr++; continue; } else { /* if (Debug) */ fprintf (stderr, "2UTF: iconv unknown error errno %i; input is '%.2s'\n", errno, inptr); memmove (charset_p->iconv_p->outptr, inptr, charset_p->iconv_p->inleft); charset_p->iconv_p->inleft = 0; break; } } } charset_p->iconv_p->inendptr = charset_p->iconv_p->in_buf + charset_p->iconv_p->inleft; if (sz = charset_p->iconv_p->outptr - charset_p->iconv_p->out_buf) { charset_p->iconv_p->outptr = charset_p->iconv_p->out_buf; if (c == -3) { char * enc; if (fputs(enc = Encode_MIME_word(charset_p->iconv_p->out_buf, sz), charset_p->pipe) < 0) Error (IO_err); free(enc); } else { if (fwrite(charset_p->iconv_p->out_buf, sizeof(char), sz, charset_p->pipe) <= 0) Error (IO_err); } } } if (c == -2) fflush (charset_p->pipe); return 1;} intRead_config_file (){ FILE *config_file; enum { PATHS, CHARSETS } state; size_t size; unsigned char *config_pathnames = CONFIG_PATHNAMES; unsigned char *config_pathname = config_pathnames; unsigned char *cp, *cp2, *nl; /* check for double null terminator */ while (*(config_pathname = strchr (config_pathname, '\0') + 1) != '\0'); config_pathname = config_pathnames; do { if (Debug > 4) fprintf (stderr, "2UTF: Looking for '%s' \n", config_pathname); if ((config_file = fopen (config_pathname, "rt")) == NULL) if (*(config_pathname = strchr (config_pathname, '\0') + 1) == '\0') return (FALSE); } while (config_file == NULL); if (Debug > 2) fprintf (stderr, "2UTF: Opened configuration file '%s' \n", config_pathname); if ((size = Getdelim (&line.buf, &line.length, '\0', config_file)) == (size_t) -1 || EOF == fclose (config_file)) Error (IO_err_reading_config); cp = line.buf; paths_number = 0; paths = read_paths; ext_charsets_number = 0; state = PATHS; do { if ((nl = strchr (cp, '\n')) != NULL) *nl = '\0'; if (*cp != '#') { switch (state) { case PATHS: if ((cp = strtok (cp, " \t\n"))) { if (Strcasecmp (cp, "[charsets]") == 0) { state = CHARSETS; } else { if (paths_number >= MAX_PATHNAMES) Error (too_many_pathnames); paths[paths_number++] = xstrdup (cp); if (Debug > 2) fprintf (stderr, "2UTF: Pathname for charmaps in configuration file: \n '%s' \n", paths[paths_number - 1]); } } break; case CHARSETS: { if (ext_charsets_number >= MAX_EXT_CHARSETS) Error (too_many_ext_charsets); ext_charsets[ext_charsets_number].USASCII_is_subset = NO; if (strncmp (cp += strspn (cp, " \t"), "[US-ASCII_is_subset]", strlen ("[US-ASCII_is_subset]")) == 0) { cp += strlen ("[US-ASCII_is_subset]") + 1; ext_charsets[ext_charsets_number].USASCII_is_subset = IS; } *(cp2 = ext_charsets[ext_charsets_number].names = xmalloc (strlen (cp) + 3)) = ' '; *(cp2 = Stpcpy (cp2 + 1, cp)) = ' '; *(cp2 + 1) = '\0'; if (!strtok (cp, " \t\n") || nl == NULL) break; if (Debug > 2) { fprintf (stderr, "2UTF: Charset names: '%s' \n", ext_charsets[ext_charsets_number].names); if (ext_charsets[ext_charsets_number].USASCII_is_subset == IS) fprintf (stderr, "2UTF: US-ASCII is subset " "of this charset. \n"); } Strtoupper (cp2 = ext_charsets[ext_charsets_number].names); while (*cp2++) if (*cp2 == '\t') *cp2 = ' '; cp = nl + 1; if ((nl = strchr (cp, '\n')) != NULL) *nl = '\0'; ext_charsets[ext_charsets_number].to_UTF = xstrdup (cp); if (!strtok (cp, " \t\n")) ext_charsets[ext_charsets_number].to_UTF = NULL; else if (Debug > 2) fprintf (stderr, "2UTF: To UTF-8: '%s' \n", ext_charsets[ext_charsets_number].to_UTF); if (nl != NULL) { cp = nl + 1; if ((nl = strchr (cp, '\n')) != NULL) *nl = '\0'; ext_charsets[ext_charsets_number].from_UTF = xstrdup (cp); if (!strtok (cp, " \t\n")) ext_charsets[ext_charsets_number].from_UTF = NULL; else if (Debug > 2) fprintf (stderr, "2UTF: From UTF-8: '%s' \n", ext_charsets[ext_charsets_number].from_UTF); } if (ext_charsets[ext_charsets_number].to_UTF == NULL &&ext_charsets[ext_charsets_number].from_UTF == NULL) break; if (Debug > 2) fprintf (stderr, "2UTF: Entry %i O.K. \n", ext_charsets_number); ext_charsets_number++; } break; } } if (nl == NULL) break; else { cp = nl + 1; continue; } } while (TRUE); if (paths_number <= 0) paths = compiled_paths; /*Error (no_pathnames); */ return (TRUE);} inline intSeek_boundary (FILE * in_stream, char *boundary, int push_boundary){ int ret = FALSE; size_t length=0; unsigned char *lbp; while (ret == FALSE) { if (in_stream) { if ((length = Getline (&line.buf, &line.length, in_stream)) == (size_t) -1) if (ferror (in_stream)) Error (IO_err); else { /* FLUSH_OUTPUT(charset_p); */ return (FALSE); } lbp = line.buf; if (lbp[0] == '-' && lbp[1] == '-') { ret = Str_is_boundary (lbp + 2, boundary, push_boundary); push_boundary = FALSE; } } else { ret = Str_is_boundary (lbp = NULL, boundary, push_boundary); push_boundary = FALSE; } if (ret == TRUE || ret == END_BOUNDARY) if (!charset_p->USASCII_is_subset) { /* FLUSH_OUTPUT(charset_p); */ Validate_charset ("us-ascii", 0); } if (ret < OUTER_BOUNDARY) { if (lbp) while (lbp < line.buf + length) { PUTC_IN_UTF8 ((unsigned char) *lbp); lbp++; EMPTY_BUFFERS(charset_p); } else fprintf (charset_p->pipe, ret == END_BOUNDARY ? "--%s--\n" : "--%s\n", boundary); } } /*FLUSH_OUTPUT(charset_p);*/ return (ret);} voidShort_help (int exit_code){ fprintf (stderr, "%s%s", short_help, more_help); exit (exit_code);} intStrcasecmp (const char *str1, const char *str2){ while (toupper(*str1)==toupper(*str2) && *str1) { str1++; str2++; } return ( toupper(*str1)-toupper(*str2) );} intStrcase_has_prefix (const char *string, const char *prefix){ while (toupper(*string)==toupper(*prefix) && *prefix) { string++; prefix++; } return (*prefix=='\0' ? 0 : toupper(*string)-toupper(*prefix) );} char *Stpcpy (char *To, const char *From){ register char *to=To; while ((*to=*From++)) to++; return (to);} char *Stpncpy (char *To, const char *From, size_t count){ register char *to=To; char *ret; while ((size_t)(to-To) < count) if ((*to=*From++)) to++; else break; ret = to; while ((size_t)(to-To) < count) *to++ = '\0'; return (ret);} intStr_has_prefix (const char *string, const char *prefix){ while (*string==*prefix && *prefix) { string++; prefix++; } return (*prefix=='\0' ? 0 : *string-*prefix );} intStr_is_UTF8 (char *charmap_filename){ register int i, iplus; /* Unicode-1-1-UTF-8 */ if (Strcasecmp (charmap_filename, "UTF-8") == 0 || \ Strcasecmp (charmap_filename, "UTF-2") == 0 || \ (Strcase_has_prefix (charmap_filename, "Unicode-") == 0 \ &&(iplus = strspn ( \ charmap_filename + (i = strlen ("Unicode-")), "1234567")) > 0 && \ charmap_filename[i += iplus] == '-' && \ (iplus = strspn (charmap_filename + (i += 1), "01234567")) > 0 && \ (Strcasecmp (charmap_filename + (i += iplus), "-UTF-8") == 0 || \ Strcasecmp (charmap_filename + i, "-UTF-2") == 0) \ ) \ ) return (TRUE); else return (FALSE);} inline intStr_is_boundary (char *linebuf, char *boundary, int push_boundary){ int boundary_len; int ret = FALSE, index; static struct { char **lines; int last, pending, pending_ret; } boundaries = { NULL, -1, -1, 0 }; unsigned char *lbp; if (push_boundary) { if (++boundaries.last % 10 == 0) boundaries.lines = xrealloc (boundaries.lines, \ sizeof (boundaries.lines[0]) * (boundaries.last + 10)); boundaries.lines[boundaries.last] = xstrdup (boundary); } if (boundaries.pending != -1) { if (linebuf) Error (internal_err); if (line.length < 90) line.buf = xrealloc (line.buf, 90); *(lbp = Stpcpy (line.buf, boundaries.lines[boundaries.pending])) = '\n'; lbp[1] = '\0'; if (boundaries.pending_ret == END_BOUNDARY) strcpy (lbp, "--\n"); lbp = line.buf; } else if (!(lbp = linebuf)) Error (internal_err); for (index = boundaries.last; index >= 0; index--) { boundary_len = strlen (boundaries.lines[index]); if (strncmp (lbp, boundaries.lines[index], boundary_len) == 0) if (lbp[boundary_len +strspn (lbp + boundary_len, " \t")] == '\n') { if (Debug >= 2) fprintf (stderr, " header boundary line found\n"); ret = TRUE; break; } else if (lbp[boundary_len] == '-' \ &&lbp[boundary_len + 1] == '-' \ &&(lbp[boundary_len + 2 \ +strspn (lbp + boundary_len + 2, " \t")] == '\n')) { if (Debug >= 2) fprintf (stderr, " -- end boundary line found\n"); ret = END_BOUNDARY; break; } } if (ret) if (index == boundaries.last) boundaries.pending = -1; else { boundaries.pending = index; if (Debug) fprintf (stderr, " -- this is boundary of outer message !\n"); boundaries.pending_ret = ret; ret = OUTER_BOUNDARY + boundaries.last - index;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -