⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 convert_unicode.c

📁 一个C语言写的快速贝叶斯垃圾邮件过滤工具
💻 C
字号:
/* $Id: convert_unicode.c,v 1.17 2005/12/31 09:56:18 m-a Exp $ */

/*****************************************************************************

NAME:
   convert_unicode.c -- provide charset support using iconv().

Note:

   Character translation is done to make life easier for the lexer.
   Text is changed only after the message has been saved for
   passthrough.  The end user (mail reader) never sees any changes -
   only the lexer.

AUTHOR:
   David Relson <relson@osagesoftware.com>  2005

******************************************************************************/

#include "common.h"

#include <ctype.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>

#include "charset.h"
#include "convert_unicode.h"
#include "chUnicodeTo866.h"
#include "xmalloc.h"
#include "xstrdup.h"

#define	SP	' '

#include <iconv.h>

/* iconv conversion descriptor; starts out as NULL. */
iconv_t cd = NULL;

/*
 * map_nonascii_characters -- replace high-bit entries of casefold_table.
 *
 * For every index that has bit 0x80 set and whose casefold_table entry
 * still equals the index itself, the entry is overwritten with '?'.
 * Entries that already map to some other value are left alone.
 */
static void map_nonascii_characters(void)
{
    unsigned int ch;

    for (ch = 0; ch < COUNTOF(charset_table); ch += 1)
    {
	/* convert high-bit characters to '?' */
	if (ch & 0x80 && casefold_table[ch] == ch)
	    casefold_table[ch] = '?';
    }
}

/*
 * map_default -- reset both translation tables to their default contents.
 *
 * First pass: charset_table and casefold_table are both set to the
 * identity mapping.  Second pass: every control character (per iscntrl())
 * other than tab and newline is mapped to a blank in charset_table;
 * casefold_table is not touched by the second pass.
 */
static void map_default(void)
{
    unsigned int ch;

    for (ch = 0; ch < COUNTOF(charset_table); ch += 1)
    {
	charset_table[ch] = casefold_table[ch] = ch;
    }

    for (ch = 0; ch < COUNTOF(charset_table); ch += 1)
    {
	if (iscntrl(ch) &&		/* convert control characters to blanks */
	    ch != '\t' && ch != '\n')	/* except tabs and newlines		*/
	    charset_table[ch] = SP;
    }
}

/* One entry of the charsets[] table: a charset name plus a boolean flag. */
typedef struct charset_def {
    const char *name;
    bool allow_nonascii_replacement;
} charset_def_t;

#define	T	true
#define	F	false

static charset_def_t charsets[] = {
    { "default",	T },
    { "us-ascii",	T },
    { "utf-8",		T },
    { "iso8859-1",	T },		/* ISOIEC 8859-1:1998 Latin Alphabet No. 1	*/
    /* tests/t.systest.d/inputs/spam.mbx is iso-8859-1 and contains
     * 8-bit characters - " 揧our Account

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -