⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 convert_charset.c

📁 一个C语言写的快速贝叶斯垃圾邮件过滤工具
💻 C
字号:
/* $Id: convert_charset.c,v 1.2 2005/05/31 15:18:40 m-a Exp $ */

/*****************************************************************************

NAME:
   convert_charset.c -- provide charset support for bogofilter's lexer.

Note:

   Character translation is done to make life easier for the lexer.
   Text is changed only after the message has been saved for
   passthrough.  The end user (mail reader) never sees any changes -
   only the lexer.

AUTHOR:
   David Relson <relson@osagesoftware.com>

******************************************************************************/

#include "common.h"

#include <ctype.h>
#include <stdlib.h>
#include <string.h>

#include "charset.h"
#include "convert_charset.h"
#include "xmalloc.h"
#include "xstrdup.h"

#define SP      ' '

/* Forward declarations of the per-charset table initializers. */
static void map_default(void);
static void map_us_ascii(void);
static void map_iso_8859_1(void);
static void map_iso_8859_2(void);
static void map_iso_8859_3(void);
static void map_iso_8859_4(void);
#ifndef CP866
static void map_iso_8859_5(void);
#endif
static void map_iso_8859_6(void);
static void map_iso_8859_7(void);
static void map_iso_8859_8(void);
static void map_iso_8859_9(void);
static void map_iso_8859_10(void);
static void map_iso_8859_13(void);
static void map_iso_8859_14(void);
static void map_iso_8859_15(void);
static void map_unicode(void);
#ifndef CP866
static void map_windows_1251_to_koi8r(void);
#else
static void map_windows_1251_to_cp866(void);
static void map_koi8_r_to_cp866(void);
static void map_iso_8859_5_to_cp866(void);
#endif
static void map_windows_1252(void);
static void map_windows_1256(void);
static void map_nonascii_characters(void);

/*
 * Debug switch: remove the #undef below to have the charset table dumped
 * at each PRINT_CHARSET_TABLE point.  With DEBUG undefined the macro
 * expands to nothing.
 */
#define DEBUG
#undef  DEBUG

#ifndef DEBUG
#define PRINT_CHARSET_TABLE
#else
#define PRINT_CHARSET_TABLE     print_charset_table()
#undef  DEBUG_GENERAL
#define DEBUG_GENERAL(level)    (verbose >= (level))
#endif

#ifdef  DEBUG
/*
 * Dump the first 256 entries of charset_table, 16 entries per output line,
 * as "index.char.hex".  Backspace, tab, newline and carriage return are
 * shown in hex instead of literally so they do not disturb the layout.
 * Does nothing unless DEBUG_GENERAL(1) holds.
 */
static void print_charset_table(void)
{
    unsigned int row, col;

    if (!DEBUG_GENERAL(1))
        return;

    printf( "\n" );
    for (row = 0; row < 4; row += 1) {
        for (col = 0; col < 64; col += 1) {
            unsigned int idx = row * 64 + col;
            /* unsigned so that high-bit bytes are not sign-extended
             * when promoted for the %X conversion */
            unsigned char ch = (unsigned char) charset_table[idx];

            if (ch != 0x08 && ch != 0x09 && ch != '\n' && ch != '\r')
                printf(" %02X.%2c.%02X", idx, ch, (unsigned int) ch);
            else
                printf(" %02X.%02X.%02X", idx, (unsigned int) ch, (unsigned int) ch);
            if ((col & 15) == 15)
                printf( "\n" );
        }
    }
    printf( "\n" );
}
#endif

/*
 * For every index with the high bit (0x80) set whose casefold_table entry
 * is still the identity mapping, replace that entry with '?'.
 * Entries that were already remapped are left alone.
 */
static void map_nonascii_characters(void)
{
    uint ch;

    for (ch = 0; ch < COUNTOF(charset_table); ch += 1)
    {
        /* convert high-bit characters to '?' */
        if (ch & 0x80 && casefold_table[ch] == ch)
            casefold_table[ch] = '?';
    }
}

/*
 * Apply a translation list to charset_table.
 *
 * xlate - flat array of (from, to) byte pairs
 * size  - number of bytes in xlate (two per pair)
 *
 * For each pair, charset_table[from] is set to to.  A trailing unpaired
 * byte (odd size) is ignored rather than read past the end of the array.
 */
static void map_xlate_characters(unsigned char *xlate, uint size)
{
    uint i;

    for (i = 0; i + 1 < size; i += 2)
    {
        byte from = xlate[i];
        byte to   = xlate[i+1];
        charset_table[from] = to;
    }
}

/*
 * Reset both tables to the identity mapping, then map control characters
 * in charset_table to blanks, keeping tab and newline unchanged.
 */
static void map_default(void)
{
    unsigned int ch;

    for (ch = 0; ch < COUNTOF(charset_table); ch += 1)
    {
        charset_table[ch] = casefold_table[ch] = ch;
    }

    PRINT_CHARSET_TABLE;

    for (ch = 0; ch < COUNTOF(charset_table); ch += 1)
    {
        if (iscntrl(ch) &&              /* convert control characters to blanks */
            ch != '\t' && ch != '\n')   /* except tabs and newlines             */
            charset_table[ch] = SP;
    }

    PRINT_CHARSET_TABLE;
}

static void map_iso_8859_1(void)        /* ISOIEC 8859-1:1998 Latin Alphabet No. 1 */
{
    /* Not yet implemented */
}

static void map_iso_8859_2(void)        /* ISOIEC 8859-2:1999 Latin Alphabet No. 2 */
{
    /* Not yet implemented */
}

static void map_iso_8859_3(void)        /* ISOIEC 8859-3:1999 Latin Alphabet No. 3 */
{
    /* Not yet implemented */
}

static void map_iso_8859_4(void)        /* ISOIEC 8859-4:1998 Latin Alphabet No. 4 */
{
    /* Not yet implemented */
}

#ifndef CP866
static void map_iso_8859_5(void)        /* ISOIEC 8859-5:1999 LatinCyrillic Alphabet */
{
    /* Not yet implemented */
}
#endif

static void map_iso_8859_6(void)        /* ISOIEC 8859-6:1999 LatinArabic Alphabet */
{
    /* Not yet implemented */
}

static void map_iso_8859_7(void)        /* ISO 8859-7:1987 LatinGreek Alphabet     */
{
    /* Not yet implemented */
}

static void map_iso_8859_8(void)        /* ISOIEC 8859-8:1999 LatinHebrew Alphabet */
{
    /* Not yet implemented */
}

static void map_iso_8859_9(void)        /* ISOIEC 8859-9:1999 Latin Alphabet No. 5 */
{
    /* Not yet implemented */
}

static void map_iso_8859_10(void)       /* ISOIEC 8859-10:1998 Latin Alphabet No. 6 */
{
    /* Not yet implemented */
}

static void map_iso_8859_13(void)       /* ISOIEC 8859-13:1998 Latin Alphabet No. 7 (Baltic Rim) */
{
    /* Not yet implemented */
}

static void map_iso_8859_14(void)       /* ISOIEC 8859-14:1998 Latin Alphabet No. 8 (Celtic) */
{
    /* Not yet implemented */
}

static void map_iso_8859_15(void)	/* ISOIEC 8859-15:1999 Latin Alphabet No. 9 */
{
    static unsigned char xlate_15[] = {
	0xA0, ' ',		/* A0  160      160 NO-BREAK SPACE */
	0xA1, '!',		/* A1  161  

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -