📄 nkf.c

📁 NKF(Network Kanji Filter)是在撰写网络新闻或电子邮件时使用的汉字编码转换程序
💻 C
📖 第 1 页 / 共 4 页
字号:
	    oconv = e_oconv;	    continue;	case 's':           /* SJIS output */	    oconv = s_oconv;	    continue;	case 'l':           /* ISO8859 Latin-1 support, no conversion */	    iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */	    input_f = LATIN1_INPUT;	    continue;	case 'i':           /* Kanji IN ESC-$-@/B */	    if(*cp=='@'||*cp=='B') 		kanji_intro = *cp++;	    continue;	case 'o':           /* ASCII IN ESC-(-J/B */	    if(*cp=='J'||*cp=='B'||*cp=='H') 		ascii_intro = *cp++;	    continue;	case 'r':	    rot_f = TRUE;	    continue;#if defined(MSDOS) || defined(__OS2__) 	case 'T':	    binmode_f = FALSE;	    continue;#endif#ifndef PERL_XS	case 'v':	    usage();	    exit(1);	    break;#endif	/* Input code assumption */	case 'J':   /* JIS input */	case 'E':   /* AT&T EUC input */	    input_f = JIS_INPUT;	    continue;	case 'S':   /* MS Kanji input */	    input_f = SJIS_INPUT;	    if(x0201_f==NO_X0201) x0201_f=TRUE;	    continue;	case 'Z':   /* Convert X0208 alphabet to asii */	    /*  bit:0   Convert X0208		bit:1   Convert Kankaku to one space		bit:2   Convert Kankaku to two spaces	    */	    if('9'>= *cp && *cp>='0') 		alpha_f |= 1<<(*cp++ -'0');	    else 		alpha_f |= TRUE;	    continue;	case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */	    x0201_f = FALSE;    /* No X0201->X0208 conversion */	    /* accept  X0201		    ESC-(-I     in JIS, EUC, MS Kanji		    SI/SO       in JIS, EUC, MS Kanji		    SSO         in EUC, JIS, not in MS Kanji		    MS Kanji (0xa0-0xdf) 	       output  X0201		    ESC-(-I     in JIS (0x20-0x5f)		    SSO         in EUC (0xa0-0xdf)		    0xa0-0xd    in MS Kanji (0xa0-0xdf) 	    */	    continue;	case 'X':   /* Assume X0201 kana */	    /* Default value is NO_X0201 for EUC/MS-Kanji mix */	    x0201_f = TRUE;	    continue;	case 'f':   /* folding -f60 or -f */	    fold_f = TRUE;	    fold_len = atoi(cp);	    if(!(0<fold_len && fold_len<BUFSIZ)) 		fold_len = DEFAULT_FOLD;	    while('0'<= *cp && *cp <='9') cp++;	    continue;	case 
'm':   /* MIME support */	    mime_f = TRUE;	    if(*cp=='B'||*cp=='Q') {		mime_mode = *cp++;		mimebuf_f = FIXED_MIME;	    } else if (*cp=='0') {		mime_f = FALSE;	    }	    continue;	case 'M':   /* MIME output */	    oconv = j_oconv;    /* sorry... not yet done.. */	    continue;	case 'B':   /* Broken JIS support */	    /*  bit:0   no ESC JIS		bit:1   allow any x on ESC-(-x or ESC-$-x		bit:2   reset to ascii on NL	    */	    if('9'>= *cp && *cp>='0') 		broken_f |= 1<<(*cp++ -'0');	    else 		broken_f |= TRUE;	    continue;#ifndef PERL_XS	case 'O':/* for Output file */	    file_out = TRUE;	    continue;#endif	case 'c':/* add cr code */	    add_cr = TRUE;	    continue;	case 'd':/* delete cr code */	    del_cr = TRUE;	    continue;	default:	    /* bogus option but ignored */	    continue;	}    }}intnoconvert(f)    FILE  *f;{    int    c;    while ((c = getc(f)) != EOF)      putchar(c);    return 1;}intkanji_convert(f)    FILE  *f;{    int    c1,                    c2;    c2 = 0;    if(input_f == JIS_INPUT || input_f == LATIN1_INPUT) {        estab_f = TRUE; iconv = oconv;    } else if(input_f == SJIS_INPUT) {        estab_f = TRUE;  iconv = s_iconv;    } else {        estab_f = FALSE; iconv = oconv;    }    input_mode = ASCII;    output_mode = ASCII;    shift_mode = FALSE;#define NEXT continue      /* no output, get next */#define SEND ;             /* output c1 and c2, get next */#define LAST break         /* end of loop, go closing  */    while ((c1 = GETC(f)) != EOF) {        if(c2) {            /* second byte */            if(c2 > DEL) {                /* in case of 8th bit is on */                if(!estab_f) {                    /* in case of not established yet */                    if(c1 > SSP) {                        /* It is still ambiguious */                        h_conv(f, c2, c1);                        c2 = 0;                        NEXT;                    } else if(c1 < AT) {                        /* ignore bogus code */                        c2 = 
0;                        NEXT;                    } else {                        /* established */                        /* it seems to be MS Kanji */                        estab_f = TRUE;                        iconv = s_iconv;                        SEND;                    }                } else                    /* in case of already established */                    if(c1 < AT) {                        /* ignore bogus code */                        c2 = 0;                        NEXT;                    } else                        SEND;            } else                /* 7 bit code */                /* it might be kanji shitfted */                if((c1 == DEL) || (c1 <= SPACE)) {                    /* ignore bogus first code */                    c2 = 0;                    NEXT;                } else                    SEND;        } else {            /* first byte */            if(c1 > DEL) {                /* 8 bit code */                if(!estab_f && !iso8859_f) {                    /* not established yet */                    if(c1 < SSP) {                        /* it seems to be MS Kanji */                        estab_f = TRUE;                        iconv = s_iconv;                    } else if(c1 < 0xe0) {                        /* it seems to be EUC */                        estab_f = TRUE;                        iconv = oconv;                    } else {                        /* still ambiguious */                    }                    c2 = c1;                    NEXT;                } else { /* estab_f==TRUE */                    if(iso8859_f) {                        SEND;                    } else if(SSP<=c1 && c1<0xe0 && iconv == s_iconv) {                        /* SJIS X0201 Case... */                        /* This is too arrogant, but ... 
*/                        if(x0201_f==NO_X0201) {                            iconv = oconv;                            c2 = c1;                            NEXT;                        } else                         if(x0201_f) {                            if(dv[(c1-SSP)*2]||ev[(c1-SSP)*2]) {                            /* look ahead for X0201/X0208conversion */                                if((c2 = GETC(f)) == EOF) {                                    (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);                                    LAST;                                } else if(c2==(0xde)) { /* $BByE@(B */                                    (*oconv)(dv[(c1-SSP)*2],dv[(c1-SSP)*2+1]);                                    c2=0;                                     NEXT;                                } else if(c2==(0xdf)&&ev[(c1-SSP)*2]) {                                     /* $BH>ByE@(B */                                    (*oconv)(ev[(c1-SSP)*2],ev[(c1-SSP)*2+1]);                                    c2=0;                                     NEXT;                                }                                 UNGETC(c2,f); c2 = 0;                            }                            (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);                            NEXT;                        } else                            SEND;                    } else if(c1==SSO && iconv != s_iconv) {                        /* EUC X0201 Case */                        /* This is too arrogant                        if(x0201_f == NO_X0201) {                            estab_f = FALSE;                             c2 = 0;                              NEXT;                        } */                        c1 = GETC(f);  /* skip SSO */                        euc_1byte_check:                        if(x0201_f && SSP<=c1 && c1<0xe0) {                            if(dv[(c1-SSP)*2]||ev[(c1-SSP)*2]) {                                if((c2 = GETC(f)) == EOF) {                                    
(*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);                                    LAST;                                }                                /* forward lookup $BByE@(B/$BH>ByE@(B */                                if(c2 != SSO) {                                    UNGETC(c2,f); c2 = 0;                                     (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);                                    NEXT;                                } else if((c2 = GETC(f)) == EOF) {                                    (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);                                    (*oconv)(0,SSO);                                     LAST;                                } else if(c2==(0xde)) { /* $BByE@(B */                                    (*oconv)(dv[(c1-SSP)*2],dv[(c1-SSP)*2+1]);                                    c2=0;                                     NEXT;                                } else if(c2==(0xdf)&&ev[(c1-SSP)*2]) {                                     /* $BH>ByE@(B */                                    (*oconv)(ev[(c1-SSP)*2],ev[(c1-SSP)*2+1]);                                    c2=0;                                     NEXT;                                } else {                                    (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);                                    /* we have to check this c2 */                                    /* and no way to push back SSO */                                    c1 = c2; c2 = 0;                                    goto euc_1byte_check;                                }                            }                            (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);                            NEXT;                        } else                             SEND;                    } else if(c1 < SSP && iconv != s_iconv) {                        /* strange code in EUC */                        iconv = s_iconv;  /* try SJIS */                        c2 = c1;                        NEXT;                
    } else {                       /* already established */                       c2 = c1;                       NEXT;                    }                }            } else if((c1 > SPACE) && (c1 != DEL)) {                /* in case of Roman characters */                if(shift_mode) {                     c1 |= 0x80;                    /* output 1 shifted byte */                    if(x0201_f && (!iso8859_f||input_mode==X0201) &&                             SSP<=c1 && c1<0xe0 ) {                        if(dv[(c1-SSP)*2]||ev[(c1-SSP)*2]) {                            if((c2 = GETC(f)) == EOF) {                                (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);                                LAST;                            } else if(c2==(0xde&0x7f)) { /* $BByE@(B */                                (*oconv)(dv[(c1-SSP)*2],dv[(c1-SSP)*2+1]);                                c2=0;                                 NEXT;                            } else if(c2==(0xdf&0x7f)&&ev[(c1-SSP)*2]) {                                  /* $BH>ByE@(B */                                (*oconv)(ev[(c1-SSP)*2],ev[(c1-SSP)*2+1]);                                c2=0;                                 NEXT;                            }                            UNGETC(c2,f); c2 = 0;                        }                        (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);                        NEXT;                    } else                        SEND;                } else if(c1 == '(' && broken_f && input_mode == X0208                        && !mime_mode ) {                    /* Try to recover missing escape */                    if((c1 = GETC(f)) == EOF) {                        (*oconv)(0, '(');                        LAST;                    } else {                        if(c1 == 'B' || c1 == 'J' || c1 == 'H') {                            input_mode = ASCII; shift_mode = FALSE;                            NEXT;                        } else {                            
(*oconv)(0, '(');                            /* do not modify various input_mode */                            /* It can be vt100 sequence */                            SEND;                        }                    }                } else if(input_mode == X0208) {                    /* in case of Kanji shifted */                    c2 = c1;                    NEXT;                    /* goto next_byte */                } else if(c1 == '=' && mime_f && !mime_mode ) {                    if((c1 = getc(f)) == EOF) {                        (*oconv)(0, '=');                        LAST;                    } else if(c1 == '?') {                        /* =? is mime conversiooon start sequence */                        if(mime_begin(f) == EOF) /* check in detail */                            LAST;                        else                            NEXT;                    } else {                        (*oconv)(0, '=');                        ungetc(c1,f);                        NEXT;                    }                } else if(c1 == '$' && broken_f && !mime_mode) {                    /* try to recover missing escape */                    if((c1 = GETC(f)) == EOF) {                        (*oconv)(0, '$');                        LAST;                    } else if(c1 == '@'|| c1 == 'B') {                        /* in case of Kanji in ESC sequence */                        input_mode = X0208;                        shift_mode = FALSE;                        NEXT;                    } else {                        /* sorry */                        (*oconv)(0, '$');                        (*oconv)(0, c1);                        NEXT;                    }                } else                    SEND;            } else if(c1 == SI) {                shift_mode = FALSE;                 NEXT;            } else if(c1 == SO) {                shift_mode = TRUE;                 NEXT;            } else if(c1 == ESC ) {                if((c1 = GETC(f)) == EOF) {              
      (*oconv)(0, ESC);                    LAST;                } else if(c1 == '$') {                    if((c1 = GETC(f)) == EOF) {                        (*oconv)(0, ESC);                        (*oconv)(0, '$');                        LAST;                    } else if(c1 == '@'|| c1 == 'B') {                        /* This is kanji introduction */                        input_mode = X0208;                        shift_mode = FALSE;                        NEXT;                    } else if(c1 == '(') {			if((c1 = GETC(f)) == EOF) {			    (*oconv)(0, ESC);			    (*oconv)(0, '$');			    (*oconv)(0, '(');			    LAST;			} else if(c1 == '@'|| c1 == 'B') {			    /* This is kanji introduction */			    input_mode = X0208;			    shift_mode = FALSE;			    NEXT;			} else {			    (*oconv)(0, ESC);			    (*oconv)(0, '$');			    (*oconv)(0, '(');			    (*oconv)(0, c1);			    NEXT;			}                    } else if(broken_f&0x2) {                        input_mode = X0208;                        shift_mode = FALSE;                        NEXT;                    } else {                        (*oconv)(0, ESC);                        (*oconv)(0, '$');                        (*oconv)(0, c1);                        NEXT;                    }                } else if(c1 == '(') {                    if((c1 = GETC(f)) == EOF) {                        (*oconv)(0, ESC);                        (*oconv)(0, '(');                        LAST;                    } else {                        if(c1 == 'I') {                            /* This is X0201 kana introduction */                            input_mode = X0201; shift_mode = X0201;                            NEXT;                        } else if(c1 == 'B' || c1 == 'J' || c1 == 'H') {                            /* This is X0208 kanji introduction */                            input_mode = ASCII; shift_mode = FALSE;                            NEXT;                        } else if(broken_f&0x2) {                            
input_mode = ASCII; shift_mode = FALSE;                            NEXT;                        } else {                            (*oconv)(0, ESC);                            (*oconv)(0, '(');                            /* maintain various input_mode here */                            SEND;                        }                    }                } else {                    /* lonely ESC  */                    (*oconv)(0, ESC);                    SEND;                }            } else if(c1 == NL && broken_f&4) {                input_mode = ASCII;                 SEND;            } else                SEND;        }        /* send: */        if(input_mode == X0208)             (*oconv)(c2, c1);  /* this is JIS, not SJIS/EUC case */        else            (*iconv)(c2, c1);  /* can be EUC/SJIS */        c2 = 0;        continue;        /* goto next_word */
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -