📄 nkf.c
字号:
oconv = e_oconv; continue; case 's': /* SJIS output */ oconv = s_oconv; continue; case 'l': /* ISO8859 Latin-1 support, no conversion */ iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */ input_f = LATIN1_INPUT; continue; case 'i': /* Kanji IN ESC-$-@/B */ if(*cp=='@'||*cp=='B') kanji_intro = *cp++; continue; case 'o': /* ASCII IN ESC-(-J/B */ if(*cp=='J'||*cp=='B'||*cp=='H') ascii_intro = *cp++; continue; case 'r': rot_f = TRUE; continue;#if defined(MSDOS) || defined(__OS2__) case 'T': binmode_f = FALSE; continue;#endif#ifndef PERL_XS case 'v': usage(); exit(1); break;#endif /* Input code assumption */ case 'J': /* JIS input */ case 'E': /* AT&T EUC input */ input_f = JIS_INPUT; continue; case 'S': /* MS Kanji input */ input_f = SJIS_INPUT; if(x0201_f==NO_X0201) x0201_f=TRUE; continue; case 'Z': /* Convert X0208 alphabet to asii */ /* bit:0 Convert X0208 bit:1 Convert Kankaku to one space bit:2 Convert Kankaku to two spaces */ if('9'>= *cp && *cp>='0') alpha_f |= 1<<(*cp++ -'0'); else alpha_f |= TRUE; continue; case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */ x0201_f = FALSE; /* No X0201->X0208 conversion */ /* accept X0201 ESC-(-I in JIS, EUC, MS Kanji SI/SO in JIS, EUC, MS Kanji SSO in EUC, JIS, not in MS Kanji MS Kanji (0xa0-0xdf) output X0201 ESC-(-I in JIS (0x20-0x5f) SSO in EUC (0xa0-0xdf) 0xa0-0xd in MS Kanji (0xa0-0xdf) */ continue; case 'X': /* Assume X0201 kana */ /* Default value is NO_X0201 for EUC/MS-Kanji mix */ x0201_f = TRUE; continue; case 'f': /* folding -f60 or -f */ fold_f = TRUE; fold_len = atoi(cp); if(!(0<fold_len && fold_len<BUFSIZ)) fold_len = DEFAULT_FOLD; while('0'<= *cp && *cp <='9') cp++; continue; case 'm': /* MIME support */ mime_f = TRUE; if(*cp=='B'||*cp=='Q') { mime_mode = *cp++; mimebuf_f = FIXED_MIME; } else if (*cp=='0') { mime_f = FALSE; } continue; case 'M': /* MIME output */ oconv = j_oconv; /* sorry... not yet done.. */ continue; case 'B': /* Broken JIS support */ /* bit:0 no ESC JIS bit:1 allow any x on ESC-(-x or ESC-$-x bit:2 reset to ascii on NL */ if('9'>= *cp && *cp>='0') broken_f |= 1<<(*cp++ -'0'); else broken_f |= TRUE; continue;#ifndef PERL_XS case 'O':/* for Output file */ file_out = TRUE; continue;#endif case 'c':/* add cr code */ add_cr = TRUE; continue; case 'd':/* delete cr code */ del_cr = TRUE; continue; default: /* bogus option but ignored */ continue; } }}intnoconvert(f) FILE *f;{ int c; while ((c = getc(f)) != EOF) putchar(c); return 1;}intkanji_convert(f) FILE *f;{ int c1, c2; c2 = 0; if(input_f == JIS_INPUT || input_f == LATIN1_INPUT) { estab_f = TRUE; iconv = oconv; } else if(input_f == SJIS_INPUT) { estab_f = TRUE; iconv = s_iconv; } else { estab_f = FALSE; iconv = oconv; } input_mode = ASCII; output_mode = ASCII; shift_mode = FALSE;#define NEXT continue /* no output, get next */#define SEND ; /* output c1 and c2, get next */#define LAST break /* end of loop, go closing */ while ((c1 = GETC(f)) != EOF) { if(c2) { /* second byte */ if(c2 > DEL) { /* in case of 8th bit is on */ if(!estab_f) { /* in case of not established yet */ if(c1 > SSP) { /* It is still ambiguious */ h_conv(f, c2, c1); c2 = 0; NEXT; } else if(c1 < AT) { /* ignore bogus code */ c2 = 0; NEXT; } else { /* established */ /* it seems to be MS Kanji */ estab_f = TRUE; iconv = s_iconv; SEND; } } else /* in case of already established */ if(c1 < AT) { /* ignore bogus code */ c2 = 0; NEXT; } else SEND; } else /* 7 bit code */ /* it might be kanji shitfted */ if((c1 == DEL) || (c1 <= SPACE)) { /* ignore bogus first code */ c2 = 0; NEXT; } else SEND; } else { /* first byte */ if(c1 > DEL) { /* 8 bit code */ if(!estab_f && !iso8859_f) { /* not established yet */ if(c1 < SSP) { /* it seems to be MS Kanji */ estab_f = TRUE; iconv = s_iconv; } else if(c1 < 0xe0) { /* it seems to be EUC */ estab_f = TRUE; iconv = oconv; } else { /* still ambiguious */ } c2 = c1; NEXT; } else { /* estab_f==TRUE */ if(iso8859_f) { SEND; } else if(SSP<=c1 && c1<0xe0 && iconv == s_iconv) { /* SJIS X0201 Case... */ /* This is too arrogant, but ... */ if(x0201_f==NO_X0201) { iconv = oconv; c2 = c1; NEXT; } else if(x0201_f) { if(dv[(c1-SSP)*2]||ev[(c1-SSP)*2]) { /* look ahead for X0201/X0208conversion */ if((c2 = GETC(f)) == EOF) { (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]); LAST; } else if(c2==(0xde)) { /* $BByE@(B */ (*oconv)(dv[(c1-SSP)*2],dv[(c1-SSP)*2+1]); c2=0; NEXT; } else if(c2==(0xdf)&&ev[(c1-SSP)*2]) { /* $BH>ByE@(B */ (*oconv)(ev[(c1-SSP)*2],ev[(c1-SSP)*2+1]); c2=0; NEXT; } UNGETC(c2,f); c2 = 0; } (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]); NEXT; } else SEND; } else if(c1==SSO && iconv != s_iconv) { /* EUC X0201 Case */ /* This is too arrogant if(x0201_f == NO_X0201) { estab_f = FALSE; c2 = 0; NEXT; } */ c1 = GETC(f); /* skip SSO */ euc_1byte_check: if(x0201_f && SSP<=c1 && c1<0xe0) { if(dv[(c1-SSP)*2]||ev[(c1-SSP)*2]) { if((c2 = GETC(f)) == EOF) { (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]); LAST; } /* forward lookup $BByE@(B/$BH>ByE@(B */ if(c2 != SSO) { UNGETC(c2,f); c2 = 0; (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]); NEXT; } else if((c2 = GETC(f)) == EOF) { (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]); (*oconv)(0,SSO); LAST; } else if(c2==(0xde)) { /* $BByE@(B */ (*oconv)(dv[(c1-SSP)*2],dv[(c1-SSP)*2+1]); c2=0; NEXT; } else if(c2==(0xdf)&&ev[(c1-SSP)*2]) { /* $BH>ByE@(B */ (*oconv)(ev[(c1-SSP)*2],ev[(c1-SSP)*2+1]); c2=0; NEXT; } else { (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]); /* we have to check this c2 */ /* and no way to push back SSO */ c1 = c2; c2 = 0; goto euc_1byte_check; } } (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]); NEXT; } else SEND; } else if(c1 < SSP && iconv != s_iconv) { /* strange code in EUC */ iconv = s_iconv; /* try SJIS */ c2 = c1; NEXT; } else { /* already established */ c2 = c1; NEXT; } } } else if((c1 > SPACE) && (c1 != DEL)) { /* in case of Roman characters */ if(shift_mode) { c1 |= 0x80; /* output 1 shifted byte */ if(x0201_f && (!iso8859_f||input_mode==X0201) && SSP<=c1 && c1<0xe0 ) { if(dv[(c1-SSP)*2]||ev[(c1-SSP)*2]) { if((c2 = GETC(f)) == EOF) { (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]); LAST; } else if(c2==(0xde&0x7f)) { /* $BByE@(B */ (*oconv)(dv[(c1-SSP)*2],dv[(c1-SSP)*2+1]); c2=0; NEXT; } else if(c2==(0xdf&0x7f)&&ev[(c1-SSP)*2]) { /* $BH>ByE@(B */ (*oconv)(ev[(c1-SSP)*2],ev[(c1-SSP)*2+1]); c2=0; NEXT; } UNGETC(c2,f); c2 = 0; } (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]); NEXT; } else SEND; } else if(c1 == '(' && broken_f && input_mode == X0208 && !mime_mode ) { /* Try to recover missing escape */ if((c1 = GETC(f)) == EOF) { (*oconv)(0, '('); LAST; } else { if(c1 == 'B' || c1 == 'J' || c1 == 'H') { input_mode = ASCII; shift_mode = FALSE; NEXT; } else { (*oconv)(0, '('); /* do not modify various input_mode */ /* It can be vt100 sequence */ SEND; } } } else if(input_mode == X0208) { /* in case of Kanji shifted */ c2 = c1; NEXT; /* goto next_byte */ } else if(c1 == '=' && mime_f && !mime_mode ) { if((c1 = getc(f)) == EOF) { (*oconv)(0, '='); LAST; } else if(c1 == '?') { /* =? is mime conversiooon start sequence */ if(mime_begin(f) == EOF) /* check in detail */ LAST; else NEXT; } else { (*oconv)(0, '='); ungetc(c1,f); NEXT; } } else if(c1 == '$' && broken_f && !mime_mode) { /* try to recover missing escape */ if((c1 = GETC(f)) == EOF) { (*oconv)(0, '$'); LAST; } else if(c1 == '@'|| c1 == 'B') { /* in case of Kanji in ESC sequence */ input_mode = X0208; shift_mode = FALSE; NEXT; } else { /* sorry */ (*oconv)(0, '$'); (*oconv)(0, c1); NEXT; } } else SEND; } else if(c1 == SI) { shift_mode = FALSE; NEXT; } else if(c1 == SO) { shift_mode = TRUE; NEXT; } else if(c1 == ESC ) { if((c1 = GETC(f)) == EOF) { (*oconv)(0, ESC); LAST; } else if(c1 == '$') { if((c1 = GETC(f)) == EOF) { (*oconv)(0, ESC); (*oconv)(0, '$'); LAST; } else if(c1 == '@'|| c1 == 'B') { /* This is kanji introduction */ input_mode = X0208; shift_mode = FALSE; NEXT; } else if(c1 == '(') { if((c1 = GETC(f)) == EOF) { (*oconv)(0, ESC); (*oconv)(0, '$'); (*oconv)(0, '('); LAST; } else if(c1 == '@'|| c1 == 'B') { /* This is kanji introduction */ input_mode = X0208; shift_mode = FALSE; NEXT; } else { (*oconv)(0, ESC); (*oconv)(0, '$'); (*oconv)(0, '('); (*oconv)(0, c1); NEXT; } } else if(broken_f&0x2) { input_mode = X0208; shift_mode = FALSE; NEXT; } else { (*oconv)(0, ESC); (*oconv)(0, '$'); (*oconv)(0, c1); NEXT; } } else if(c1 == '(') { if((c1 = GETC(f)) == EOF) { (*oconv)(0, ESC); (*oconv)(0, '('); LAST; } else { if(c1 == 'I') { /* This is X0201 kana introduction */ input_mode = X0201; shift_mode = X0201; NEXT; } else if(c1 == 'B' || c1 == 'J' || c1 == 'H') { /* This is X0208 kanji introduction */ input_mode = ASCII; shift_mode = FALSE; NEXT; } else if(broken_f&0x2) { input_mode = ASCII; shift_mode = FALSE; NEXT; } else { (*oconv)(0, ESC); (*oconv)(0, '('); /* maintain various input_mode here */ SEND; } } } else { /* lonely ESC */ (*oconv)(0, ESC); SEND; } } else if(c1 == NL && broken_f&4) { input_mode = ASCII; SEND; } else SEND; } /* send: */ if(input_mode == X0208) (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */ else (*iconv)(c2, c1); /* can be EUC/SJIS */ c2 = 0; continue; /* goto next_word */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -