📄 kanjiinfo.h
字号:
#ifndef _KANJIINFO_H_
#define _KANJIINFO_H_

#include <iostream>
#include <ostream>
/**
 * @class KanjiInfo
 *
 * Declares the character-code conversion entry points and the lookup
 * tables shared by the stream manipulators that derive from this class.
 * Only declarations appear here; the tables and the non-inline member
 * functions are defined in another translation unit.
 */
class KanjiInfo {
 public:
  /// Element type of every lookup table below.
  typedef unsigned short KANJI;

  /// Encodings a manipulator can be asked to produce.
  enum ENCODING { JIS, SJIS, EUC };

  // Table sizes.
  // NOTE(review): presumably the entry counts of kanji_unicode,
  // misc_unicode and ext_unicode respectively -- confirm against the
  // definitions.
  static unsigned short countKanjiUnicode();
  static unsigned short countMiscUnicode();
  static unsigned short countExtUnicode();

 public:
  unsigned short jis2sjis    (unsigned short);
  unsigned short jis2unicode (unsigned short);
  unsigned short sjis2jis    (unsigned short);
  unsigned short unicode2jis (unsigned short);

 private:
  void sjis2jis (unsigned short *, unsigned short *);

 protected:
  // The tables are protected rather than private because the derived
  // manipulator classes index them directly.

  // Code pages for various locales
  static KANJI misc_unicode[];
  static KANJI ext_unicode[];
  static KANJI kanji_unicode[];
  static KANJI cp1250[];
  static KANJI cp1251[];
  static KANJI cp1252[];
  static KANJI cp1253[];
  static KANJI cp1254[];
  static KANJI cp1255[];
  static KANJI cp1256[];
  static KANJI cp1257[];
  static KANJI cp1258[];
};

/**
 * @class jis
 *
 * STL iostream manipulator to convert a character given in JIS
 * encoding to EUC or SJIS.
 *
 * Greatly simplifies the conversion process and makes it a more
 * logical implementation.
 *
 * The target is chosen by the template argument:
 *   - KanjiInfo::EUC  : the value produced by convert_unicode()
 *   - KanjiInfo::SJIS : the value produced by convert_sjis()
 *   - anything else   : 0, which the inserter silently skips
 *
 * Example usage:
 *   cout << jis_code << ", EUC: "  << jis<KanjiInfo::EUC>(jis_code)  << endl;
 *   cout << jis_code << ", SJIS: " << jis<KanjiInfo::SJIS>(jis_code) << endl;
 */
template <KanjiInfo::ENCODING enc = KanjiInfo::EUC>
class jis : KanjiInfo {
 public:
  /**
   * @param jis_code  character code in JIS encoding
   */
  explicit jis(unsigned short jis_code) {
    switch (enc) {
      case KanjiInfo::EUC:
        _code = convert_unicode(jis_code);
        break;
      case KanjiInfo::SJIS:
        _code = convert_sjis(jis_code);
        break;
      default:
        _code = 0;
    }
  }

 private:
  /// Converted code; 0 means "nothing to output".
  unsigned short _code;

  /**
   * Convert JIS to Unicode
   *
   * Based on information given in [1, p.262]
   *
   *   <= 0x007E        : returned unchanged (ASCII)
   *   0x0080 - 0x00FF  : ext_unicode table
   *   0x2121 - 0x217E  : misc_unicode table, main symbol group
   *   0x2221 - 0x227E  : misc_unicode table, secondary symbol group
   *   0x2330 - 0x237A  : full-width ASCII, starting at 0xFF10
   *   0x2421 - 0x2473  : Hiragana, starting at 0x3041
   *   0x2521 - 0x2576  : Katakana, starting at 0x30A1
   *   0x2621 - 0x2658  : Greek, starting at 0x0391
   *   0x2721 - 0x2771  : Cyrillic, starting at 0x0410
   *   0x3021 - 0x7426  : kanji_unicode table
   *
   * @return the mapped value, or 0 when the code falls in none of the
   *         ranges above
   */
  unsigned short convert_unicode (unsigned short ch) {
    if (ch <= 0x7e)
      return (ch);                                  // ASCII
    if ((ch >= 0x0080) && (ch <= 0x00ff))
      return (ext_unicode[ch - 0x80]);              // Extended ASCII
    if ((ch >= 0x2330) && (ch <= 0x237a))
      return (ch - 0x2330 + 0xff10);                // Japanese ASCII
    if ((ch >= 0x2421) && (ch <= 0x2473))
      return (ch - 0x2421 + 0x3041);                // Hiragana
    if ((ch >= 0x2521) && (ch <= 0x2576))
      return (ch - 0x2521 + 0x30a1);                // Katakana
    if ((ch >= 0x2621) && (ch <= 0x2658))
      return (ch - 0x2621 + 0x0391);                // Greek
    if ((ch >= 0x2721) && (ch <= 0x2771))
      return (ch - 0x2721 + 0x0410);                // Cyrillic
    if ((ch >= 0x3021) && (ch <= 0x7426)) {         // Kanji
      // The table is laid out as 94 cells per row, rows starting at
      // lead byte 0x30 and cells at trail byte 0x21.
      const unsigned row  = (ch >> 8) - 0x30;
      const unsigned cell = (ch & 0x00ff);
      if ((cell < 0x21) || (cell > 0x7e))
        return (0);                                 // not a valid cell
      return (kanji_unicode[row * 94 + (cell - 0x21)]);
    }
    if ((ch >= 0x2121) && (ch <= 0x217e))
      return (misc_unicode[ch - 0x2121]);           // Main symbol group
    if ((ch >= 0x2221) && (ch <= 0x227e))
      return (misc_unicode[ch - 0x2221 + 94]);      // Secondary symbol group
    return (0);
  }

  /**
   * Convert JIS to SJIS
   *
   * Two JIS rows share one SJIS lead byte: odd rows land in the lower
   * half of the trail-byte range, even rows in the upper half.
   *
   * @return the SJIS code; single-byte input (lead byte 0) is returned
   *         unchanged, and 0 is returned when either byte lies outside
   *         0x21 - 0x7E
   */
  unsigned short convert_sjis(unsigned short ch) {
    const unsigned hi = (ch >> 8);
    const unsigned lo = (ch & 0xff);

    if (hi == 0)
      return (ch);
    if ((hi < 0x21) || (hi > 0x7e) || (lo < 0x21) || (lo > 0x7e))
      return (0);

    const unsigned row_offset  = (hi < 95) ? 112 : 176;
    const unsigned cell_offset = (hi % 2) ? ((lo > 95) ? 32 : 31) : 126;

    const unsigned lead  = ((hi + 1) >> 1) + row_offset;
    const unsigned trail = lo + cell_offset;
    return (static_cast<unsigned short>((lead << 8) | trail));
  }

  /**
   * Perform the output manipulation
   *
   * Writes the converted code as a single stream character; a code of 0
   * writes nothing. On a stream whose character type is narrower than
   * the code, the cast keeps only the low-order part.
   */
  template <class charT, class Traits>
  inline friend std::basic_ostream<charT,Traits>&
  operator<<(std::basic_ostream<charT,Traits>& os, const jis& j) {
    if (j._code)
      os.put(static_cast<charT>(j._code));
    return os;
  }
};
/**
 * @class euc
 *
 * STL iostream manipulator to convert a character given in EUC
 * encoding to JIS or SJIS.
 *
 * Greatly simplifies the conversion process and makes it a more
 * logical implementation.
 *
 * The target is chosen by the template argument:
 *   - KanjiInfo::JIS  : the value produced by convert_jis()
 *   - KanjiInfo::SJIS : the value produced by convert_sjis()
 *   - anything else (including the default, KanjiInfo::EUC) : 0, which
 *     the inserter silently skips
 */
template <KanjiInfo::ENCODING enc = KanjiInfo::EUC>
class euc : KanjiInfo {
 public:
  /**
   * @param euc_code  character code to convert; convert_jis() lists the
   *                  input ranges that are recognised
   */
  explicit euc(unsigned short euc_code) {
    switch (enc) {
      case KanjiInfo::JIS:
        _code = convert_jis(euc_code);
        break;
      case KanjiInfo::SJIS:
        _code = convert_sjis(euc_code);
        break;
      default:
        _code = 0;
    }
  }

 private:
  /// Converted code; 0 means "nothing to output".
  unsigned short _code;

  /**
   * Convert the input code to JIS.
   *
   *   <= 0x007E        : returned unchanged (ASCII)
   *   0x3041 - 0x3093  : Hiragana, starting at 0x2421
   *   0x30A1 - 0x30F6  : Katakana, starting at 0x2521
   *   0x0391 - 0x03C9  : Greek, starting at 0x2621
   *   0x0410 - 0x044F  : Cyrillic, starting at 0x2721
   *   otherwise        : reverse lookup in kanji_unicode, misc_unicode
   *                      and ext_unicode (in that order), and finally
   *                      0xFF10 - 0xFF5A mapped to 0x2330 onwards
   *
   * @return the JIS code, or 0 when no mapping is found
   */
  unsigned short convert_jis(unsigned short ch) {
    unsigned short i;

    if (ch <= 0x007e)
      return (ch);                       // ASCII
    if ((ch >= 0x3041) && (ch <= 0x3093))
      return (ch - 0x3041 + 0x2421);     // Hiragana
    if ((ch >= 0x30a1) && (ch <= 0x30f6))
      return (ch - 0x30a1 + 0x2521);     // Katakana
    if ((ch >= 0x0391) && (ch <= 0x03c9))
      return (ch - 0x0391 + 0x2621);     // Greek
    if ((ch >= 0x0410) && (ch <= 0x044f))
      return (ch - 0x0410 + 0x2721);     // Cyrillic

    // The kanji: the table index encodes row * 94 + cell, with rows
    // starting at lead byte 0x30 and cells at trail byte 0x21.
    const unsigned short kanji_count = countKanjiUnicode();
    for (i = 0; i < kanji_count; i++) {
      if (kanji_unicode[i] == ch) {
        const unsigned row  = i / 94;
        const unsigned cell = i - row * 94;
        return (static_cast<unsigned short>(((row + 0x30) << 8) | (cell + 0x21)));
      }
    }

    // Symbols and punctuation: the first 94 entries are the main group
    // (0x21xx), the remainder the secondary group (0x22xx).
    const unsigned short misc_count = countMiscUnicode();
    for (i = 0; i < misc_count; i++) {
      if (misc_unicode[i] == ch)
        return ((i >= 94) ? 0x2221 + i - 94 : 0x2121 + i);
    }

    // Extended ASCII codes.
    const unsigned short ext_count = countExtUnicode();
    for (i = 0; i < ext_count; i++) {
      if (ext_unicode[i] == ch)
        return (i + 0x80);
    }

    // The Japanese ASCII needs to be here because some punctuation is
    // stored within gaps, so the tables above must be searched first.
    if ((ch >= 0xff10) && (ch <= 0xff5a))
      return (ch - 0xff10 + 0x2330);     // Japanese ASCII.

    return 0;
  }

  /**
   * Convert the input code to SJIS.
   *
   * Goes through convert_jis() first and then folds the JIS row/cell
   * pair into SJIS: two JIS rows share one SJIS lead byte, odd rows in
   * the lower half of the trail-byte range and even rows in the upper
   * half.
   *
   * @return the SJIS code; a single-byte JIS result (including 0 for
   *         "no mapping") is returned unchanged, and 0 is returned when
   *         either JIS byte lies outside 0x21 - 0x7E
   */
  unsigned short convert_sjis(unsigned short ch) {
    const unsigned short jis_code = convert_jis(ch);
    const unsigned hi = (jis_code >> 8);
    const unsigned lo = (jis_code & 0xff);

    if (hi == 0)
      return (jis_code);
    if ((hi < 0x21) || (hi > 0x7e) || (lo < 0x21) || (lo > 0x7e))
      return (0);

    const unsigned row_offset  = (hi < 95) ? 112 : 176;
    const unsigned cell_offset = (hi % 2) ? ((lo > 95) ? 32 : 31) : 126;

    const unsigned lead  = ((hi + 1) >> 1) + row_offset;
    const unsigned trail = lo + cell_offset;
    return (static_cast<unsigned short>((lead << 8) | trail));
  }

  /**
   * Perform the output manipulation
   *
   * Writes the converted code as a single stream character; a code of 0
   * writes nothing. On a stream whose character type is narrower than
   * the code, the cast keeps only the low-order part.
   */
  template <class charT, class Traits>
  inline friend std::basic_ostream<charT,Traits>&
  operator<<(std::basic_ostream<charT,Traits>& os, const euc& e) {
    if (e._code)
      os.put(static_cast<charT>(e._code));
    return os;
  }
};
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -