// File: scim_pinyin.cpp
/** @file scim_pinyin.cpp
* implementation of PinyinKey, PinyinTable and related classes.
*/
/*
* Smart Chinese Input Method
*
* Copyright (c) 2002 James Su <suzhe@turbolinux.com.cn>
*
* $Id: scim_pinyin.cpp,v 1.2 2004/07/17 07:05:31 Lu Mu Exp $
*
*/
#define Uses_STL_AUTOPTR
#define Uses_STL_FUNCTIONAL
#define Uses_STL_VECTOR
#define Uses_STL_IOSTREAM
#define Uses_STL_FSTREAM
#define Uses_STL_ALGORITHM
#define Uses_STL_MAP
#define Uses_STL_UTILITY
#define Uses_STL_IOMANIP
#define Uses_C_STDIO
#define Uses_SCIM_UTILITY
#define Uses_SCIM_SERVER
#define Uses_SCIM_ICONV
#define Uses_SCIM_CONFIG_BASE
#define Uses_SCIM_CONFIG_PATH
#define Uses_SCIM_LOOKUP_TABLE
//#include <scim.h>
#include <stdio.h>
#include "scim_pinyin.h"
/*
* Sample implementation from Unicode home page.
* http://www.stonehand.com/unicode/standard/fss-utf.html
*/
/*
 * One row of the UTF-8 length table used by utf8_mbtowc() and utf8_wctomb().
 * Each row describes the sequences of one particular byte length.
 */
struct utf8_table {
/* Bits of the lead byte that are tested when decoding. */
int cmask;
/* Value the tested bits must have; also OR'ed into the lead byte when encoding. */
int cval;
/* Right shift applied to the code point to form the lead byte when encoding. */
int shift;
/* Mask applied to the accumulated value when decoding; when encoding, a code
 * point <= lmask is emitted with this row's length. */
long lmask;
/* Smallest decoded value accepted for this length; anything lower is rejected. */
long lval;
};
/*
 * Rows are ordered by sequence length (1 to 6 bytes); both codec functions
 * walk the table from the top and count rows to obtain the byte length.
 * The final row has cmask == 0 and terminates the walk.
 */
static struct utf8_table utf8_table[] =
{
{0x80, 0x00, 0*6, 0x7F, 0, /* 1 byte sequence */},
{0xE0, 0xC0, 1*6, 0x7FF, 0x80, /* 2 byte sequence */},
{0xF0, 0xE0, 2*6, 0xFFFF, 0x800, /* 3 byte sequence */},
{0xF8, 0xF0, 3*6, 0x1FFFFF, 0x10000, /* 4 byte sequence */},
{0xFC, 0xF8, 4*6, 0x3FFFFFF, 0x200000, /* 5 byte sequence */},
{0xFE, 0xFC, 5*6, 0x7FFFFFFF, 0x4000000, /* 6 byte sequence */},
{0, /* end of table */}
};
/**
 * Decode a single UTF-8 sequence into a code point.
 *
 * @param p  receives the decoded value on success; left untouched on failure
 * @param s  first byte of the encoded sequence
 * @param n  number of bytes that may be read starting at s
 * @return   number of bytes consumed (1..6), or -1 when no byte is available
 *           (n <= 0), the sequence is cut short by n, a continuation byte is
 *           malformed, the lead byte matches no table row, or the value was
 *           encoded with more bytes than necessary
 */
int
utf8_mbtowc(ucs4_t *p, const __u8 *s, int n)
{
    long l;
    int c0, c, nc;
    struct utf8_table *t;

    /* With no readable byte even the lead byte must not be dereferenced. */
    if (n <= 0)
        return -1;

    nc = 0;
    c0 = *s;
    l = c0;
    for (t = utf8_table; t->cmask; t++) {
        nc++;
        if ((c0 & t->cmask) == t->cval) {
            /* Lead byte matches this length: strip the marker bits. */
            l &= t->lmask;
            /* Reject values that would have fit in a shorter sequence. */
            if (l < t->lval)
                return -1;
            *p = l;
            return nc;
        }
        /* A longer sequence is needed; make sure the next byte is in range. */
        if (n <= nc)
            return -1;
        s++;
        /* After the XOR a valid continuation byte (10xxxxxx) has its top two bits clear. */
        c = (*s ^ 0x80) & 0xFF;
        if (c & 0xC0)
            return -1;
        l = (l << 6) | c;
    }
    return -1;
}
/**
 * Encode one code point as UTF-8.
 *
 * @param s       destination buffer; a null pointer makes the call return 0
 * @param wc      code point to encode
 * @param maxlen  number of table rows (i.e. sequence lengths) that may be tried
 * @return        number of bytes written, 0 when s is null, or -1 when wc does
 *                not fit into any length that maxlen permits
 */
int
utf8_wctomb(__u8 *s, ucs4_t wc, int maxlen)
{
    if (s == 0)
        return 0;

    const long value = wc;
    int length = 1;
    for (const struct utf8_table *row = utf8_table;
         row->cmask && maxlen;
         ++row, --maxlen, ++length) {
        /* Too large for this sequence length: try the next, longer one. */
        if (value > row->lmask)
            continue;

        /* Lead byte: length marker plus the topmost payload bits. */
        *s = row->cval | (value >> row->shift);

        /* Continuation bytes carry six payload bits each, highest group first. */
        for (int bits = row->shift - 6; bits >= 0; bits -= 6)
            *++s = 0x80 | ((value >> bits) & 0x3F);

        return length;
    }
    return -1;
}
/**
 * Write one code point to a stream as UTF-8.
 *
 * Nothing is written when utf8_wctomb() does not report a positive length.
 *
 * @param os  destination stream
 * @param wc  code point to write
 * @return    os, to allow chaining
 */
std::ostream &
utf8_write_wchar (std::ostream &os, ucs4_t wc)
{
    unsigned char encoded [6];
    const int length = utf8_wctomb (encoded, wc, 6);
    if (length > 0)
        os.write (reinterpret_cast<const char*>(encoded), length);
    return os;
}
/*
// Internal functions
static int
__scim_pinyin_compare_initial (const PinyinCustomSettings &custom,
PinyinInitial lhs,
PinyinInitial rhs);
static int
__scim_pinyin_compare_final (const PinyinCustomSettings &custom,
PinyinFinal lhs,
PinyinFinal rhs);
static int
__scim_pinyin_compare_tone (const PinyinCustomSettings &custom,
PinyinTone lhs,
PinyinTone rhs);
*/
// Data definition
// Identification strings for pinyin table data. None of them is referenced in
// the visible part of this file.
// NOTE(review): presumably the magic header lines of the text and binary table
// formats plus the format version -- confirm against the table load/save code.
static const char scim_pinyin_table_text_header [] = "SCIM_Pinyin_Table_TEXT";
static const char scim_pinyin_table_binary_header [] = "SCIM_Pinyin_Table_BINARY";
static const char scim_pinyin_table_version [] = "VERSION_0_4";
/*
const PinyinCustomSettings scim_default_custom_settings =
{
true, false, true,
{false, false, false, false, false, false, false, false, false, false}
};
*/
// A default-constructed PinyinValidator.
// NOTE(review): presumably the validator used when a caller does not supply
// its own -- confirm against the declarations in scim_pinyin.h.
const PinyinValidator scim_default_pinyin_validator;
// Table of pinyin initials. PinyinKey::parse_initial() casts the array index
// of the matching row to PinyinInitial, so the row order must not change.
// Each row holds: the Latin spelling, a zero-terminated list of code points
// in the U+31xx range, the length of the Latin spelling, and a second count
// that matches the number of code points in the list.
// NOTE(review): the code points look like the Bopomofo (Zhuyin) spelling of
// the initial -- confirm against the PinyinToken declaration.
// Row 0 is the empty spelling; it matches any input with length 0.
const PinyinToken scim_pinyin_initials[] =
{
{"", {0}, 0, 0},
{"b", {0x3105,0}, 1, 1},
{"c", {0x3118,0}, 1, 1},
{"ch",{0x3114,0}, 2, 1},
{"d", {0x3109,0}, 1, 1},
{"f", {0x3108,0}, 1, 1},
{"g", {0x310d,0}, 1, 1},
{"h", {0x310f,0}, 1, 1},
{"j", {0x3110,0}, 1, 1},
{"k", {0x310e,0}, 1, 1},
{"l", {0x310c,0}, 1, 1},
{"m", {0x3107,0}, 1, 1},
{"n", {0x310b,0}, 1, 1},
{"p", {0x3106,0}, 1, 1},
{"q", {0x3111,0}, 1, 1},
{"r", {0x3116,0}, 1, 1},
{"s", {0x3119,0}, 1, 1},
{"sh",{0x3115,0}, 2, 1},
{"t", {0x310a,0}, 1, 1},
{"w", {0x3128,0}, 1, 1},
{"x", {0x3112,0}, 1, 1},
{"y", {0x3129,0}, 1, 1},
{"z", {0x3117,0}, 1, 1},
{"zh",{0x3113,0}, 2, 1}
};
// Table of pinyin finals. PinyinKey::parse_final() casts the array index of
// the matching row to PinyinFinal, so the row order must not change.
// Each row holds: the Latin spelling, a zero-terminated list of code points
// in the U+31xx range, the length of the Latin spelling, and the number of
// code points in the list.
// NOTE(review): the code points look like the Bopomofo (Zhuyin) spelling of
// the final -- confirm against the PinyinToken declaration.
// Row 0 is the empty spelling; it matches any input with length 0.
const PinyinToken scim_pinyin_finals[] =
{
{"", {0}, 0, 0},
{"a", {0x311a,0}, 1, 1},
{"ai", {0x311e,0}, 2, 1},
{"an", {0x3122,0}, 2, 1},
{"ang", {0x3124,0}, 3, 1},
{"ao", {0x3120,0}, 2, 1},
{"e", {0x311c,0}, 1, 1},
{"ei", {0x311f,0}, 2, 1},
{"en", {0x3123,0}, 2, 1},
{"eng", {0x3125,0}, 3, 1},
{"er", {0x3126,0}, 2, 1},
{"i", {0x3127,0}, 1, 1},
{"ia", {0x3127,0x311a,0}, 2, 2},
{"ian", {0x3127,0x3122,0}, 3, 2},
{"iang",{0x3127,0x3124,0}, 4, 2},
{"iao", {0x3127,0x3120,0}, 3, 2},
// NOTE(review): "ie" (and "ue"/"ve" below) use 0x311c, the code point of the
// plain "e" row; standard Zhuyin spells these with 0x311d -- confirm before
// changing.
{"ie", {0x3127,0x311c,0}, 2, 2},
{"in", {0x3127,0x3123,0}, 2, 2},
{"ing", {0x3127,0x3125,0}, 3, 2},
{"iong",{0x3129,0x3125,0}, 4, 2},
{"iou", {0x3127,0x3121,0}, 3, 2},
{"iu", {0x3127,0x3121,0}, 2, 2},
{"ng", {0x312b,0}, 2, 1},
{"o", {0x311b,0}, 1, 1},
// Second code point is 0x3125, the one used by the "eng"/"ing"/"iong"/"ueng"
// rows; the previous 0x3123 duplicated the "un"/"uen" spelling.
{"ong", {0x3128,0x3125,0}, 3, 2},
{"ou", {0x3121,0}, 2, 1},
{"u", {0x3128,0}, 1, 1},
{"ua", {0x3128,0x311a,0}, 2, 2},
{"uai", {0x3128,0x311e,0}, 3, 2},
{"uan", {0x3128,0x3122,0}, 3, 2},
{"uang",{0x3128,0x3124,0}, 4, 2},
{"ue", {0x3129,0x311c,0}, 2, 2},
{"uei", {0x3128,0x311f,0}, 3, 2},
{"uen", {0x3128,0x3123,0}, 3, 2},
{"ueng",{0x3128,0x3125,0}, 4, 2},
{"ui", {0x3128,0x311f,0}, 2, 2},
{"un", {0x3128,0x3123,0}, 2, 2},
{"uo", {0x3128,0x311b,0}, 2, 2},
{"v", {0x3129,0}, 1, 1},
{"van", {0x3129,0x3122,0}, 3, 2},
{"ve", {0x3129,0x311c,0}, 2, 2},
{"vn", {0x3129,0x3123,0}, 2, 2}
};
// Row counts of the two token tables, derived from the array sizes so they
// follow the tables automatically.
const int scim_number_of_initials = sizeof (scim_pinyin_initials) / sizeof (scim_pinyin_initials [0]);
const int scim_number_of_finals = sizeof (scim_pinyin_finals) / sizeof (scim_pinyin_finals [0]);
//////////////////////////////////////////////////////////////////////////////
// implementation of PinyinKey
/**
 * Write the textual form of this key (see get_key_string()) to a stream.
 *
 * @param os  destination stream
 * @return    os, to allow chaining
 */
std::ostream&
PinyinKey::output_text (std::ostream &os) const
{
    os << get_key_string ();
    return os;
}
std::istream&
PinyinKey::input_text (const PinyinValidator &validator, std::istream &is)
{
String key;
is >> key;
set_key (validator, key.c_str());
return is;
}
/*
std::ostream&
PinyinKey::output_binary (std::ostream &os) const
{
unsigned char key [2];
combine_to_bytes (key);
os.write ((const char*) key, sizeof (char) * 2);
return os;
}
std::istream&
PinyinKey::input_binary (const PinyinValidator &validator, std::istream &is)
{
unsigned char key [2];
is.read ((char*) key, sizeof (char) * 2);
extract_from_bytes (key [0], key [1]);
if (!validator (*this)) {
m_tone = SCIM_PINYIN_ZeroTone;
if (!validator (*this)) {
m_final = SCIM_PINYIN_ZeroFinal;
if (!validator (*this))
m_initial = SCIM_PINYIN_ZeroInitial;
}
}
return is;
}
*/
/**
 * Find the longest entry of scim_pinyin_initials that is a prefix of key.
 *
 * Among entries of equal length the one later in the table wins, because the
 * length comparison accepts ties.
 *
 * @param initial  set to the table index of the chosen entry
 * @param key      text to match against
 * @param keylen   number of usable characters in key
 * @return         length of the matched spelling (0 when only the empty
 *                 entry matched)
 */
int
PinyinKey::parse_initial (PinyinInitial &initial,
                          const char *key,
                          int keylen)
{
    int best = 0;
    for (int idx = 0; idx < scim_number_of_initials; ++idx) {
        const PinyinToken &tok = scim_pinyin_initials [idx];

        // Spelling is longer than the remaining input.
        if (tok.len > keylen)
            continue;
        // A strictly longer match has already been found.
        if (tok.len < best)
            continue;
        if (strncmp (tok.str, key, tok.len) != 0)
            continue;

        initial = static_cast<PinyinInitial>(idx);
        best = tok.len;
    }
    return best;
}
/**
 * Find the longest entry of scim_pinyin_finals that is a prefix of key.
 *
 * Among entries of equal length the one later in the table wins, because the
 * length comparison accepts ties.
 *
 * @param final   set to the table index of the chosen entry
 * @param key     text to match against
 * @param keylen  number of usable characters in key
 * @return        length of the matched spelling (0 when only the empty
 *                entry matched)
 */
int
PinyinKey::parse_final (PinyinFinal &final,
                        const char *key,
                        int keylen)
{
    int best = 0;
    for (int idx = 0; idx < scim_number_of_finals; ++idx) {
        const PinyinToken &tok = scim_pinyin_finals [idx];

        // Spelling is longer than the remaining input.
        if (tok.len > keylen)
            continue;
        // A strictly longer match has already been found.
        if (tok.len < best)
            continue;
        if (strncmp (tok.str, key, tok.len) != 0)
            continue;

        final = static_cast<PinyinFinal>(idx);
        best = tok.len;
    }
    return best;
}
/**
 * Interpret the first character of key as a tone digit.
 *
 * @param tone  set to the tone when the digit lies in
 *              [SCIM_PINYIN_First, SCIM_PINYIN_LastTone]; otherwise untouched
 * @param key   text whose first character is examined
 * @return      1 when a tone digit was consumed, 0 otherwise
 */
int
PinyinKey::parse_tone (PinyinTone &tone,
                       const char *key)
{
    const int digit = key [0] - '0';
    if (digit < SCIM_PINYIN_First || digit > SCIM_PINYIN_LastTone)
        return 0;

    tone = static_cast<PinyinTone>(digit);
    return 1;
}
/**
 * Split the start of key into initial, final and tone.
 *
 * A final is tried first. Only when the text does not start with a final is
 * an initial parsed, followed by another attempt at a final. Whatever is left
 * is offered to parse_tone(), and apply_additional_rules() is run on the
 * resulting initial/final pair.
 *
 * @param initial  receives the parsed initial
 * @param final    receives the parsed final
 * @param tone     receives the parsed tone
 * @param key      text to parse
 * @param keylen   number of usable characters in key
 * @return         total number of characters consumed; 0 when keylen <= 0
 *                 (the three outputs are then left untouched)
 */
int
PinyinKey::parse_key (PinyinInitial &initial,
                      PinyinFinal &final,
                      PinyinTone &tone,
                      const char *key,
                      int keylen)
{
    if (keylen <= 0)
        return 0;

    initial = SCIM_PINYIN_ZeroInitial;
    final = SCIM_PINYIN_ZeroFinal;
    tone = SCIM_PINYIN_ZeroTone;

    int used_initial = 0;
    int used_tone = 0;

    // First attempt: the key may consist of a final alone.
    int used_final = parse_final (final, key, keylen);
    key += used_final;
    keylen -= used_final;

    if (final == SCIM_PINYIN_ZeroFinal) {
        // No leading final, so look for an initial and then a final after it.
        used_initial = parse_initial (initial, key, keylen);
        key += used_initial;
        keylen -= used_initial;

        if (keylen != 0) {
            used_final = parse_final (final, key, keylen);
            key += used_final;
            keylen -= used_final;
        }
    }

    if (keylen != 0)
        used_tone = parse_tone (tone, key);

    apply_additional_rules (initial, final);

    return used_initial + used_final + used_tone;
}
/**
 * Parse key and store the longest prefix that the validator accepts.
 *
 * The text is parsed with parse_key(); while the validator rejects the
 * result, parsing is repeated on a prefix one character shorter than what
 * was consumed last time.
 *
 * @param validator  predicate applied to each candidate key
 * @param key        text to parse; a null or empty string returns 0 at once
 *                   and leaves this object unchanged
 * @param keylen     number of usable characters, or a negative value to use
 *                   strlen(key)
 * @return           number of characters consumed; 0 when nothing valid was
 *                   found (the members are then reset to the zero values)
 */
int
PinyinKey::set_key (const PinyinValidator &validator,
                    const char *key,
                    int keylen)
{
    if (key == NULL || key[0] == 0)
        return 0;

    m_initial = SCIM_PINYIN_ZeroInitial;
    m_final = SCIM_PINYIN_ZeroFinal;
    m_tone = SCIM_PINYIN_ZeroTone;

    PinyinInitial initial = SCIM_PINYIN_ZeroInitial;
    PinyinFinal final = SCIM_PINYIN_ZeroFinal;
    PinyinTone tone = SCIM_PINYIN_ZeroTone;

    if (keylen < 0)
        keylen = strlen (key);

    int used = parse_key (initial, final, tone, key, keylen);
    for (; used > 0; used = parse_key (initial, final, tone, key, used - 1)) {
        if (validator (PinyinKey (initial, final, tone)))
            break;
    }

    if (used != 0) {
        m_initial = initial;
        m_final = final;
        m_tone = tone;
    }
    return used;
}
/**
 * Build the textual form of this key: initial spelling, final spelling and,
 * when the tone is non-zero, the tone as a decimal number.
 *
 * @return the formatted key
 */
String
PinyinKey::get_key_string () const
{
    char text [16];

    if (!m_tone) {
        snprintf (text, 15, "%s%s", get_initial_string(), get_final_string());
        return String (text);
    }

    snprintf (text, 15, "%s%s%d", get_initial_string(), get_final_string(), m_tone);
    return String (text);
}
/*
WideString
PinyinKey::get_key_wide_string () const
{
return WideString (get_initial_wide_string ()) + WideString (get_final_wide_string());
}
*/
void
PinyinKey::apply_additional_rules (PinyinInitial &initial, PinyinFinal &final)
{
static struct ReplaceRulePair {
PinyinInitial initial;
PinyinFinal final;
PinyinInitial new_initial;
PinyinFinal new_final;
} rules [] =
{
/*
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_I, SCIM_PINYIN_Yi, SCIM_PINYIN_I},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Ia, SCIM_PINYIN_Yi, SCIM_PINYIN_A},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Ian, SCIM_PINYIN_Yi, SCIM_PINYIN_An},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Iang, SCIM_PINYIN_Yi, SCIM_PINYIN_Ang},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Iao, SCIM_PINYIN_Yi, SCIM_PINYIN_Ao},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Ie, SCIM_PINYIN_Yi, SCIM_PINYIN_E},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_In, SCIM_PINYIN_Yi, SCIM_PINYIN_In},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Ing, SCIM_PINYIN_Yi, SCIM_PINYIN_Ing},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Iong, SCIM_PINYIN_Yi, SCIM_PINYIN_Ong},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Iou, SCIM_PINYIN_Yi, SCIM_PINYIN_Ou},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Iu, SCIM_PINYIN_Yi, SCIM_PINYIN_U},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_U, SCIM_PINYIN_Wo, SCIM_PINYIN_U},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Ua, SCIM_PINYIN_Wo, SCIM_PINYIN_A},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Uai, SCIM_PINYIN_Wo, SCIM_PINYIN_Ai},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Uan, SCIM_PINYIN_Wo, SCIM_PINYIN_An},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Uang, SCIM_PINYIN_Wo, SCIM_PINYIN_Ang},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Uei, SCIM_PINYIN_Wo, SCIM_PINYIN_Ei},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Uen, SCIM_PINYIN_Wo, SCIM_PINYIN_En},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Ueng, SCIM_PINYIN_Wo, SCIM_PINYIN_Eng},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Ui, SCIM_PINYIN_Wo, SCIM_PINYIN_Ei},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Un, SCIM_PINYIN_Wo, SCIM_PINYIN_En},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Uo, SCIM_PINYIN_Wo, SCIM_PINYIN_O},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Ue, SCIM_PINYIN_Yi, SCIM_PINYIN_Ue},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_V, SCIM_PINYIN_Yi, SCIM_PINYIN_U},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Van, SCIM_PINYIN_Yi, SCIM_PINYIN_Uan},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Ve, SCIM_PINYIN_Yi, SCIM_PINYIN_Ue},
{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Vn, SCIM_PINYIN_Yi, SCIM_PINYIN_Un},
*/
{SCIM_PINYIN_Ne, SCIM_PINYIN_Ve, SCIM_PINYIN_Ne, SCIM_PINYIN_Ue},
{SCIM_PINYIN_Le, SCIM_PINYIN_Ve, SCIM_PINYIN_Le, SCIM_PINYIN_Ue},
{SCIM_PINYIN_Ji, SCIM_PINYIN_V, SCIM_PINYIN_Ji, SCIM_PINYIN_U},
{SCIM_PINYIN_Ji, SCIM_PINYIN_Van, SCIM_PINYIN_Ji, SCIM_PINYIN_Uan},
{SCIM_PINYIN_Ji, SCIM_PINYIN_Ve, SCIM_PINYIN_Ji, SCIM_PINYIN_Ue},
{SCIM_PINYIN_Ji, SCIM_PINYIN_Vn, SCIM_PINYIN_Ji, SCIM_PINYIN_Un},
{SCIM_PINYIN_Qi, SCIM_PINYIN_V, SCIM_PINYIN_Qi, SCIM_PINYIN_U},
{SCIM_PINYIN_Qi, SCIM_PINYIN_Van, SCIM_PINYIN_Qi, SCIM_PINYIN_Uan},
{SCIM_PINYIN_Qi, SCIM_PINYIN_Ve, SCIM_PINYIN_Qi, SCIM_PINYIN_Ue},
{SCIM_PINYIN_Qi, SCIM_PINYIN_Vn, SCIM_PINYIN_Qi, SCIM_PINYIN_Un},
{SCIM_PINYIN_Xi, SCIM_PINYIN_V, SCIM_PINYIN_Xi, SCIM_PINYIN_U},
{SCIM_PINYIN_Xi, SCIM_PINYIN_Van, SCIM_PINYIN_Xi, SCIM_PINYIN_Uan},
{SCIM_PINYIN_Xi, SCIM_PINYIN_Ve, SCIM_PINYIN_Xi, SCIM_PINYIN_Ue},
{SCIM_PINYIN_Xi, SCIM_PINYIN_Vn, SCIM_PINYIN_Xi, SCIM_PINYIN_Un}
};
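// (Code-viewer page residue, not part of scim_pinyin.cpp: keyboard-shortcut help --
//  copy code: Ctrl + C; search code: Ctrl + F; fullscreen mode: F11;
//  toggle theme: Ctrl + Shift + D; show shortcuts: ?;
//  increase font size: Ctrl + =; decrease font size: Ctrl + -)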