📄 scim_pinyin.cpp
字号:
for (unsigned int i=0; i<sizeof(rules)/sizeof(ReplaceRulePair); i++) {
if (rules[i].initial == initial && rules[i].final == final) {
initial = rules[i].new_initial;
final = rules[i].new_final;
break;
}
}
if (initial != SCIM_PINYIN_ZeroInitial && final == SCIM_PINYIN_Iou)
final = SCIM_PINYIN_Iu;
if (initial != SCIM_PINYIN_ZeroInitial && final == SCIM_PINYIN_Uei)
final = SCIM_PINYIN_Ui;
if (initial != SCIM_PINYIN_ZeroInitial && final == SCIM_PINYIN_Uen)
final = SCIM_PINYIN_Un;
}
// Split a pinyin string into parsed keys, scanning backwards from the end of
// the string.
//
// At each end position the widest candidate span (at most
// SCIM_PINYIN_KEY_MAXLEN characters, never reaching before the start of the
// input) is tried first and then narrowed from the left.  Only spans that
// begin with an alphabetic character are offered to set_key (), and a span is
// accepted only when set_key () reports a length equal to the whole span.
// An accepted key is stored together with its offset and length, and the end
// position moves to where that key began.  A single apostrophe directly in
// front of the end position is skipped at the top of every pass.
//
// When no span ending at the current position is accepted, everything
// collected so far is thrown away and the scan restarts on a shorter prefix
// of the input (keylen holds the length of that prefix).
//
// validator  forwarded to set_key ().
// vec        cleared on entry; on return holds the accepted keys in
//            left-to-right order.
// key        NUL-terminated input; a null pointer is treated as empty input.
//
// Returns the sum of the lengths of the accepted keys (skipped apostrophes
// are not counted), or 0 when nothing was accepted.
int
PinyinKey::parse_pinyin_key (const PinyinValidator &validator,
                             PinyinParsedKeyVector &vec,
                             const char *key)
{
    vec.clear ();

    if (!key) return 0;

    int keylen = strlen (key);
    if (keylen <= 0) return 0;

    PinyinParsedKey aKey;
    int usedlen = 0;
    const char *key_end = key + keylen;

    while (key_end > key) {
        if (*(key_end-1) == '\'') {
            --key_end;
            --keylen;
            if (keylen == 0) break;
        }

        // Clamp the window by length instead of computing
        // key_end - SCIM_PINYIN_KEY_MAXLEN directly: that expression would
        // form a pointer in front of the buffer for short remainders.
        const int avail  = static_cast<int> (key_end - key);
        const int maxlen = static_cast<int> (SCIM_PINYIN_KEY_MAXLEN);
        const char *key_start = key_end - (avail < maxlen ? avail : maxlen);

        bool found = false;

        for (; key_start < key_end; ++key_start) {
            // isalpha () is only defined for values representable as
            // unsigned char (or EOF), so convert before the call.
            if (!isalpha (static_cast<unsigned char> (*key_start)))
                continue;

            const int len = aKey.set_key (validator, key_start, key_end - key_start);
            if (len == key_end - key_start) {
                found = true;
                aKey.set_pos (key_start - key);
                aKey.set_length (len);
                usedlen += len;
                key_end = key_start;
                vec.push_back (aKey);
                break;
            }
        }

        if (!found) {
            // Nothing ends here: drop the partial result and retry on a
            // shorter prefix.
            -- keylen;
            key_end = key + keylen;
            usedlen = 0;
            vec.clear ();
        }
    }

    // Keys were collected right-to-left; hand them back in reading order.
    std::reverse (vec.begin (), vec.end ());
    return usedlen;
}
// Split a pinyin string into plain PinyinKey values, scanning backwards from
// the end of the string.
//
// At each end position the widest candidate span (at most
// SCIM_PINYIN_KEY_MAXLEN characters, never reaching before the start of the
// input) is tried first and then narrowed from the left.  Only spans that
// begin with an alphabetic character are offered to set_key (), and a span is
// accepted only when set_key () reports a length equal to the whole span.
// After an accepted key the end position moves to where that key began.  A
// single apostrophe directly in front of the end position is skipped at the
// top of every pass.
//
// When no span ending at the current position is accepted, everything
// collected so far is thrown away and the scan restarts on a shorter prefix
// of the input (keylen holds the length of that prefix).
//
// validator  forwarded to set_key ().
// vec        cleared on entry; on return holds the accepted keys in
//            left-to-right order.
// key        NUL-terminated input; a null pointer is treated as empty input.
//
// Returns the sum of the lengths of the accepted keys (skipped apostrophes
// are not counted), or 0 when nothing was accepted.
int
PinyinKey::parse_pinyin_key (const PinyinValidator &validator,
                             PinyinKeyVector &vec,
                             const char *key)
{
    vec.clear ();

    if (!key) return 0;

    int keylen = strlen (key);
    if (keylen <= 0) return 0;

    PinyinKey aKey;
    int usedlen = 0;
    const char *key_end = key + keylen;

    while (key_end > key) {
        if (*(key_end-1) == '\'') {
            --key_end;
            --keylen;
            if (keylen == 0) break;
        }

        // Clamp the window by length instead of computing
        // key_end - SCIM_PINYIN_KEY_MAXLEN directly: that expression would
        // form a pointer in front of the buffer for short remainders.
        const int avail  = static_cast<int> (key_end - key);
        const int maxlen = static_cast<int> (SCIM_PINYIN_KEY_MAXLEN);
        const char *key_start = key_end - (avail < maxlen ? avail : maxlen);

        bool found = false;

        for (; key_start < key_end; ++key_start) {
            // isalpha () is only defined for values representable as
            // unsigned char (or EOF), so convert before the call.
            if (!isalpha (static_cast<unsigned char> (*key_start)))
                continue;

            const int len = aKey.set_key (validator, key_start, key_end - key_start);
            if (len == key_end - key_start) {
                found = true;
                usedlen += len;
                key_end = key_start;
                vec.push_back (aKey);
                break;
            }
        }

        if (!found) {
            // Nothing ends here: drop the partial result and retry on a
            // shorter prefix.
            -- keylen;
            key_end = key + keylen;
            usedlen = 0;
            vec.clear ();
        }
    }

    // Keys were collected right-to-left; hand them back in reading order.
    std::reverse (vec.begin (), vec.end ());
    return usedlen;
}
//////////////////////////////////////////////////////////////////////////////
// implementation of PinyinValidator
// Build a validator from a pinyin table.
// All of the work is delegated to initialize ().  The PinyinCustomSettings
// parameter is currently disabled in this interface.
PinyinValidator::PinyinValidator (const PinyinTable *table)
{
    this->initialize (table);
}
void
PinyinValidator::initialize (/*const PinyinCustomSettings &custom,*/
const PinyinTable *table)
{
memset (m_bitmap, 0, PinyinValidatorBitmapSize);
if (!table || table->size() <=0) return;
for (int i=0; i<SCIM_PINYIN_InitialNumber; i++) {
for (int j=0; j<SCIM_PINYIN_FinalNumber; j++) {
for (int k=0; k<SCIM_PINYIN_ToneNumber; k++) {
PinyinKey key(static_cast<PinyinInitial>(i),
static_cast<PinyinFinal>(j),
static_cast<PinyinTone>(k));
if (!table->has_key (key)) {
int val = (k * SCIM_PINYIN_FinalNumber + j) * SCIM_PINYIN_InitialNumber + i;
m_bitmap [val >> 3] |= (1 << (val % 8));
}
}
}
}
}
// Report whether a key is considered valid.
//
// A key whose initial and final are both the zero value is always rejected.
// Any other key is valid exactly when its bit in m_bitmap is clear; the bit
// position uses the same tone/final/initial layout that initialize () fills.
bool
PinyinValidator::operator () (PinyinKey key) const
{
    const bool is_blank = key.get_initial () == SCIM_PINYIN_ZeroInitial &&
                          key.get_final () == SCIM_PINYIN_ZeroFinal;
    if (is_blank)
        return false;

    const int bit = (key.get_tone () * SCIM_PINYIN_FinalNumber + key.get_final ()) *
                    SCIM_PINYIN_InitialNumber + key.get_initial ();

    const int flagged = m_bitmap [bit >> 3] & (1 << (bit % 8));
    return flagged == 0;
}
/*
//////////////////////////////////////////////////////////////////////////////
// implementation of PinyinKey comparison classes
static int
__scim_pinyin_compare_initial (const PinyinCustomSettings &custom,
PinyinInitial lhs,
PinyinInitial rhs)
{
// Ambiguity LeRi, NeLe, FoHe will break binary search
// we treat them as special cases
if (custom.use_ambiguities [SCIM_PINYIN_AmbLeRi]) {
if (lhs == SCIM_PINYIN_Ri) lhs = SCIM_PINYIN_Le;
if (rhs == SCIM_PINYIN_Ri) rhs = SCIM_PINYIN_Le;
}
if (custom.use_ambiguities [SCIM_PINYIN_AmbNeLe]) {
if (lhs == SCIM_PINYIN_Ne) lhs = SCIM_PINYIN_Le;
if (rhs == SCIM_PINYIN_Ne) rhs = SCIM_PINYIN_Le;
}
if (custom.use_ambiguities [SCIM_PINYIN_AmbFoHe]) {
if (lhs == SCIM_PINYIN_He) lhs = SCIM_PINYIN_Fo;
if (rhs == SCIM_PINYIN_He) rhs = SCIM_PINYIN_Fo;
}
if ((lhs == rhs) ||
(custom.use_ambiguities [SCIM_PINYIN_AmbZhiZi] &&
((lhs == SCIM_PINYIN_Zhi && rhs == SCIM_PINYIN_Zi) ||
(lhs == SCIM_PINYIN_Zi && rhs == SCIM_PINYIN_Zhi))) ||
(custom.use_ambiguities [SCIM_PINYIN_AmbChiCi] &&
((lhs == SCIM_PINYIN_Chi && rhs == SCIM_PINYIN_Ci) ||
(lhs == SCIM_PINYIN_Ci && rhs == SCIM_PINYIN_Chi))) ||
(custom.use_ambiguities [SCIM_PINYIN_AmbShiSi] &&
((lhs == SCIM_PINYIN_Shi && rhs == SCIM_PINYIN_Si) ||
(lhs == SCIM_PINYIN_Si && rhs == SCIM_PINYIN_Shi))))
return 0;
else if (lhs < rhs) return -1;
return 1;
}
static int
__scim_pinyin_compare_final (const PinyinCustomSettings &custom,
PinyinFinal lhs,
PinyinFinal rhs)
{
if(((lhs == rhs) ||
(custom.use_ambiguities [SCIM_PINYIN_AmbAnAng] &&
((lhs == SCIM_PINYIN_An && rhs == SCIM_PINYIN_Ang) ||
(lhs == SCIM_PINYIN_Ang && rhs == SCIM_PINYIN_An))) ||
(custom.use_ambiguities [SCIM_PINYIN_AmbEnEng] &&
((lhs == SCIM_PINYIN_En && rhs == SCIM_PINYIN_Eng) ||
(lhs == SCIM_PINYIN_Eng && rhs == SCIM_PINYIN_En))) ||
(custom.use_ambiguities [SCIM_PINYIN_AmbInIng] &&
((lhs == SCIM_PINYIN_In && rhs == SCIM_PINYIN_Ing) ||
(lhs == SCIM_PINYIN_Ing && rhs == SCIM_PINYIN_In)))))
return 0;
else if (custom.use_incomplete && (lhs == SCIM_PINYIN_ZeroFinal || rhs == SCIM_PINYIN_ZeroFinal))
return 0;
else if (lhs < rhs) return -1;
return 1;
}
static int
__scim_pinyin_compare_tone (const PinyinCustomSettings &custom,
PinyinTone lhs,
PinyinTone rhs)
{
if(lhs == rhs || lhs == SCIM_PINYIN_ZeroTone || rhs == SCIM_PINYIN_ZeroTone || !custom.use_tone)
return 0;
else if (lhs < rhs) return -1;
return 1;
}
bool
PinyinKeyLessThan::operator () (PinyinKey lhs, PinyinKey rhs) const
{
switch (__scim_pinyin_compare_initial (m_custom,
static_cast<PinyinInitial>(lhs.m_initial),
static_cast<PinyinInitial>(rhs.m_initial))) {
case 0:
switch (__scim_pinyin_compare_final (m_custom,
static_cast<PinyinFinal>(lhs.m_final),
static_cast<PinyinFinal>(rhs.m_final))) {
case 0:
switch (__scim_pinyin_compare_tone (m_custom,
static_cast<PinyinTone>(lhs.m_tone),
static_cast<PinyinTone>(rhs.m_tone))) {
case -1:
return true;
default:
return false;
}
case -1:
return true;
default:
return false;
}
case -1:
return true;
default:
return false;
}
return false;
}
bool
PinyinKeyEqualTo::operator () (PinyinKey lhs, PinyinKey rhs) const
{
if (!__scim_pinyin_compare_initial (m_custom,
static_cast<PinyinInitial>(lhs.m_initial),
static_cast<PinyinInitial>(rhs.m_initial)) &&
!__scim_pinyin_compare_final (m_custom,
static_cast<PinyinFinal>(lhs.m_final),
static_cast<PinyinFinal>(rhs.m_final)) &&
!__scim_pinyin_compare_tone (m_custom,
static_cast<PinyinTone>(lhs.m_tone),
static_cast<PinyinTone>(rhs.m_tone)))
return true;
return false;
}
*/
//////////////////////////////////////////////////////////////////////////////
// implementation of PinyinEntry
std::ostream&
PinyinEntry::output_text (std::ostream &os) const
{
m_key.output_text (os) << "\t" << size() << "\t";
for (std::vector<CharFrequencyPair>::const_iterator i = m_chars.begin(); i != m_chars.end(); i++) {
utf8_write_wchar (os, i->first);
os << i->second << ' ';
}
os << '\n';
return os;
}
/*
std::ostream&
PinyinEntry::output_binary (std::ostream &os) const
{
unsigned char bytes [8];
m_key.output_binary (os);
scim_uint32tobytes (bytes, (uint32) size());
os.write ((char*)bytes, sizeof (unsigned char) * 4);
for (std::vector<CharFrequencyPair>::const_iterator i = m_chars.begin(); i != m_chars.end(); i++) {
utf8_write_wchar (os, i->first);
scim_uint32tobytes (bytes, i->second);
os.write ((char*)bytes, sizeof (unsigned char) * 4);
}
return os;
}
*/
std::istream&
PinyinEntry::input_text (const PinyinValidator &validator, std::istream &is)
{
m_chars.clear();
String value;
uint32 n, len, freq;
ucs4_t wc;
m_key.input_text (validator, is);
is >> n;
m_chars.reserve (n+1);
for (uint32 i=0; i<n; i++) {
is >> value;
if(strcmp(value.c_str(),"0")==0){
continue;
}
if ((len = utf8_mbtowc (&wc, (const unsigned char*)(value.c_str()), value.length())) > 0) {
if (value.length () > len)
freq = atoi (value.c_str() + len);
else
freq = 0;
m_chars.push_back (CharFrequencyPair (wc,freq));
}
}
sort ();
std::vector <CharFrequencyPair> (m_chars).swap (m_chars);
return is;
}
/*
std::istream&
PinyinEntry::input_binary (const PinyinValidator &validator, std::istream &is)
{
m_chars.clear();
uint32 n, freq;
ucs4_t wc;
unsigned char bytes [8];
m_key.input_binary (validator, is);
is.read ((char*)bytes, sizeof (unsigned char) * 4);
n = scim_bytestouint32 (bytes);
m_chars.reserve (n+1);
for (uint32 i=0; i<n; i++) {
if ((wc = utf8_read_wchar (is)) > 0) {
is.read ((char*)bytes, sizeof (unsigned char) * 4);
freq = scim_bytestouint32 (bytes);
m_chars.push_back (CharFrequencyPair (wc, freq));
}
}
sort ();
std::vector <CharFrequencyPair> (m_chars).swap (m_chars);
return is;
}
*/
//////////////////////////////////////////////////////////////////////////////
// implementation of PinyinTable
// Construct a table and load its contents from a stream.
//
// A null validator is replaced by the address of
// scim_default_pinyin_validator; the table is then filled via input (is).
//
// The PinyinCustomSettings parameter and the initialisers that depended on
// it (m_revmap_ok, m_pinyin_key_less, m_pinyin_key_equal, m_custom) are
// currently disabled.
PinyinTable::PinyinTable (const PinyinValidator *validator,
                          std::istream &is)
    : m_validator (validator ? validator : &scim_default_pinyin_validator)
{
    input (is);
}
PinyinTable::PinyinTable (/*(const PinyinCustomSettings &custom,*/
const PinyinValidator *validator,
const char *tablefile)
: /*m_revmap_ok (false),
m_pinyin_key_less (custom),
m_pinyin_key_equal (custom),*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -