📄 scim_pinyin.cpp
字号:
m_validator (validator)
// m_custom (custom)
{
if (!m_validator) m_validator = &scim_default_pinyin_validator;
if (tablefile) load_table (tablefile);
}
bool
PinyinTable::output (std::ostream &os, bool binary) const
{
unsigned char bytes [8];
if (!binary) {
os << scim_pinyin_table_text_header << "\n";
os << scim_pinyin_table_version << "\n";
os << m_table.size () << "\n";
for (PinyinEntryVector::const_iterator i = m_table.begin(); i!=m_table.end(); i++)
i->output_text (os);
}/* else {
os << scim_pinyin_table_binary_header << "\n";
os << scim_pinyin_table_version << "\n";
scim_uint32tobytes (bytes, (uint32) m_table.size ());
os.write ((char*)bytes, sizeof (unsigned char) * 4);
for (PinyinEntryVector::const_iterator i = m_table.begin(); i!=m_table.end(); i++)
i->output_binary (os);
}*/
return true;
}
bool
PinyinTable::input (std::istream &is)
{
char header [40];
bool binary;
if (!is) return false;
is.getline (header, 40);
if (strncmp (header,
scim_pinyin_table_text_header,
strlen (scim_pinyin_table_text_header)) == 0) {
binary = false;
} else if (strncmp (header,
scim_pinyin_table_binary_header,
strlen (scim_pinyin_table_binary_header)) == 0) {
binary = true;
} else {
return false;
}
is.getline (header, 40);
if (strncmp (header, scim_pinyin_table_version, strlen (scim_pinyin_table_version)) != 0)
return false;
uint32 i;
uint32 n;
PinyinEntryVector::iterator ev;
if (!binary) {
is >> n;
// load pinyin table
for (i=0; i<n; i++) {
PinyinEntry entry (*m_validator, is/*, false*/);
//if (!m_custom.use_tone) {
entry.set_key (PinyinKey (entry.get_key ().get_initial (),
entry.get_key ().get_final (),
SCIM_PINYIN_ZeroTone));
//}
if (entry.get_key().get_final() == SCIM_PINYIN_ZeroFinal) {
std::cerr << "Invalid entry: " << entry << "\n";
} else {
if ((ev = find_exact_entry (entry)) == m_table.end())
m_table.push_back (entry);
else {
for (uint32 i=0; i<entry.size(); i++) {
ev->insert (entry.get_char_with_frequency_by_index (i));
}
}
}
}
/*
} else {
unsigned char bytes [8];
is.read ((char*) bytes, sizeof (unsigned char) * 4);
n = scim_bytestouint32 (bytes);
// load pinyin table
for (i=0; i<n; i++) {
PinyinEntry entry (*m_validator, is, true);
if (!m_custom.use_tone) {
entry.set_key (PinyinKey (entry.get_key ().get_initial (),
entry.get_key ().get_final (),
SCIM_PINYIN_ZeroTone));
}
if (entry.get_key().get_final() == SCIM_PINYIN_ZeroFinal) {
std::cerr << "Invalid entry: " << entry << "\n";
} else {
if ((ev = find_exact_entry (entry)) == m_table.end())
m_table.push_back (entry);
else {
for (uint32 i=0; i<entry.size(); i++) {
ev->insert (entry.get_char_with_frequency_by_index (i));
}
}
}
}*/
}
sort ();
return true;
}
/**
 * Load the table from a file.
 *
 * @param tablefile  Path of the table file to open for reading.
 * @return true only if the file could be opened, input () succeeded and
 *         the table is non-empty afterwards.
 */
bool
PinyinTable::load_table (const char *tablefile)
{
    std::ifstream table_stream (tablefile);

    if (!table_stream)
        return false;

    if (!input (table_stream))
        return false;

    return m_table.size () != 0;
}
/**
 * Save the table to a file.
 *
 * @param tablefile  Path of the file to open for writing.
 * @param binary     Forwarded unchanged to output ().
 * @return false if the file could not be opened, otherwise the result of
 *         output ().
 */
bool
PinyinTable::save_table (const char *tablefile, bool binary) const
{
    std::ofstream table_stream (tablefile);

    if (!table_stream)
        return false;

    return output (table_stream, binary);
}
/*
void
PinyinTable::update_custom_settings (const PinyinCustomSettings &custom,
const PinyinValidator *validator)
{
m_pinyin_key_less = PinyinKeyLessThan (custom);
m_pinyin_key_equal = PinyinKeyEqualTo (custom);
m_validator = validator;
if (!m_validator)
m_validator = &scim_default_pinyin_validator;
m_custom = custom;
sort ();
}
int
PinyinTable::get_all_chars (std::vector<ucs4_t> &vec) const
{
std::vector<CharFrequencyPair> all;
vec.clear ();
get_all_chars_with_frequencies (all);
for (std::vector<CharFrequencyPair>::const_iterator i = all.begin ();
i != all.end (); ++i)
vec.push_back (i->first);
return vec.size ();
}
int
PinyinTable::get_all_chars_with_frequencies (std::vector<CharFrequencyPair> &vec) const
{
vec.clear ();
for (PinyinEntryVector::const_iterator i = m_table.begin (); i!= m_table.end (); i++)
i->get_all_chars_with_frequencies (vec);
if (!vec.size ()) return 0;
std::sort (vec.begin (), vec.end (), CharFrequencyPairGreaterThanByCharAndFrequency ());
vec.erase (std::unique (vec.begin (), vec.end (), CharFrequencyPairEqualToByChar ()), vec.end ());
std::sort (vec.begin (), vec.end (), CharFrequencyPairGreaterThanByFrequency ());
return vec.size ();
}
*/
int
PinyinTable::find_chars (std::vector <ucs4_t> &vec, PinyinKey key) const
{
std::vector<CharFrequencyPair> all;
vec.clear ();
find_chars_with_frequencies (all, key);
for (std::vector<CharFrequencyPair>::const_iterator i = all.begin ();
i != all.end (); ++i)
vec.push_back (i->first);
return vec.size ();
}
int
PinyinTable::find_chars_with_frequencies (std::vector <CharFrequencyPair> &vec, PinyinKey key) const
{
vec.clear ();
std::pair<PinyinEntryVector::const_iterator, PinyinEntryVector::const_iterator> range =
std::equal_range(m_table.begin(), m_table.end(), key, m_pinyin_key_less);
for (PinyinEntryVector::const_iterator i = range.first; i!= range.second; i++) {
i->get_all_chars_with_frequencies (vec);
}
if (!vec.size ()) return 0;
std::sort (vec.begin (), vec.end (), CharFrequencyPairGreaterThanByCharAndFrequency ());
vec.erase (std::unique (vec.begin (), vec.end (), CharFrequencyPairEqualToByChar ()), vec.end ());
std::sort (vec.begin (), vec.end (), CharFrequencyPairGreaterThanByFrequency ());
return vec.size ();
}
/**
 * Erase a character from the table, with the key given as a string.
 *
 * Builds a PinyinKey from the string using m_validator and forwards to the
 * PinyinKey overload.
 *
 * @param hz   Character to erase.
 * @param key  Pinyin key in string form.
 */
void
PinyinTable::erase (ucs4_t hz, const char *key)
{
    const PinyinKey parsed_key (*m_validator, key);
    erase (hz, parsed_key);
}
/**
 * Erase a character from the table.
 *
 * @param hz   Character to erase.
 * @param key  If key.zero () is true the character is erased from every
 *             entry; otherwise only from the entries equivalent to key
 *             under m_pinyin_key_less.
 */
void
PinyinTable::erase (ucs4_t hz, PinyinKey key)
{
    // Default to the whole table; narrow down when a real key is given.
    PinyinEntryVector::iterator first = m_table.begin ();
    PinyinEntryVector::iterator last  = m_table.end ();

    if (!key.zero ()) {
        const std::pair<PinyinEntryVector::iterator, PinyinEntryVector::iterator> hits =
            std::equal_range (first, last, key, m_pinyin_key_less);
        first = hits.first;
        last  = hits.second;
    }

    for (; first != last; ++first)
        first->erase (hz);

    // Reverse-map maintenance is disabled:
    //erase_from_reverse_map (hz, key);
}
/**
 * Sum the frequency of a character over the matching table entries.
 *
 * @param ch   Character to query.
 * @param key  Key to restrict the query to.  If key.zero () is true the
 *             keys are obtained from find_keys () instead.
 * @return Sum of PinyinEntry::get_char_frequency (ch) over every entry
 *         equivalent to one of the keys; 0 when there is no key or match.
 */
uint32
PinyinTable::get_char_frequency (ucs4_t ch, PinyinKey key)
{
    typedef PinyinEntryVector::iterator EntryIter;

    PinyinKeyVector keys;
    if (key.zero ())
        find_keys (keys, ch);
    else
        keys.push_back (key);

    uint32 total = 0;

    for (PinyinKeyVector::iterator k = keys.begin (); k != keys.end (); ++k) {
        const std::pair<EntryIter, EntryIter> hits =
            std::equal_range (m_table.begin (), m_table.end (), *k, m_pinyin_key_less);

        EntryIter entry = hits.first;
        while (entry != hits.second) {
            total += entry->get_char_frequency (ch);
            ++entry;
        }
    }

    return total;
}
/**
 * Set the frequency of a character, spread over the matching entries.
 *
 * For each key, every entry equivalent to it receives
 * freq / (number of keys * number of entries matching that key),
 * using integer division.
 *
 * @param ch    Character to update.
 * @param freq  Total frequency to distribute.
 * @param key   Key to restrict the update to.  If key.zero () is true the
 *              keys are obtained from find_keys () instead.
 */
void
PinyinTable::set_char_frequency (ucs4_t ch, uint32 freq, PinyinKey key)
{
    typedef PinyinEntryVector::iterator EntryIter;

    PinyinKeyVector keyvec;
    if (key.zero ())
        find_keys (keyvec, ch);
    else
        keyvec.push_back (key);

    for (PinyinKeyVector::iterator k = keyvec.begin (); k != keyvec.end (); ++k) {
        const std::pair<EntryIter, EntryIter> range =
            std::equal_range (m_table.begin (), m_table.end (), *k, m_pinyin_key_less);

        // Empty range: nothing to update (and nothing to divide by).
        if (range.first == range.second)
            continue;

        // Same for every entry of this key, so compute it once.
        const size_t shares = keyvec.size () * (range.second - range.first);

        for (EntryIter entry = range.first; entry != range.second; ++entry)
            entry->set_char_frequency (ch, freq / shares);
    }
}
/**
 * Refresh the frequency of a character in the matching entries.
 *
 * Calls PinyinEntry::refresh_char_frequency (hz, shift) on every entry
 * equivalent to one of the selected keys under m_pinyin_key_less.
 *
 * @param hz     Character to refresh; a zero value is ignored.
 * @param shift  Passed through to PinyinEntry::refresh_char_frequency ().
 * @param key    Key to restrict the refresh to.  If key.zero () is true the
 *               keys are obtained from find_keys () instead.
 */
void
PinyinTable::refresh (ucs4_t hz, uint32 shift, PinyinKey key)
{
    if (!hz) return;

    typedef PinyinEntryVector::iterator EntryIter;

    PinyinKeyVector keyvec;
    if (key.zero ())
        find_keys (keyvec, hz);
    else
        keyvec.push_back (key);

    for (PinyinKeyVector::iterator i = keyvec.begin (); i != keyvec.end (); ++i) {
        const std::pair<EntryIter, EntryIter> range =
            std::equal_range (m_table.begin (), m_table.end (), *i, m_pinyin_key_less);

        for (EntryIter vi = range.first; vi != range.second; ++vi)
            vi->refresh_char_frequency (hz, shift);
    }
}
/**
 * Insert a character into the table, with the key given as a string.
 *
 * Builds a PinyinKey from the string using m_validator and forwards to the
 * PinyinKey overload.
 *
 * @param hz   Character to insert.
 * @param key  Pinyin key in string form.
 */
void
PinyinTable::insert (ucs4_t hz, const char *key)
{
    const PinyinKey parsed_key (*m_validator, key);
    insert (hz, parsed_key);
}
/**
 * Insert a character under a pinyin key with an initial frequency of 0.
 *
 * If the first entry not ordered before key (per m_pinyin_key_less) is
 * equal to key according to m_pinyin_key_equal, the character is added to
 * that entry.  Otherwise a new entry for key is created and inserted at
 * that position, which keeps the table ordered.
 *
 * @param hz   Character to insert.
 * @param key  Pinyin key the character belongs to.
 */
void
PinyinTable::insert (ucs4_t hz, PinyinKey key)
{
    PinyinEntryVector::iterator pos =
        std::lower_bound (m_table.begin (), m_table.end (), key, m_pinyin_key_less);

    if (pos == m_table.end () || !m_pinyin_key_equal (*pos, key)) {
        // No entry for this key yet: create one at the sorted position.
        PinyinEntry fresh_entry (key);
        fresh_entry.insert (CharFrequencyPair (hz, 0));
        m_table.insert (pos, fresh_entry);
    } else {
        pos->insert (CharFrequencyPair (hz, 0));
    }

    // Reverse-map maintenance is disabled:
    //insert_to_reverse_map (hz, key);
}
size_t
PinyinTable::size () const
{
size_t num = 0;
for (PinyinEntryVector::const_iterator i = m_table.begin(); i!= m_table.end(); i++)
num += i->size ();
return num;
}
/**
 * Find the pinyin keys of a character.
 *
 * The reverse-map lookup this function relied on is disabled (kept below
 * for reference), so at present the output is always emptied and 0 is
 * returned.
 *
 * @param vec   Output; always cleared.
 * @param code  Character to look up (currently unused).
 * @return Number of keys stored in vec, i.e. 0.
 */
int
PinyinTable::find_keys (PinyinKeyVector &vec, ucs4_t code)
{
    // Disabled reverse-map implementation:
    //
    // if (!m_revmap_ok) create_reverse_map ();
    //
    // std::pair<ReversePinyinMap::const_iterator, ReversePinyinMap::const_iterator> result =
    //     m_revmap.equal_range (code);
    // for (ReversePinyinMap::const_iterator i = result.first; i != result.second; i++)
    //     vec.push_back (i->second);

    vec.clear ();

    const int found = vec.size ();
    return found;
}
int
PinyinTable::find_key_strings (std::vector<PinyinKeyVector> &vec, const WideString & str)
{
vec.clear ();
PinyinKeyVector *key_vectors = new PinyinKeyVector [str.size()];
for (uint32 i=0; i<str.length (); i++)
find_keys (key_vectors[i], str [i]);
PinyinKeyVector key_buffer;
create_pinyin_key_vector_vector (vec, key_buffer, key_vectors, 0, str.size());
delete [] key_vectors;
return vec.size ();
}
bool
PinyinTable::has_key (const char *key) const
{
return has_key (PinyinKey (*m_validator, key));
}
/**
 * Check whether the table has an entry for a key.
 *
 * @param key  Pinyin key to search for.
 * @return true if std::binary_search over m_table with m_pinyin_key_less
 *         finds an entry equivalent to key.
 */
bool
PinyinTable::has_key (PinyinKey key) const
{
    PinyinEntryVector::const_iterator first = m_table.begin ();
    PinyinEntryVector::const_iterator last  = m_table.end ();

    const bool found = std::binary_search (first, last, key, m_pinyin_key_less);
    return found;
}
void
PinyinTable::sort ()
{
std::sort (m_table.begin(), m_table.end(), m_pinyin_key_less);
}
/*
void
PinyinTable::create_reverse_map ()
{
m_revmap.clear();
PinyinKey key;
for (PinyinEntryVector::iterator i = m_table.begin(); i != m_table.end(); i++) {
key = i->get_key();
for (unsigned int j = 0; j < i->size (); j++) {
m_revmap.insert (ReversePinyinPair (i->get_char_by_index (j), key));
}
}
m_revmap_ok = true;
}
void
PinyinTable::insert_to_reverse_map (ucs4_t code, PinyinKey key)
{
if (key.zero())
return;
std::pair<ReversePinyinMap::iterator, ReversePinyinMap::iterator> result =
m_revmap.equal_range (code);
for (ReversePinyinMap::iterator i = result.first; i != result.second; i++)
if (m_pinyin_key_equal (i->second, key)) return;
m_revmap.insert (ReversePinyinPair (code, key));
}
void
PinyinTable::erase_from_reverse_map (ucs4_t code, PinyinKey key)
{
if (key.zero()) {
m_revmap.erase (code);
} else {
std::pair<ReversePinyinMap::iterator, ReversePinyinMap::iterator> result =
m_revmap.equal_range (code);
for (ReversePinyinMap::iterator i = result.first; i != result.second; i++)
if (m_pinyin_key_equal (i->second, key)) {
m_revmap.erase (i);
break;
}
}
}
*/
/**
 * Find the first entry whose key matches exactly.
 *
 * Performs a linear scan of m_table using PinyinKeyExactEqualTo, so it does
 * not depend on the table being sorted.
 *
 * @param key  Key to search for.
 * @return Iterator to the first matching entry, or m_table.end () if there
 *         is none.
 */
PinyinTable::PinyinEntryVector::iterator
PinyinTable::find_exact_entry (PinyinKey key)
{
    PinyinKeyExactEqualTo same_key;

    PinyinEntryVector::iterator pos = m_table.begin ();
    while (pos != m_table.end () && !same_key (*pos, key))
        ++pos;

    return pos;
}
/**
 * Recursively enumerate all key sequences taking one key per position.
 *
 * For position index, each key of key_vectors[index] is appended to
 * key_buffer in turn; at the last position the buffer is stored in vv,
 * otherwise the function recurses into the next position.  The key is
 * removed from the buffer again before trying the next one.
 *
 * @param vv           Output; completed key sequences are appended.
 * @param key_buffer   Sequence built so far (restored on return).
 * @param key_vectors  Array of len key lists, one per position.
 * @param index        Position currently being filled.
 * @param len          Number of positions / elements in key_vectors.
 */
void
PinyinTable::create_pinyin_key_vector_vector (std::vector<PinyinKeyVector> &vv,
                                              PinyinKeyVector &key_buffer,
                                              PinyinKeyVector *key_vectors,
                                              int index,
                                              int len)
{
    // Never index outside the array (e.g. len == 0 for an empty string).
    if (index < 0 || index >= len)
        return;

    const bool last_position = (index == len - 1);

    for (unsigned int i = 0; i < key_vectors [index].size (); ++i) {
        key_buffer.push_back ((key_vectors [index]) [i]);

        if (last_position)
            vv.push_back (key_buffer);
        else
            create_pinyin_key_vector_vector (vv, key_buffer, key_vectors, index + 1, len);

        key_buffer.pop_back ();
    }
}
/*
vi:ts=4:nowrap:ai
*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -