⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 scim_pinyin.cpp

📁 linux pda 输入法
💻 CPP
📖 第 1 页 / 共 3 页
字号:
	  m_validator (validator)
	  // m_custom (custom)
{
	if (!m_validator) m_validator = &scim_default_pinyin_validator;

	if (tablefile) load_table (tablefile);
}

/**
 * Write the pinyin table to a stream.
 *
 * Only the text format is implemented in this build.  The text layout is
 * the text header line, the version line, the number of entries, and then
 * every entry written with PinyinEntry::output_text().
 *
 * @param os     Destination stream.
 * @param binary Request the binary format.  The binary writer is disabled,
 *               so such a request is rejected.
 * @return true if the table was written as text and the stream has not
 *         failed; false if binary output was requested or writing failed.
 */
bool
PinyinTable::output (std::ostream &os, bool binary) const
{
	// Report the unsupported mode instead of returning success without
	// having written anything.
	if (binary) return false;

	os << scim_pinyin_table_text_header << "\n";
	os << scim_pinyin_table_version << "\n";
	os << m_table.size () << "\n";

	for (PinyinEntryVector::const_iterator i = m_table.begin (); i != m_table.end (); ++i)
		i->output_text (os);

	/* Disabled binary writer, kept for reference:
		unsigned char bytes [8];

		os << scim_pinyin_table_binary_header << "\n";
		os << scim_pinyin_table_version << "\n";

		scim_uint32tobytes (bytes, (uint32) m_table.size ());
		os.write ((char*)bytes, sizeof (unsigned char) * 4);

		for (PinyinEntryVector::const_iterator i = m_table.begin(); i!=m_table.end(); i++)
			i->output_binary (os);
	*/

	// Surface write errors to the caller.
	return !os.fail ();
}

bool
PinyinTable::input (std::istream &is)
{
	char header [40];
	bool binary;

	if (!is) return false;
	
	is.getline (header, 40);

	if (strncmp (header,
		scim_pinyin_table_text_header,
		strlen (scim_pinyin_table_text_header)) == 0) {
		binary = false;
	} else if (strncmp (header,
		scim_pinyin_table_binary_header,
		strlen (scim_pinyin_table_binary_header)) == 0) {
		binary = true;
	} else {
		return false;
	}

	is.getline (header, 40);
	if (strncmp (header, scim_pinyin_table_version, strlen (scim_pinyin_table_version)) != 0)
		return false;

	uint32 i;
	uint32 n;
	PinyinEntryVector::iterator ev;

	if (!binary) {
		is >> n;

		// load pinyin table
		for (i=0; i<n; i++) {
			PinyinEntry entry (*m_validator, is/*, false*/);
			
			//if (!m_custom.use_tone) {
				entry.set_key (PinyinKey (entry.get_key ().get_initial (),
											  entry.get_key ().get_final (),
											  SCIM_PINYIN_ZeroTone));
				//}

			if (entry.get_key().get_final() == SCIM_PINYIN_ZeroFinal) {
				std::cerr << "Invalid entry: " << entry << "\n";
			} else {
				if ((ev = find_exact_entry (entry)) == m_table.end())
					m_table.push_back (entry);
				else {
					for (uint32 i=0; i<entry.size(); i++) {
						ev->insert (entry.get_char_with_frequency_by_index (i));
					}
				}
			}
		}
		/*
	} else {
		unsigned char bytes [8];
		is.read ((char*) bytes, sizeof (unsigned char) * 4);
		n = scim_bytestouint32 (bytes);

		// load pinyin table
		for (i=0; i<n; i++) {
			PinyinEntry entry (*m_validator, is, true);

			if (!m_custom.use_tone) {
				entry.set_key (PinyinKey (entry.get_key ().get_initial (),
											  entry.get_key ().get_final (),
											  SCIM_PINYIN_ZeroTone));
			}

			if (entry.get_key().get_final() == SCIM_PINYIN_ZeroFinal) {
				std::cerr << "Invalid entry: " << entry << "\n";
			} else {
				if ((ev = find_exact_entry (entry)) == m_table.end())
					m_table.push_back (entry);
				else {
					for (uint32 i=0; i<entry.size(); i++) {
						ev->insert (entry.get_char_with_frequency_by_index (i));
					}
				}
			}
			}*/
		}
	sort ();

	return true;
}

/**
 * Load a pinyin table from a file.
 *
 * @param tablefile Path of the table file to open.
 * @return true only if the file could be opened, input() succeeded and
 *         the table is non-empty afterwards.
 */
bool
PinyinTable::load_table (const char *tablefile)
{
	std::ifstream ifs (tablefile);

	if (ifs) {
		const bool loaded = input (ifs);
		return loaded && m_table.size () != 0;
	}

	return false;
}

/**
 * Save the pinyin table to a file.
 *
 * @param tablefile Path of the file to write.
 * @param binary    Format flag forwarded to output().
 * @return false if the file could not be opened; otherwise the result of
 *         output().
 */
bool
PinyinTable::save_table (const char *tablefile, bool binary) const
{
	std::ofstream ofs (tablefile);

	if (ofs)
		return output (ofs, binary);

	return false;
}
/*
void
PinyinTable::update_custom_settings (const PinyinCustomSettings &custom,
									 const PinyinValidator *validator)
{
	m_pinyin_key_less  = PinyinKeyLessThan (custom);
	m_pinyin_key_equal = PinyinKeyEqualTo (custom);
	m_validator = validator;

	if (!m_validator)
		m_validator = &scim_default_pinyin_validator;

	m_custom = custom;
	sort ();
}

int
PinyinTable::get_all_chars (std::vector<ucs4_t> &vec) const
{
	std::vector<CharFrequencyPair> all;

	vec.clear ();

	get_all_chars_with_frequencies (all);

	for (std::vector<CharFrequencyPair>::const_iterator i = all.begin ();
			i != all.end (); ++i)
		vec.push_back (i->first);

	return vec.size ();
}

int
PinyinTable::get_all_chars_with_frequencies (std::vector<CharFrequencyPair> &vec) const
{
	vec.clear ();

	for (PinyinEntryVector::const_iterator i = m_table.begin (); i!= m_table.end (); i++)
		i->get_all_chars_with_frequencies (vec);

	if (!vec.size ()) return 0;

	std::sort (vec.begin (), vec.end (), CharFrequencyPairGreaterThanByCharAndFrequency ());
	vec.erase (std::unique (vec.begin (), vec.end (), CharFrequencyPairEqualToByChar ()), vec.end ());
	std::sort (vec.begin (), vec.end (), CharFrequencyPairGreaterThanByFrequency ());

	return vec.size ();
}
*/
int
PinyinTable::find_chars (std::vector <ucs4_t> &vec, PinyinKey key) const
{
	std::vector<CharFrequencyPair> all;

	vec.clear ();

	find_chars_with_frequencies (all, key);

	for (std::vector<CharFrequencyPair>::const_iterator i = all.begin ();
			i != all.end (); ++i)
		vec.push_back (i->first);

	return vec.size ();
}

int
PinyinTable::find_chars_with_frequencies (std::vector <CharFrequencyPair> &vec, PinyinKey key) const
{
	vec.clear ();

	std::pair<PinyinEntryVector::const_iterator, PinyinEntryVector::const_iterator> range =
		std::equal_range(m_table.begin(), m_table.end(), key, m_pinyin_key_less);

	for (PinyinEntryVector::const_iterator i = range.first; i!= range.second; i++) {
		i->get_all_chars_with_frequencies (vec);
	}

	if (!vec.size ()) return 0;

	std::sort (vec.begin (), vec.end (), CharFrequencyPairGreaterThanByCharAndFrequency ());
	vec.erase (std::unique (vec.begin (), vec.end (), CharFrequencyPairEqualToByChar ()), vec.end ());
	std::sort (vec.begin (), vec.end (), CharFrequencyPairGreaterThanByFrequency ());

	return vec.size ();
}

/**
 * Erase a character, with the key given as a pinyin string.
 *
 * The string is turned into a PinyinKey using the table's validator and
 * the call is forwarded to the PinyinKey overload.
 *
 * @param hz  Character to erase.
 * @param key Pinyin string.
 */
void
PinyinTable::erase (ucs4_t hz, const char *key)
{
	const PinyinKey parsed (*m_validator, key);
	erase (hz, parsed);
}

/**
 * Erase a character from the table.
 *
 * @param hz  Character to erase.
 * @param key If zero, the character is erased from every entry; otherwise
 *            only from the entries equivalent to key under
 *            m_pinyin_key_less.
 */
void
PinyinTable::erase (ucs4_t hz, PinyinKey key)
{
	// Default to the whole table; narrow down when a real key is given.
	PinyinEntryVector::iterator first = m_table.begin ();
	PinyinEntryVector::iterator last  = m_table.end ();

	if (!key.zero ()) {
		std::pair<PinyinEntryVector::iterator, PinyinEntryVector::iterator> span =
			std::equal_range (first, last, key, m_pinyin_key_less);
		first = span.first;
		last  = span.second;
	}

	for (; first != last; ++first)
		first->erase (hz);
	//erase_from_reverse_map (hz, key);
}

/**
 * Sum the frequency of a character over the matching entries.
 *
 * @param ch  Character to query.
 * @param key If zero, the keys are obtained from find_keys(); otherwise
 *            only this key is used.
 * @return sum of PinyinEntry::get_char_frequency(ch) over every entry
 *         equivalent to one of the keys under m_pinyin_key_less.
 */
uint32
PinyinTable::get_char_frequency (ucs4_t ch, PinyinKey key)
{
	typedef PinyinEntryVector::iterator EntryIter;

	PinyinKeyVector keys;
	if (key.zero ()) {
		find_keys (keys, ch);
	} else {
		keys.push_back (key);
	}

	uint32 total = 0;
	for (PinyinKeyVector::iterator k = keys.begin (); k != keys.end (); ++k) {
		std::pair<EntryIter, EntryIter> span =
			std::equal_range (m_table.begin (), m_table.end (), *k, m_pinyin_key_less);

		EntryIter e = span.first;
		while (e != span.second) {
			total += e->get_char_frequency (ch);
			++e;
		}
	}

	return total;
}

/**
 * Set the frequency of a character, split evenly over the matching entries.
 *
 * @param ch   Character to update.
 * @param freq Frequency to distribute.  Each matching entry receives
 *             freq divided by (number of keys * number of entries
 *             matching the current key).
 * @param key  If zero, the keys are obtained from find_keys(); otherwise
 *             only this key is used.
 */
void
PinyinTable::set_char_frequency (ucs4_t ch, uint32 freq, PinyinKey key)
{
	typedef PinyinEntryVector::iterator EntryIter;

	PinyinKeyVector keys;
	if (key.zero ()) {
		find_keys (keys, ch);
	} else {
		keys.push_back (key);
	}

	for (PinyinKeyVector::iterator k = keys.begin (); k != keys.end (); ++k) {
		std::pair<EntryIter, EntryIter> span =
			std::equal_range (m_table.begin (), m_table.end (), *k, m_pinyin_key_less);

		// The division is only evaluated inside the loop, i.e. when the
		// range is non-empty, so the divisor is never zero here.
		EntryIter e = span.first;
		while (e != span.second) {
			e->set_char_frequency (ch, freq / (keys.size () * (span.second - span.first)));
			++e;
		}
	}
}

/**
 * Refresh the frequency of a character in the matching entries.
 *
 * Calls PinyinEntry::refresh_char_frequency(hz, shift) on every entry
 * that is equivalent to one of the selected keys under m_pinyin_key_less.
 *
 * @param hz    Character to refresh; a zero character is ignored.
 * @param shift Value forwarded to refresh_char_frequency().
 * @param key   If zero, the keys are obtained from find_keys(); otherwise
 *              only this key is used.
 */
void
PinyinTable::refresh (ucs4_t hz, uint32 shift, PinyinKey key)
{
	if (!hz) return;

	PinyinKeyVector keyvec;

	if (key.zero ())
		find_keys (keyvec, hz);
	else
		keyvec.push_back (key);

	for (PinyinKeyVector::iterator i = keyvec.begin (); i != keyvec.end (); ++i) {
		std::pair<PinyinEntryVector::iterator, PinyinEntryVector::iterator> range =
			std::equal_range (m_table.begin (), m_table.end (), *i, m_pinyin_key_less);

		for (PinyinEntryVector::iterator vi = range.first; vi != range.second; ++vi)
			vi->refresh_char_frequency (hz, shift);
	}
}

/**
 * Insert a character, with the key given as a pinyin string.
 *
 * The string is turned into a PinyinKey using the table's validator and
 * the call is forwarded to the PinyinKey overload.
 *
 * @param hz  Character to insert.
 * @param key Pinyin string.
 */
void
PinyinTable::insert (ucs4_t hz, const char *key)
{
	const PinyinKey parsed (*m_validator, key);
	insert (hz, parsed);
}

/**
 * Insert a character under a pinyin key with frequency 0.
 *
 * If the entry at the lower bound of key compares equal under
 * m_pinyin_key_equal, the character is added to it.  Otherwise a new entry
 * is created and inserted at that position.
 *
 * @param hz  Character to insert.
 * @param key Pinyin key to insert it under.
 */
void
PinyinTable::insert (ucs4_t hz, PinyinKey key)
{
	const CharFrequencyPair fresh (hz, 0);

	PinyinEntryVector::iterator pos =
		std::lower_bound (m_table.begin (), m_table.end (), key, m_pinyin_key_less);

	const bool present = (pos != m_table.end ()) && m_pinyin_key_equal (*pos, key);

	if (present) {
		pos->insert (fresh);
	} else {
		PinyinEntry created (key);
		created.insert (fresh);
		m_table.insert (pos, created);
	}
	//insert_to_reverse_map (hz, key);
}

size_t
PinyinTable::size () const
{
	size_t num = 0;
	for (PinyinEntryVector::const_iterator i = m_table.begin(); i!= m_table.end(); i++)
		num += i->size ();

	return num;
}

/**
 * Intended to collect the pinyin keys associated with a character.
 *
 * NOTE: the reverse-map lookup is commented out, so this function
 * currently only clears vec and returns its size, which is 0.  Callers
 * that pass a zero key and rely on this function therefore operate on an
 * empty key list.
 *
 * @param vec  Output vector; cleared on entry.
 * @param code Character to look up (unused while the lookup is disabled).
 * @return number of keys in vec.
 */
int
PinyinTable::find_keys (PinyinKeyVector &vec, ucs4_t code)
{
	// Disabled: lazy creation of the reverse map.
	//	if (!m_revmap_ok) create_reverse_map ();

	vec.clear ();
	// Disabled: reverse-map lookup that would fill vec with the keys of code.
	/*
	std::pair<ReversePinyinMap::const_iterator, ReversePinyinMap::const_iterator> result = 
		m_revmap.equal_range (code);
	
	for (ReversePinyinMap::const_iterator i = result.first; i != result.second; i++)
		vec.push_back (i->second);
	*/
	return vec.size ();
}

int
PinyinTable::find_key_strings (std::vector<PinyinKeyVector> &vec, const WideString & str)
{
	vec.clear ();

	PinyinKeyVector *key_vectors = new PinyinKeyVector [str.size()];

	for (uint32 i=0; i<str.length (); i++)
		find_keys (key_vectors[i], str [i]);

	PinyinKeyVector key_buffer;

	create_pinyin_key_vector_vector (vec, key_buffer, key_vectors, 0, str.size());

	delete [] key_vectors;
	return vec.size ();
}

bool
PinyinTable::has_key (const char *key) const
{
	return has_key (PinyinKey (*m_validator, key));
}

/**
 * Check whether the table holds an entry for a pinyin key.
 *
 * @param key Pinyin key to search for.
 * @return true if a binary search with m_pinyin_key_less finds an
 *         equivalent entry.
 */
bool
PinyinTable::has_key (PinyinKey key) const
{
	const bool found =
		std::binary_search (m_table.begin (), m_table.end (), key, m_pinyin_key_less);

	return found;
}

void
PinyinTable::sort ()
{
	std::sort (m_table.begin(), m_table.end(), m_pinyin_key_less);
}
/*
void
PinyinTable::create_reverse_map ()
{
	m_revmap.clear();

	PinyinKey key;

	for (PinyinEntryVector::iterator i = m_table.begin(); i != m_table.end(); i++) {
		key = i->get_key();
		for (unsigned int j = 0; j < i->size (); j++) {
			m_revmap.insert (ReversePinyinPair (i->get_char_by_index (j), key));
		}
	}

	m_revmap_ok = true;
}

void
PinyinTable::insert_to_reverse_map (ucs4_t code, PinyinKey key)
{
	if (key.zero())
		return;

	std::pair<ReversePinyinMap::iterator, ReversePinyinMap::iterator> result = 
		m_revmap.equal_range (code);

	for (ReversePinyinMap::iterator i = result.first; i != result.second; i++)
		if (m_pinyin_key_equal (i->second, key)) return;

	m_revmap.insert (ReversePinyinPair (code, key));
}

void
PinyinTable::erase_from_reverse_map (ucs4_t code, PinyinKey key)
{
	if (key.zero()) {
		m_revmap.erase (code);
	} else {
		std::pair<ReversePinyinMap::iterator, ReversePinyinMap::iterator> result = 
			m_revmap.equal_range (code);

		for (ReversePinyinMap::iterator i = result.first; i != result.second; i++)
			if (m_pinyin_key_equal (i->second, key)) {
				m_revmap.erase (i);
				break;
			}
	}
}
*/
/**
 * Locate the first entry whose key matches exactly.
 *
 * Performs a linear scan using PinyinKeyExactEqualTo.
 *
 * @param key Key to search for.
 * @return iterator to the first matching entry, or m_table.end () if
 *         there is none.
 */
PinyinTable::PinyinEntryVector::iterator
PinyinTable::find_exact_entry (PinyinKey key)
{
	PinyinKeyExactEqualTo same_key;
	PinyinEntryVector::iterator it = m_table.begin ();

	while (it != m_table.end () && !same_key (*it, key))
		++it;

	return it;
}

void
PinyinTable::create_pinyin_key_vector_vector (std::vector<PinyinKeyVector> &vv,
											  PinyinKeyVector &key_buffer,
											  PinyinKeyVector *key_vectors,
											  int index,
											  int len)
{
	for (unsigned int i=0; i< key_vectors[index].size(); i++) {
		key_buffer.push_back ((key_vectors[index])[i]);
		if (index == len-1) {
			vv.push_back (key_buffer);
		} else {
			create_pinyin_key_vector_vector (vv, key_buffer, key_vectors, index+1, len);
		}
		key_buffer.pop_back ();
	}
}


/*
vi:ts=4:nowrap:ai
*/

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -