⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 scim_pinyin.h

📁 拼音输入法，可在 Linux 上运行，感觉还可以，包含 *.c 和 *.h 文件。
💻 H
📖 第 1 页 / 共 2 页
字号:
 * between two pinyin keys.
 */
class PinyinKeyExactLessThan
	: public std::binary_function <PinyinKey, PinyinKey, bool>
{
public:
	bool operator () (PinyinKey lhs,
					  PinyinKey rhs) const {
		if (lhs.m_initial < rhs.m_initial)
			return true;
		else if (lhs.m_initial == rhs.m_initial) {
			if (lhs.m_final < rhs.m_final)
				return true;
			else if	(lhs.m_final == rhs.m_final &&
					 lhs.m_tone < rhs.m_tone)
				return true;
		}
		return false;
	}
};

/**
 * a binary functional class to do bitwise equal to comparison
 * between two pinyin keys.
 */
class PinyinKeyExactEqualTo
	: public std::binary_function <PinyinKey, PinyinKey, bool>
{
public:
	bool operator () (PinyinKey lhs,
					  PinyinKey rhs) const {
		if (lhs.m_initial == rhs.m_initial &&
			lhs.m_final == rhs.m_final &&
			lhs.m_tone == rhs.m_tone)
			return true;
		return false;
	}
};

/**
 * A PinyinKey that also remembers which part of the source string
 * it was parsed from (start position and length).
 */
struct PinyinParsedKey : public PinyinKey
{
	int m_pos;		/**< where this key starts in the whole string. */
	int m_length;	/**< how much of the string this key consumed. */

public:
	/**
	 * Build a parsed key from its position, its length and the three
	 * components forwarded to the PinyinKey base class.
	 * Every argument has a default, so this is also the default constructor.
	 */
	PinyinParsedKey (int pos = 0,
					 int length = 0,
					 PinyinInitial initial = SCIM_PINYIN_ZeroInitial,
					 PinyinFinal final = SCIM_PINYIN_ZeroFinal,
					 PinyinTone tone = SCIM_PINYIN_ZeroTone)
		: PinyinKey (initial, final, tone),
		  m_pos (pos),
		  m_length (length)
	{
	}

	/** @return the start position of the key in the whole string. */
	int get_pos () const {
		return m_pos;
	}

	/** @return the length of the key string. */
	int get_length () const {
		return m_length;
	}

	/** @return the end position of the key, i.e. position plus length. */
	int get_end_pos () const {
		const int end_pos = m_pos + m_length;
		return end_pos;
	}

	/** Change the start position of the key. */
	void set_pos (int pos) {
		m_pos = pos;
	}

	/** Change the length of the key. */
	void set_length (int length) {
		m_length = length;
	}
};

/**
 * "Less than" predicate that looks only at the char (`first`) of a
 * CharFrequencyPair. The mixed overloads accept a bare ucs4_t on either
 * side, so the predicate can be used to search a pair sequence by char.
 */
class CharFrequencyPairLessThanByChar {
public:
	/** pair vs. pair. */
	bool operator () (const CharFrequencyPair &lhs,
					  const CharFrequencyPair &rhs) const {
		return rhs.first > lhs.first;
	}

	/** pair vs. bare char. */
	bool operator () (const CharFrequencyPair &lhs,
					  ucs4_t rhs) const {
		return rhs > lhs.first;
	}

	/** bare char vs. pair. */
	bool operator () (ucs4_t lhs,
					  const CharFrequencyPair &rhs) const {
		return rhs.first > lhs;
	}
};

/**
 * "Greater than" predicate that looks only at the char (`first`) of a
 * CharFrequencyPair. The mixed overloads accept a bare ucs4_t on either side.
 */
class CharFrequencyPairGreaterThanByChar {
public:
	/** pair vs. pair. */
	bool operator () (const CharFrequencyPair &lhs,
					  const CharFrequencyPair &rhs) const {
		return rhs.first < lhs.first;
	}

	/** pair vs. bare char. */
	bool operator () (const CharFrequencyPair &lhs,
					  ucs4_t rhs) const {
		return rhs < lhs.first;
	}

	/** bare char vs. pair. */
	bool operator () (ucs4_t lhs,
					  const CharFrequencyPair &rhs) const {
		return rhs.first < lhs;
	}
};

/**
 * "Less than" predicate that looks only at the frequency (`second`) of a
 * CharFrequencyPair. The mixed overloads accept a bare uint32 frequency
 * on either side.
 */
class CharFrequencyPairLessThanByFrequency {
public:
	/** pair vs. pair. */
	bool operator () (const CharFrequencyPair &lhs,
					  const CharFrequencyPair &rhs) const {
		return rhs.second > lhs.second;
	}

	/** pair vs. bare frequency. */
	bool operator () (const CharFrequencyPair &lhs,
					  uint32 rhs) const {
		return rhs > lhs.second;
	}

	/** bare frequency vs. pair. */
	bool operator () (uint32 lhs,
					  const CharFrequencyPair &rhs) const {
		return rhs.second > lhs;
	}
};

/**
 * "Greater than" predicate that looks only at the frequency (`second`) of
 * a CharFrequencyPair. The mixed overloads accept a bare uint32 frequency
 * on either side.
 */
class CharFrequencyPairGreaterThanByFrequency {
public:
	/** pair vs. pair. */
	bool operator () (const CharFrequencyPair &lhs,
					  const CharFrequencyPair &rhs) const {
		return rhs.second < lhs.second;
	}

	/** pair vs. bare frequency. */
	bool operator () (const CharFrequencyPair &lhs,
					  uint32 rhs) const {
		return rhs < lhs.second;
	}

	/** bare frequency vs. pair. */
	bool operator () (uint32 lhs,
					  const CharFrequencyPair &rhs) const {
		return rhs.second < lhs;
	}
};

/**
 * "Less than" predicate over the whole pair: the char (`first`) is the
 * primary criterion, the frequency (`second`) breaks ties.
 */
class CharFrequencyPairLessThanByCharAndFrequency {
public:
	bool operator () (const CharFrequencyPair &lhs,
					  const CharFrequencyPair &rhs) const {
		// Different chars: the char alone decides.
		if (lhs.first != rhs.first)
			return lhs.first < rhs.first;

		// Same char: fall back to the frequency.
		return lhs.second < rhs.second;
	}
};

/**
 * "Greater than" predicate over the whole pair: the char (`first`) is the
 * primary criterion, the frequency (`second`) breaks ties.
 */
class CharFrequencyPairGreaterThanByCharAndFrequency {
public:
	bool operator () (const CharFrequencyPair &lhs,
					  const CharFrequencyPair &rhs) const {
		// Different chars: the char alone decides.
		if (lhs.first != rhs.first)
			return lhs.first > rhs.first;

		// Same char: fall back to the frequency.
		return lhs.second > rhs.second;
	}
};

/**
 * Equality predicate that looks only at the char (`first`) of a
 * CharFrequencyPair; the frequencies are ignored.
 */
class CharFrequencyPairEqualToByChar {
public:
	bool operator () (const CharFrequencyPair &lhs,
					  const CharFrequencyPair &rhs) const {
		const bool same_char = (lhs.first == rhs.first);
		return same_char;
	}
};

/**
 * A PinyinEntry has a pinyin key and a set of ucs4_t,
 * whose pronouncation are same as the key.
 */
class PinyinEntry
{
	PinyinKey m_key;
		/**< the pinyin key of this entry */

	std::vector <CharFrequencyPair> m_chars;
		/**< the vector to store the chars and their frequencies */

public:
	/**
	 * constructor
	 */
	PinyinEntry (PinyinKey key)
		: m_key (key) {}

	/**
	 * copy constructor
	 */
	PinyinEntry (const PinyinEntry &entry)
		: m_key (entry.m_key), m_chars (entry.m_chars) {}

	/**
	 * constructor.
	 * read this entry from a stream.
	 */
	PinyinEntry (const PinyinValidator &validator,
				 std::istream &is
		     /*bool binary = false*/) {
		/*if (binary) input_binary (validator, is);
		  else*/ input_text (validator, is);
	}

	/**
	 * copy operator.
	 */
	const PinyinEntry& operator = (const PinyinEntry &entry) {
		if (this != &entry) {
			m_key = entry.m_key;
			m_chars = entry.m_chars;
		}
		return *this;
	}

	/**
	 * set pinyin key of this entry.
	 */
	void set_key (PinyinKey key) {
		m_key = key;
	}

	/**
	 * get the pinyin key of this entry.
	 */
	PinyinKey get_key () const {
		return m_key;
	}

	/**
	 * check if this entry has the char.
	 */
	bool has_char (ucs4_t c) const {
		return std::binary_search (
						m_chars.begin (),
						m_chars.end (),
						c,
						CharFrequencyPairLessThanByChar ());
	}

	/**
	 * sort all chars. 
	 */
	void sort () {
		std::sort (m_chars.begin(), m_chars.end());
	}

	/**
	 * clear this entry.
	 */
	void clear () {
		std::vector <CharFrequencyPair> ().swap (m_chars);
	}

	/**
	 * return entry size (number of chars).
	 */
	size_t size () const {
		return m_chars.size();
	}

	/**
	 * insert a char into this entry.
	 */
	void insert (const CharFrequencyPair &ch) {
		std::vector<CharFrequencyPair>::iterator i =
			std::lower_bound (
						m_chars.begin (),
						m_chars.end (),
						ch.first,
						CharFrequencyPairLessThanByChar ());

		if (i != m_chars.end () && i->first == ch.first) {
				if (ch.second > i->second)
					i->second = ch.second;
		} else {
			m_chars.insert (i, ch);
		}
	}

	/**
	 * erase a char from this entry.
	 */
	void erase (ucs4_t c) {
		std::vector<CharFrequencyPair>::iterator i =
			std::lower_bound (
						m_chars.begin (),
						m_chars.end (),
						c,
						CharFrequencyPairLessThanByChar ());

		if (i != m_chars.end() && i->first == c) m_chars.erase (i);
	}

	/**
	 * get the char at position index.
	 */
	ucs4_t get_char_by_index (unsigned int index) const {
		return m_chars [index].first;
	}

	/**
	 * get the char with its frequency.
	 */
	const CharFrequencyPair & get_char_with_frequency_by_index (unsigned int index) const {
		return m_chars [index];
	}

	int get_all_chars (std::vector<ucs4_t> &vec) const {
		for (std::vector<CharFrequencyPair>::const_iterator i = m_chars.begin ();
				i != m_chars.end (); ++ i)
			vec.push_back (i->first);
		return vec.size ();
	}

	int get_all_chars_with_frequencies (std::vector<CharFrequencyPair> &vec) const {
		for (std::vector<CharFrequencyPair>::const_iterator i = m_chars.begin ();
				i != m_chars.end (); ++ i)
			vec.push_back (*i);
		return vec.size ();
	}

	uint32 get_char_frequency (ucs4_t ch) const {
		std::vector<CharFrequencyPair>::const_iterator i =
			std::lower_bound (
						m_chars.begin (),
						m_chars.end (),
						ch,
						CharFrequencyPairLessThanByChar ());

		if (i != m_chars.end() && i->first == ch)
			return i->second;

		return 0;
	}

	void set_char_frequency (ucs4_t ch, uint32 freq) {
		std::vector<CharFrequencyPair>::iterator i =
			std::lower_bound (
						m_chars.begin (),
						m_chars.end (),
						ch,
						CharFrequencyPairLessThanByChar ());

		if (i != m_chars.end() && i->first == ch)
			i->second = freq;
	}

	void refresh_char_frequency (ucs4_t ch, uint32 shift) {
		std::vector<CharFrequencyPair>::iterator i =
			std::lower_bound (
						m_chars.begin (),
						m_chars.end (),
						ch,
						CharFrequencyPairLessThanByChar ());

		if (i != m_chars.end() && i->first == ch) {
			uint32 delta = (SCIM_MAX_CHAR_FREQUENCY - i->second);
			if (delta) {
				delta >>= shift;
				if (!delta) ++ delta;
				i->second = i->second + delta;
			}
		}
	}

	/**
	 * @sa get_key
	 */
	operator PinyinKey () const {
		return m_key;
	}

	/**
	 * output the content of this entry to ostream in text format.
	 */
	std::ostream& output_text (std::ostream &os) const;

	/**
	 * read the content of this entry from istream in text format.
	 */
	std::istream& input_text (const PinyinValidator &validator, std::istream &is);
	
	/**
	 * output in binary format.
	std::ostream& output_binary (std::ostream &os) const;
	 */

	/**
	 * input in binary format.
	std::istream& input_binary (const PinyinValidator &validator, std::istream &is);
	 */
};

/**
 * a table to store all of the Hanzi characters and their pinyin keys.
 *
 * The data is held as a vector of PinyinEntry objects (m_table).
 * The reverse (Hanzi -> Pinyin) map and the custom settings support are
 * commented out in this version of the class; the disabled declarations
 * are kept below for reference.
 */
class PinyinTable
{
	/* disabled: types of the reverse pinyin map.
#if defined (HAVE_HASH_MAP)
	typedef std::hash_multimap<ucs4_t,PinyinKey, std::hash <unsigned long> > ReversePinyinMap;
#elif defined (HAVE_EXT_HASH_MAP)
	typedef __gnu_cxx::hash_multimap<ucs4_t,PinyinKey, __gnu_cxx::hash <unsigned long> > ReversePinyinMap;
#else
	typedef std::multimap<ucs4_t, PinyinKey> ReversePinyinMap;
#endif

	typedef std::pair<ucs4_t,PinyinKey> ReversePinyinPair;
	*/
	typedef std::vector<PinyinEntry> PinyinEntryVector;

	/**
	 * the vector to store all of the pinyin entries.
	 */
	PinyinEntryVector m_table;

	/**
	 * disabled: the multimap to store reverse pinyin map.
	 *
	 * The reverse pinyin map is used to do Hanzi -> Pinyin mapping.
	ReversePinyinMap m_revmap;
	 */

	/**
	 * disabled: indicates whether the reverse map is up to date.
	bool m_revmap_ok;
	 */

	/**
	 * less than function object of PinyinKey.
	 */
	PinyinKeyExactLessThan m_pinyin_key_less;

	/**
	 * equal to function object of PinyinKey.
	 */
	PinyinKeyExactEqualTo m_pinyin_key_equal;

	/**
	 * the validator to validate all of the pinyin keys.
	 * NOTE(review): held as a plain pointer; presumably not owned by the
	 * table -- confirm against the constructor definitions.
	 */
	const PinyinValidator *m_validator;

public:
	/**
	 * constructor.
	 *
	 * The custom settings parameter is commented out in this version.
	 *
	 * @param validator the validator to validate all of the pinyin keys.
	 * @param tablefile the file name of pinyin table, defaults to NULL.
	 *                  NOTE(review): presumably nothing is loaded when it
	 *                  is NULL -- confirm in the definition.
	 */
	PinyinTable (/*const PinyinCustomSettings &custom,*/
				 const PinyinValidator *validator,
				 const char *tablefile = NULL);

	/**
	 * constructor taking an input stream instead of a file name.
	 *
	 * @param validator the validator to validate all of the pinyin keys.
	 * @param is the stream passed in place of a table file;
	 *           presumably the table is read from it -- definition not
	 *           visible here.
	 */
	PinyinTable (/*const PinyinCustomSettings &custom,*/
				 const PinyinValidator *validator,
				 std::istream &is);

	// stream based output / input of the table; defined elsewhere.
	// binary defaults to false.
	bool output (std::ostream &os, bool binary = false) const;
	bool input (std::istream &is);

	// file based counterparts taking a table file name; defined elsewhere.
	bool load_table (const char *tablefile);
	bool save_table (const char *tablefile, bool binary = false) const;
	/* disabled:
	void update_custom_settings (const PinyinCustomSettings &custom,
								 const PinyinValidator *validator);

	int get_all_chars (std::vector<ucs4_t> &vec) const;
	int get_all_chars_with_frequencies (std::vector<CharFrequencyPair> &vec) const;
	*/

	// lookups by pinyin key; results are delivered through vec.
	// NOTE(review): the int result is presumably a count -- confirm in
	// the definitions.
	int find_chars (std::vector<ucs4_t> &vec, PinyinKey key) const;
	int find_chars_with_frequencies (std::vector<CharFrequencyPair> &vec, PinyinKey key) const;

	// lookup of pinyin keys by char code (declared non-const).
	int find_keys (PinyinKeyVector &vec, ucs4_t code);

	// lookup of key sequences for a whole wide string (declared non-const).
	int find_key_strings (std::vector<PinyinKeyVector> &vec, const WideString & str);

	// erase a char; the key is given either as a string or as a PinyinKey.
	void erase (ucs4_t hz, const char *key);
	void erase (ucs4_t hz, PinyinKey key);

	// key defaults to a default constructed PinyinKey.
	uint32 get_char_frequency (ucs4_t ch, PinyinKey key = PinyinKey ());

	// key defaults to a default constructed PinyinKey.
	void set_char_frequency (ucs4_t ch, uint32 freq, PinyinKey key = PinyinKey ());

	/**
	 * grow the char frequency by 1/(2^shift).
	 * shift defaults to 31, key to a default constructed PinyinKey.
	 */
	void refresh (ucs4_t hz, uint32 shift = 31, PinyinKey key = PinyinKey ());

	// insert a char; the key is given either as a string or as a PinyinKey.
	void insert (ucs4_t hz, const char *key);
	void insert (ucs4_t hz, PinyinKey key);

	// defined elsewhere; note that number_of_entry () below is the one
	// that returns the number of PinyinEntry objects in the table.
	size_t size () const;

	// number of PinyinEntry objects stored in m_table.
	size_t number_of_entry () const { return m_table.size (); }

	// clear this table
	void clear () {
		m_table.clear ();
		// disabled together with the reverse map:
		// m_revmap.clear ();
		// m_revmap_ok = false;
	}

	// check for a key, given either as a string or as a PinyinKey.
	bool has_key (const char *key) const;
	bool has_key (PinyinKey key) const;

private:
	/**
	 * sort all pinyin entries.
	 */
	void sort ();

	// disabled together with the reverse map:
	// void create_reverse_map ();

	// void insert_to_reverse_map (ucs4_t code, PinyinKey key);
	// void erase_from_reverse_map (ucs4_t code, PinyinKey key);

	// returns an iterator into m_table for the given key; defined elsewhere.
	PinyinEntryVector::iterator find_exact_entry (PinyinKey key);

	// helper taking an output vector of key vectors, a key buffer, an
	// array of key vectors and an index / len pair; defined elsewhere.
	// NOTE(review): presumably the recursive worker behind
	// find_key_strings () -- confirm in the definition.
	void create_pinyin_key_vector_vector (std::vector<PinyinKeyVector> &vv,
										  PinyinKeyVector &key_buffer,
										  PinyinKeyVector *key_vectors,
										  int index,
										  int len);
};

/**
 * Stream insertion for a PinyinKey: forwards to PinyinKey::output_text
 * and hands back the stream reference that call returns.
 */
inline std::ostream&
operator << (std::ostream& os, PinyinKey key)
{
	std::ostream &written = key.output_text (os);
	return written;
}

/**
 * Stream insertion for a PinyinEntry: forwards to PinyinEntry::output_text
 * and hands back the stream reference that call returns.
 */
inline std::ostream&
operator << (std::ostream& os, const PinyinEntry &entry)
{
	std::ostream &written = entry.output_text (os);
	return written;
}

/**
 * @brief Write a wide char to ostream.
 *
 * The content written into the ostream will be converted into utf-8 encoding.
 * Only the declaration is given here; the function is defined elsewhere.
 *
 * @param os the stream to be written.
 * @param wc the wide char (ucs4_t code) to be written to the stream.
 * @return the same stream object reference.
 */
std::ostream & utf8_write_wchar (std::ostream &os, ucs4_t wc);


#endif

/*
vi:ts=4:nowrap:ai
*/

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -