⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 scim_pinyin.h

📁 拼音出入法,在LINUX上可以运行,感觉还可以,包括*.c和*.h.
💻 H
📖 第 1 页 / 共 2 页
字号:
/** @file scim_pinyin.h
 *  @brief the definitions of pinyin related classes and structs.
 */

/*
 * Smart Chinese Input Method
 * 
 * Copyright (c) 2002 James Su <suzhe@turbolinux.com.cn>
 *
 * $Id: scim_pinyin.h,v 1.2 2004/07/17 07:05:32 Lu Mu Exp $
 *
 */

#if !defined (__SCIM_PINYIN_H)
#define __SCIM_PINYIN_H
//#include "config.h"

#if defined (HAVE_HASH_MAP)
#include <hash_map>
#elif defined (HAVE_EXT_HASH_MAP)
#include <ext/hash_map>
#else
#include <map>
#endif

#include <iostream>
#include <fstream>
#include <string>
#include <vector>

#ifndef X86
#include <algo.h>
#endif

#include <stdint.h>
#include <linux/types.h>
/*
#ifdef __STDC_ISO_10646__
    typedef wchar_t ucs4_t;
#else
    typedef uint32 ucs4_t;
#endif
*/
/**
 * character type used for wide (Chinese) text in this header.
 *
 * NOTE(review): despite the "ucs4" name this is an unsigned short (typically
 * 16 bits), not a 32 bit type -- confirm that this range is enough for the
 * character data that is stored in it.
 */
typedef unsigned short ucs4_t;

/** byte string type, and string type built from ucs4_t characters. */
typedef std::basic_string<char> String;
typedef std::basic_string<ucs4_t> WideString;

/** short names for the fixed width unsigned integer types. */
typedef uint16_t uint16;
typedef uint32_t uint32;
typedef uint64_t uint64;


/** presumably the maximum length of one pinyin key string -- TODO confirm at the use sites. */
#define SCIM_PINYIN_KEY_MAXLEN	7
/**
 * all-bits-set value.
 *
 * The expression (~0) has type int (value -1 on two's complement machines);
 * it only becomes a maximum value after conversion to an unsigned type,
 * and the result of that conversion depends on the width of the target type.
 */
#define SCIM_MAX_CHAR_FREQUENCY (~0)

//using namespace scim;
/*
class PinyinError: public Exception {
public:
	PinyinError (const String& what_arg)
		: Exception (String("Pinyin: ") + what_arg) { }
};
*/

// Forward declarations of the classes and structs used in this header.
class PinyinKey;
class PinyinEntry;
class PinyinTable;
class PinyinValidator;

// Comparison functors whose class definitions are currently commented out
// in this header; only the names are declared here.
class PinyinKeyLessThan;
class PinyinKeyEqualTo;

// Comparison functors that are declared as friends of PinyinKey.
class PinyinKeyExactLessThan;
class PinyinKeyExactEqualTo;

struct PinyinParsedKey;
struct PinyinToken;
struct PinyinCustomSettings;

// Container shorthands. The element types may still be incomplete at this
// point; the vectors must only be instantiated after the full definitions.
typedef std::vector<ucs4_t>           CharVector;
typedef std::vector<PinyinKey>        PinyinKeyVector;
typedef std::vector<PinyinParsedKey>  PinyinParsedKeyVector;

// A character paired with a 32 bit unsigned number (its frequency, going by the name).
typedef std::pair<ucs4_t, uint32>     CharFrequencyPair;

// Token tables defined in another translation unit. PinyinKey indexes them
// with the numeric value of its initial / final.
extern const PinyinToken scim_pinyin_initials [];
extern const PinyinToken scim_pinyin_finals [];

// Default settings and default validator objects, defined in another translation unit.
extern const PinyinCustomSettings scim_default_custom_settings;
extern const PinyinValidator scim_default_pinyin_validator;

/**
 * identifiers of the pinyin initial element.
 *
 * A pinyin key can be divided into three tokens:
 * Initial -- such as B P M F D T N L  etc.
 * Final   -- such as A O E I U V etc.
 * Tone    -- can be 1, 2, 3, 4 and 5.
 *
 * Every value is written out explicitly so that the numbering stays fixed
 * when enumerators are inserted or reordered.
 */
enum PinyinInitial
{
	SCIM_PINYIN_ZeroInitial   = 0,	/**< zero initial. indicates an invalid initial */
	SCIM_PINYIN_Bo            = 1,
	SCIM_PINYIN_Ci            = 2,
	SCIM_PINYIN_Chi           = 3,
	SCIM_PINYIN_De            = 4,
	SCIM_PINYIN_Fo            = 5,
	SCIM_PINYIN_Ge            = 6,
	SCIM_PINYIN_He            = 7,
	SCIM_PINYIN_Ji            = 8,
	SCIM_PINYIN_Ke            = 9,
	SCIM_PINYIN_Le            = 10,
	SCIM_PINYIN_Mo            = 11,
	SCIM_PINYIN_Ne            = 12,
	SCIM_PINYIN_Po            = 13,
	SCIM_PINYIN_Qi            = 14,
	SCIM_PINYIN_Ri            = 15,
	SCIM_PINYIN_Si            = 16,
	SCIM_PINYIN_Shi           = 17,
	SCIM_PINYIN_Te            = 18,
	SCIM_PINYIN_Wo            = 19,
	SCIM_PINYIN_Xi            = 20,
	SCIM_PINYIN_Yi            = 21,
	SCIM_PINYIN_Zi            = 22,
	SCIM_PINYIN_Zhi           = 23,
	SCIM_PINYIN_LastInitial   = SCIM_PINYIN_Zhi,	/**< alias of the highest valid initial */
	SCIM_PINYIN_InitialNumber = SCIM_PINYIN_LastInitial + 1	/**< count of initials, the zero initial included */
};

/**
 * identifiers of the pinyin final element.
 *
 * Every value is written out explicitly so that the numbering stays fixed
 * when enumerators are inserted or reordered.
 */
enum PinyinFinal
{
	SCIM_PINYIN_ZeroFinal   = 0,	/**< zero final. indicates an invalid final */
	SCIM_PINYIN_A           = 1,
	SCIM_PINYIN_Ai          = 2,
	SCIM_PINYIN_An          = 3,
	SCIM_PINYIN_Ang         = 4,
	SCIM_PINYIN_Ao          = 5,
	SCIM_PINYIN_E           = 6,
	SCIM_PINYIN_Ei          = 7,
	SCIM_PINYIN_En          = 8,
	SCIM_PINYIN_Eng         = 9,
	SCIM_PINYIN_Er          = 10,
	SCIM_PINYIN_I           = 11,
	SCIM_PINYIN_Ia          = 12,
	SCIM_PINYIN_Ian         = 13,
	SCIM_PINYIN_Iang        = 14,
	SCIM_PINYIN_Iao         = 15,
	SCIM_PINYIN_Ie          = 16,
	SCIM_PINYIN_In          = 17,
	SCIM_PINYIN_Ing         = 18,
	SCIM_PINYIN_Iong        = 19,
	SCIM_PINYIN_Iou         = 20,
	SCIM_PINYIN_Iu          = 21,
	SCIM_PINYIN_Ng          = 22,
	SCIM_PINYIN_O           = 23,
	SCIM_PINYIN_Ong         = 24,
	SCIM_PINYIN_Ou          = 25,
	SCIM_PINYIN_U           = 26,
	SCIM_PINYIN_Ua          = 27,
	SCIM_PINYIN_Uai         = 28,
	SCIM_PINYIN_Uan         = 29,
	SCIM_PINYIN_Uang        = 30,
	SCIM_PINYIN_Ue          = 31,
	SCIM_PINYIN_Uei         = 32,
	SCIM_PINYIN_Uen         = 33,
	SCIM_PINYIN_Ueng        = 34,
	SCIM_PINYIN_Ui          = 35,
	SCIM_PINYIN_Un          = 36,
	SCIM_PINYIN_Uo          = 37,
	SCIM_PINYIN_V           = 38,
	SCIM_PINYIN_Van         = 39,
	SCIM_PINYIN_Ve          = 40,
	SCIM_PINYIN_Vn          = 41,
	SCIM_PINYIN_LastFinal   = SCIM_PINYIN_Vn,	/**< alias of the highest valid final */
	SCIM_PINYIN_FinalNumber = SCIM_PINYIN_LastFinal + 1	/**< count of finals, the zero final included */
};

/**
 * identifiers of the pinyin tone element.
 *
 * Every value is written out explicitly so that the numbering stays fixed
 * when enumerators are inserted or reordered.
 */
enum PinyinTone
{
	SCIM_PINYIN_ZeroTone   = 0,	/**< zero tone. this will be matched with all other tones. */
	SCIM_PINYIN_First      = 1,
	SCIM_PINYIN_Second     = 2,
	SCIM_PINYIN_Third      = 3,
	SCIM_PINYIN_Fourth     = 4,
	SCIM_PINYIN_Fifth      = 5,
	SCIM_PINYIN_LastTone   = SCIM_PINYIN_Fifth,	/**< alias of the highest valid tone */
	SCIM_PINYIN_ToneNumber = SCIM_PINYIN_LastTone + 1	/**< count of tones, the zero tone included */
};

/**
 * description of one pinyin token.
 *
 * A token is either an initial or a final; this struct keeps its name in
 * both ASCII and wide character form together with the two lengths.
 * The member order must not change: it is a plain aggregate and tables
 * of it may be brace initialized positionally.
 */
struct PinyinToken
{
	/** ASCII name of the token. */
	char str[8];

	/** Chinese name in unicode. */
	ucs4_t wstr[4];

	/** length of ASCII name. */
	int len;

	/** length of Chinese name. */
	int wlen;
};

/**
 * Pinyin key class.
 *
 * A pinyin key is a composed element of an initial, a final and a tone,
 * which represents one or several Chinese ideographs.
 *
 * The three parts are stored in bit-fields of 6 + 6 + 4 bits, which is also
 * the layout used by the two byte packing helpers at the end of the class.
 */
class PinyinKey
{
	unsigned int m_initial : 6;		/**< pinyin initial */
	unsigned int m_final : 6;		/**< pinyin final */
	unsigned int m_tone : 4;		/**< pinyin tone */
	/*
	friend class PinyinKeyLessThan;
	friend class PinyinKeyEqualTo;
	*/

	friend class PinyinKeyExactLessThan;
	friend class PinyinKeyExactEqualTo;

public:
	/**
	 * constructor.
	 *
	 * the default constructor of class PinyinKey. Every part that is
	 * not given takes its zero value.
	 */
	PinyinKey (PinyinInitial initial = SCIM_PINYIN_ZeroInitial,
			   PinyinFinal final     = SCIM_PINYIN_ZeroFinal,
			   PinyinTone tone       = SCIM_PINYIN_ZeroTone)
		: m_initial (initial),
		  m_final (final),
		  m_tone (tone) {
	}

	/**
	 * constructor.
	 *
	 * construct a PinyinKey object from a key string, with
	 * specified validator.
	 *
	 * The members start out as the zero key, so the object holds defined
	 * values even if set_key () (implemented outside this header) returns
	 * without storing a result.
	 *
	 * @sa PinyinValidator
	 */
	PinyinKey (const PinyinValidator &validator,
			   const char *key)
		: m_initial (SCIM_PINYIN_ZeroInitial),
		  m_final (SCIM_PINYIN_ZeroFinal),
		  m_tone (SCIM_PINYIN_ZeroTone) {
		set_key (validator, key);
	}

	/**
	 * read the pinyin key value from a key string.
	 * 
	 * @param validator a PinyinValidator object to validate the key.
	 * @param key a ASCII string including one or more pinyin keys.
	 * @param keylen the length of key. NOTE(review): the default of -1
	 *        presumably means "up to the terminating zero" -- confirm
	 *        against the implementation.
	 * @return the number of characters used by this pinyin key.
	 */ 
	int set_key (const PinyinValidator &validator,
				 const char *key,
				 int keylen = -1);

	/**
	 * set the pinyin key value to initial, final and tone.
	 *
	 * Called without arguments this resets the key to the zero key.
	 */
	void set_key (PinyinInitial initial = SCIM_PINYIN_ZeroInitial,
				  PinyinFinal final     = SCIM_PINYIN_ZeroFinal,
				  PinyinTone tone       = SCIM_PINYIN_ZeroTone) {
		m_initial = initial;
		m_final = final;
		m_tone = tone;
	}

	/**
	 * set only the pinyin initial of this key.
	 */
	void set_initial (PinyinInitial initial = SCIM_PINYIN_ZeroInitial) {
		m_initial = initial;
	}

	/**
	 * set only the pinyin final of this key.
	 */
	void set_final (PinyinFinal final = SCIM_PINYIN_ZeroFinal) {
		m_final = final;
	}

	/**
	 * set only the pinyin tone of this key.
	 */
	void set_tone (PinyinTone tone = SCIM_PINYIN_ZeroTone) {
		m_tone = tone;
	}

	/**
	 * get pinyin initial of this key.
	 */
	PinyinInitial get_initial () const {
		return static_cast<PinyinInitial>(m_initial);
	}

	/**
	 * get pinyin final of this key.
	 */
	PinyinFinal get_final () const {
		return static_cast<PinyinFinal>(m_final);
	}

	/**
	 * get pinyin tone of this key.
	 */
	PinyinTone get_tone () const {
		return static_cast<PinyinTone>(m_tone);
	}

	/**
	 * get the ASCII name of pinyin initial of this key.
	 *
	 * The name is looked up in scim_pinyin_initials by the numeric
	 * value of the initial.
	 */
	const char* get_initial_string () const {
		return scim_pinyin_initials [m_initial].str;
	}

	/*
	 * disabled: get the Chinese name of pinyin initial of this key.
	const ucs4_t* get_initial_wide_string () const {
		return scim_pinyin_initials [m_initial].wstr;
	}
	 */

	/**
	 * get the ASCII name of pinyin final of this key.
	 *
	 * The name is looked up in scim_pinyin_finals by the numeric
	 * value of the final.
	 */
	const char* get_final_string () const {
		return scim_pinyin_finals [m_final].str;
	}

	/*
	 * disabled: get the Chinese name of pinyin final of this key.
	const ucs4_t* get_final_wide_string () const {
		return scim_pinyin_finals [m_final].wstr;
	}
	 */

	/**
	 * get the ASCII name of this key.
	 */
	String get_key_string () const;

	/*
	 * disabled: get the Chinese name of this key.
	WideString get_key_wide_string () const;
	 */

	/**
	 * check whether this is the zero (invalid) key.
	 *
	 * Only the initial and the final are inspected; the tone is ignored.
	 *
	 * @return true if both the initial and the final are zero.
	 */
	bool zero() const {
		return  m_initial == SCIM_PINYIN_ZeroInitial &&
				m_final == SCIM_PINYIN_ZeroFinal;
	}

	/**
	 * output the pinyin key in text format.
	 * 
	 * @param os the ostream object to output to.
	 */
	std::ostream& output_text (std::ostream &os) const;

	/*
	 * disabled: output the pinyin key in binary format.
	 * 
	 * @param os the ostream object to output to.
	std::ostream& output_binary (std::ostream &os) const;
	 */

	/**
	 * input the pinyin key in text format.
	 *
	 * @param validator the PinyinValidator object to validate the key.
	 * @param is the istream object to input from.
	 */
	std::istream& input_text (const PinyinValidator &validator, std::istream &is);

	/*
	 * disabled: input the pinyin key in binary format.
	 *
	 * @param validator the PinyinValidator object to validate the key.
	 * @param is the istream object to input from.
	std::istream& input_binary (const PinyinValidator &validator, std::istream &is);
	 */

	/**
	 * exact comparison: initial, final and tone must all be equal.
	 */
	bool operator == (PinyinKey rhs) const {
		return m_initial == rhs.m_initial && m_final == rhs.m_final && m_tone == rhs.m_tone;
	}

	/**
	 * negation of operator ==.
	 */
	bool operator != (PinyinKey rhs) const {
		return ! (*this == rhs);
	}

private:

	/**
	 * apply additional pinyin rules to the initial, final pair.
	 *
	 * for example:
	 *
	 *  SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_I  ==> SCIM_PINYIN_Yi, SCIM_PINYIN_I
	 *  SCIM_PINYIN_Ne,          SCIM_PINYIN_Ve ==> SCIM_PINYIN_Ne, SCIM_PINYIN_Ue
	 *
	 *  etc.
	 */
	void apply_additional_rules (PinyinInitial &initial,
								 PinyinFinal &final);

	/**
	 * translate an ASCII string into pinyin initial.
	 *
	 * @param initial a PinyinInitial object reference to store the result.
	 * @param key the ASCII key string.
	 * @param keylen the length of the key string.
	 * @return the number of chars actually translated.
	 */
	int parse_initial (PinyinInitial &initial,
					   const char *key,
					   int keylen);

	/**
	 * translate an ASCII string into pinyin final.
	 *
	 * @param final a PinyinFinal object reference to store the result.
	 * @param key the ASCII key string.
	 * @param keylen the length of the key string.
	 * @return the number of chars actually translated.
	 */
	int parse_final (PinyinFinal &final,
					 const char *key,
					 int keylen);
	
	/**
	 * translate an ASCII string into pinyin tone.
	 *
	 * @param tone a PinyinTone object reference to store the result.
	 * @param key the ASCII key string.
	 * @return the number of chars actually translated.
	 */
	int parse_tone (PinyinTone &tone,
				    const char *key);

	/**
	 * translate an ASCII string into initial, final and tone.
	 *
	 * @param initial store the result of pinyin initial.
	 * @param final store the result of pinyin final.
	 * @param tone store the result of pinyin tone.
	 * @param key the ASCII key string.
	 * @param keylen the length of key.
	 * @return the number of chars actually translated.
	 */
	int parse_key (PinyinInitial &initial,
				   PinyinFinal &final,
				   PinyinTone &tone,
				   const char *key,
				   int keylen);

	/**
	 * load the key from its two byte packed form.
	 *
	 * Layout (the inverse of combine_to_bytes):
	 *   byte0 bits 0-5 : initial
	 *   byte0 bits 6-7 : low two bits of the final
	 *   byte1 bits 0-3 : high four bits of the final
	 *   byte1 bits 4-7 : tone
	 *
	 * Each part is reduced modulo the number of enumerators, so a
	 * corrupt byte pair still yields values inside the enum ranges.
	 *
	 * @param byte0 the first packed byte.
	 * @param byte1 the second packed byte.
	 */
	void extract_from_bytes (unsigned char byte0,
							 unsigned char byte1) {
		m_initial = (byte0 & 0x3F) % SCIM_PINYIN_InitialNumber;
		m_final = ((byte0 >> 6) | ((byte1 & 0xF) << 2)) % SCIM_PINYIN_FinalNumber;
		m_tone = (byte1 >> 4) % SCIM_PINYIN_ToneNumber;
	}

	/**
	 * store the key in its two byte packed form.
	 *
	 * See extract_from_bytes for the bit layout.
	 *
	 * @param bytes destination buffer; bytes[0] and bytes[1] are written.
	 */
	void combine_to_bytes (unsigned char *bytes) const {
		// The upper bits of (m_final << 6) do not fit into the first
		// byte; they are dropped here on purpose and stored in bytes[1].
		bytes[0] = static_cast<unsigned char>((m_initial | (m_final << 6)) & 0xFF);
		bytes[1] = static_cast<unsigned char>(((m_final >> 2) | (m_tone << 4)) & 0xFF);
	}

public:
	/**
	 * translate an ASCII key string into a set of valid PinyinKey objects.
	 *
	 * @param validator to validate the result pinyin keys.
	 * @param vec a PinyinParsedKey vector to store the result keys.
	 * @param key a zero ending ASCII string.
	 * @return the number of chars actually parsed.
	 */
	static int parse_pinyin_key (const PinyinValidator &validator,
								 PinyinParsedKeyVector &vec,
								 const char *key);

	/**
	 * overload of parse_pinyin_key that takes a PinyinKey vector for
	 * the result instead of a PinyinParsedKey vector.
	 *
	 * @param validator to validate the result pinyin keys.
	 * @param vec a PinyinKey vector to store the result keys.
	 * @param key a zero ending ASCII string.
	 * @return the number of chars actually parsed.
	 */
	static int parse_pinyin_key (const PinyinValidator &validator,
								 PinyinKeyVector &vec,
								 const char *key);
};

/**
 * size in bytes of the bitmap held by PinyinValidator.
 *
 * Computed as the number of (initial, final, tone) combinations divided by
 * eight, plus one byte -- presumably one bit per combination.
 */
const int PinyinValidatorBitmapSize =
	(SCIM_PINYIN_InitialNumber * SCIM_PINYIN_FinalNumber * SCIM_PINYIN_ToneNumber) / 8 + 1;

/**
 * Validator of PinyinKey object.
 *
 * This class is used to validate a PinyinKey object.
 */
class PinyinValidator
{
public:
	/**
	 * constructor.
	 *
	 * @param table optional PinyinTable, may be NULL (the default).
	 *        NOTE(review): how the table is used is decided by the
	 *        implementation, which is not part of this header.
	 */
	PinyinValidator (/*const PinyinCustomSettings &custom,*/
					 const PinyinTable *table = NULL);

	/**
	 * initialize the validator with specified custom settings
	 * and PinyinTable.
	 *
	 * The custom settings parameter is currently commented out, so only
	 * the table is passed.
	 */
	void initialize (/*const PinyinCustomSettings &custom,*/
					 const PinyinTable *table = NULL);

	/**
	 * overloaded operator () function to validate a pinyin key.
	 *
	 * @param key the key to be validated.
	 * @return true = the key is valid, otherwise it's invalid.
	 */
	bool operator () (PinyinKey key) const;

private:
	/**
	 * pinyin custom settings.
	 *
	 * different custom settings will lead to different validators.
	 */
	//PinyinCustomSettings m_custom;

	/** validation bitmap of PinyinValidatorBitmapSize bytes. */
	char m_bitmap [PinyinValidatorBitmapSize];
};

/**
 * a binary functional class to do less than comparison
 * between two pinyin keys.
 *
 * the user custom settings will be taken into account in the comparison.
class PinyinKeyLessThan
	: public std::binary_function <PinyinKey, PinyinKey, bool>
{
	PinyinCustomSettings m_custom;
public:
	PinyinKeyLessThan (const PinyinCustomSettings &custom)
		: m_custom (custom) {}

	bool operator () (PinyinKey lhs,
					  PinyinKey rhs) const;
};
 */

/**
 * a binary functional class to do equal to comparison
 * between two pinyin keys.
class PinyinKeyEqualTo
	: public std::binary_function <PinyinKey, PinyinKey, bool>
{
	PinyinCustomSettings m_custom;
public:
	PinyinKeyEqualTo (const PinyinCustomSettings &custom)
		: m_custom (custom) {}

	bool operator () (PinyinKey lhs,
					  PinyinKey rhs) const;
};
 */

/**
 * a binary functional class to do bitwise less than comparison

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -