⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 scim_pinyin.cpp

📁 拼音输入法,在LINUX上可以运行,感觉还可以,包括*.c和*.h.
💻 CPP
📖 第 1 页 / 共 3 页
字号:
/** @file scim_pinyin.cpp
 * implementation of PinyinKey, PinyinTable and related classes.
 */

/*
 * Smart Chinese Input Method
 * 
 * Copyright (c) 2002 James Su <suzhe@turbolinux.com.cn>
 *
 * $Id: scim_pinyin.cpp,v 1.2 2004/07/17 07:05:31 Lu Mu Exp $
 *
 */

#define Uses_STL_AUTOPTR
#define Uses_STL_FUNCTIONAL
#define Uses_STL_VECTOR
#define Uses_STL_IOSTREAM
#define Uses_STL_FSTREAM
#define Uses_STL_ALGORITHM
#define Uses_STL_MAP
#define Uses_STL_UTILITY
#define Uses_STL_IOMANIP
#define Uses_C_STDIO
#define Uses_SCIM_UTILITY
#define Uses_SCIM_SERVER
#define Uses_SCIM_ICONV
#define Uses_SCIM_CONFIG_BASE
#define Uses_SCIM_CONFIG_PATH
#define Uses_SCIM_LOOKUP_TABLE

//#include <scim.h>
#include <stdio.h>
#include "scim_pinyin.h"

/*
 * Sample implementation from Unicode home page.
 * http://www.stonehand.com/unicode/standard/fss-utf.html
 */
/* One row of the UTF-8 codec table; each row describes one sequence length. */
struct utf8_table {
	int     cmask;	/* mask applied to the lead byte before comparing with cval; 0 marks the end of the table */
	int     cval;	/* expected masked lead-byte value; also OR-ed into the lead byte when encoding */
	int     shift;	/* right shift applied to the value to obtain the lead byte's payload bits */
	long    lmask;	/* mask applied to the decoded value; also the largest value encodable by this row */
	long    lval;	/* smallest value accepted by the decoder for this row */
};

/*
 * Codec rows ordered by sequence length, from 1 byte up to 6 bytes.
 * Both utf8_mbtowc() and utf8_wctomb() walk the rows in order and stop at
 * the final row, whose cmask is 0.
 */
static struct utf8_table utf8_table[] =
{
    {0x80,  0x00,   0*6,    0x7F,           0,         /* 1 byte sequence */},
    {0xE0,  0xC0,   1*6,    0x7FF,          0x80,      /* 2 byte sequence */},
    {0xF0,  0xE0,   2*6,    0xFFFF,         0x800,     /* 3 byte sequence */},
    {0xF8,  0xF0,   3*6,    0x1FFFFF,       0x10000,   /* 4 byte sequence */},
    {0xFC,  0xF8,   4*6,    0x3FFFFFF,      0x200000,  /* 5 byte sequence */},
    {0xFE,  0xFC,   5*6,    0x7FFFFFFF,     0x4000000, /* 6 byte sequence */},
    {0,						       /* end of table    */}
};

/**
 * Decode a single UTF-8 sequence into one code point.
 *
 * @param p  receives the decoded value on success; not written on failure.
 * @param s  first byte of the sequence.
 * @param n  number of bytes that may be read starting at s.
 * @return   the number of bytes consumed (1 to 6), or -1 when s is NULL,
 *           n is not positive, the sequence is longer than n bytes, a
 *           continuation byte is not of the form 10xxxxxx, the lead byte
 *           matches no table row, or the decoded value is below the minimum
 *           for its length (an overlong encoding).
 */
int
utf8_mbtowc(ucs4_t *p, const __u8 *s, int n)
{
	long l;
	int c0, c, nc;
	struct utf8_table *t;

	/* The lead byte is read unconditionally below, so refuse an empty
	 * or missing buffer before touching it. */
	if (s == 0 || n <= 0)
		return -1;

	nc = 0;
	c0 = *s;
	l = c0;
	for (t = utf8_table; t->cmask; t++) {
		nc++;
		if ((c0 & t->cmask) == t->cval) {
			/* Lead byte matches this length: strip the marker bits. */
			l &= t->lmask;
			if (l < t->lval)
				return -1;	/* value fits a shorter sequence */
			*p = l;
			return nc;
		}
		/* Another byte is required; stop if the buffer has no more. */
		if (n <= nc)
			return -1;
		s++;
		/* After the XOR a valid continuation byte has its top two bits clear. */
		c = (*s ^ 0x80) & 0xFF;
		if (c & 0xC0)
			return -1;
		l = (l << 6) | c;
	}
	return -1;
}

/**
 * Encode one code point as a UTF-8 sequence.
 *
 * @param s       output buffer; when NULL nothing is written.
 * @param wc      value to encode.
 * @param maxlen  number of bytes available at s.
 * @return        0 when s is NULL; otherwise the number of bytes written
 *                (1 to 6), or -1 when the value does not fit in maxlen
 *                bytes (including maxlen <= 0) or in any table row.
 */
int
utf8_wctomb(__u8 *s, ucs4_t wc, int maxlen)
{
	long l;
	int c, nc;
	struct utf8_table *t;

	if (s == 0)
		return 0;

	l = wc;
	nc = 0;
	/* Each row needs one more output byte than the previous one, so one
	 * unit of maxlen is spent per row tried. The explicit "> 0" keeps a
	 * negative maxlen from being treated as available space. */
	for (t = utf8_table; t->cmask && maxlen > 0; t++, maxlen--) {
		nc++;
		if (l <= t->lmask) {
			c = t->shift;
			/* Lead byte: length marker plus the top payload bits. */
			*s = t->cval | (l >> c);
			/* Continuation bytes: six payload bits each, high to low. */
			while (c > 0) {
				c -= 6;
				s++;
				*s = 0x80 | ((l >> c) & 0x3F);
			}
			return nc;
		}
	}
	return -1;
}

/**
 * Write one code point to a stream as UTF-8.
 *
 * The value is encoded with utf8_wctomb() into a 6-byte scratch buffer.
 * Nothing is written when the encoder reports zero bytes or a failure.
 *
 * @param os  destination stream.
 * @param wc  value to encode.
 * @return    os, to allow chaining.
 */
std::ostream &
utf8_write_wchar (std::ostream &os, ucs4_t wc)
{
    unsigned char encoded[6];
    const int nbytes = utf8_wctomb (encoded, wc, 6);

    if (nbytes <= 0)
        return os;

    os.write ((char*)encoded, nbytes * sizeof (unsigned char));
    return os;
}

/*
// Internal functions
static int
__scim_pinyin_compare_initial (const PinyinCustomSettings &custom,
							   PinyinInitial lhs,
							   PinyinInitial rhs);

static int
__scim_pinyin_compare_final (const PinyinCustomSettings &custom,
							 PinyinFinal lhs,
							 PinyinFinal rhs);

static int
__scim_pinyin_compare_tone (const PinyinCustomSettings &custom,
							PinyinTone lhs,
							PinyinTone rhs);
*/

// Data definition

// Identification strings for pinyin table data. None of them is referenced
// in the part of the file visible here; presumably they tag the text and
// binary table formats and the format version -- confirm against the table
// load/save code.
static const char scim_pinyin_table_text_header [] = "SCIM_Pinyin_Table_TEXT";
static const char scim_pinyin_table_binary_header [] = "SCIM_Pinyin_Table_BINARY";
static const char scim_pinyin_table_version [] = "VERSION_0_4";
/*
const PinyinCustomSettings scim_default_custom_settings = 
{
	true, false, true,
	{false, false, false, false, false, false, false, false, false, false}
};
*/
// Default-constructed validator instance.
// NOTE(review): what a default-constructed PinyinValidator accepts is defined
// by that class, which is not visible here.
const PinyinValidator scim_default_pinyin_validator;

// Table of pinyin initials.
//
// Each entry lists, in order: the ASCII spelling, a zero-terminated array of
// code points (all values lie in the Unicode Bopomofo block; presumably the
// Zhuyin form of the initial), the length of the ASCII spelling, and what
// appears to be the number of code points in the second field.
//
// Entry 0 is the empty initial. parse_initial() casts the matching array
// index directly to PinyinInitial, so the order of entries must stay in
// step with that enum (declared outside this file).
const PinyinToken scim_pinyin_initials[] =
{
	{"", {0}, 0, 0},
	{"b", {0x3105,0}, 1, 1},
	{"c", {0x3118,0}, 1, 1},
	{"ch",{0x3114,0}, 2, 1},
	{"d", {0x3109,0}, 1, 1},
	{"f", {0x3108,0}, 1, 1},
	{"g", {0x310d,0}, 1, 1},
	{"h", {0x310f,0}, 1, 1},
	{"j", {0x3110,0}, 1, 1},
	{"k", {0x310e,0}, 1, 1},
	{"l", {0x310c,0}, 1, 1},
	{"m", {0x3107,0}, 1, 1},
	{"n", {0x310b,0}, 1, 1},
	{"p", {0x3106,0}, 1, 1},
	{"q", {0x3111,0}, 1, 1},
	{"r", {0x3116,0}, 1, 1},
	{"s", {0x3119,0}, 1, 1},
	{"sh",{0x3115,0}, 2, 1},
	{"t", {0x310a,0}, 1, 1},
	{"w", {0x3128,0}, 1, 1},
	{"x", {0x3112,0}, 1, 1},
	{"y", {0x3129,0}, 1, 1},
	{"z", {0x3117,0}, 1, 1},
	{"zh",{0x3113,0}, 2, 1}
};

// Table of pinyin finals.
//
// Each entry lists, in order: the ASCII spelling, a zero-terminated array of
// Bopomofo code points for the final, the length of the ASCII spelling, and
// the number of code points in the second field.
//
// Entry 0 is the empty final. parse_final() casts the matching array index
// directly to PinyinFinal, so the order of entries must stay in step with
// that enum (declared outside this file).
//
// Bopomofo values follow the standard Zhuyin spellings: "ong" is U+3128
// U+3125 (the same as "ueng"), and the compound finals "ie", "ue" and "ve"
// end in U+311D (BOPOMOFO LETTER EH) rather than U+311C (BOPOMOFO LETTER E).
const PinyinToken scim_pinyin_finals[] =
{
	{"", {0}, 0, 0},
	{"a",   {0x311a,0},        1, 1},
	{"ai",  {0x311e,0},        2, 1},
	{"an",  {0x3122,0},        2, 1},
	{"ang", {0x3124,0},        3, 1},
	{"ao",  {0x3120,0},        2, 1},
	{"e",   {0x311c,0},        1, 1},
	{"ei",  {0x311f,0},        2, 1},
	{"en",  {0x3123,0},        2, 1},
	{"eng", {0x3125,0},        3, 1},
	{"er",  {0x3126,0},        2, 1},
	{"i",   {0x3127,0},        1, 1},
	{"ia",  {0x3127,0x311a,0}, 2, 2},
	{"ian", {0x3127,0x3122,0}, 3, 2},
	{"iang",{0x3127,0x3124,0}, 4, 2},
	{"iao", {0x3127,0x3120,0}, 3, 2},
	{"ie",  {0x3127,0x311d,0}, 2, 2},
	{"in",  {0x3127,0x3123,0}, 2, 2},
	{"ing", {0x3127,0x3125,0}, 3, 2},
	{"iong",{0x3129,0x3125,0}, 4, 2},
	{"iou", {0x3127,0x3121,0}, 3, 2},
	{"iu",  {0x3127,0x3121,0}, 2, 2},
	{"ng",  {0x312b,0},        2, 1},
	{"o",   {0x311b,0},        1, 1},
	{"ong", {0x3128,0x3125,0}, 3, 2},
	{"ou",  {0x3121,0},        2, 1},
	{"u",   {0x3128,0},        1, 1},
	{"ua",  {0x3128,0x311a,0}, 2, 2},
	{"uai", {0x3128,0x311e,0}, 3, 2},
	{"uan", {0x3128,0x3122,0}, 3, 2},
	{"uang",{0x3128,0x3124,0}, 4, 2},
	{"ue",  {0x3129,0x311d,0}, 2, 2},
	{"uei", {0x3128,0x311f,0}, 3, 2},
	{"uen", {0x3128,0x3123,0}, 3, 2},
	{"ueng",{0x3128,0x3125,0}, 4, 2},
	{"ui",  {0x3128,0x311f,0}, 2, 2},
	{"un",  {0x3128,0x3123,0}, 2, 2},
	{"uo",  {0x3128,0x311b,0}, 2, 2},
	{"v",   {0x3129,0},        1, 1},
	{"van", {0x3129,0x3122,0}, 3, 2},
	{"ve",  {0x3129,0x311d,0}, 2, 2},
	{"vn",  {0x3129,0x3123,0}, 2, 2}
};

// Entry counts of the two token tables, derived from the arrays themselves
// so they follow any change to the table contents.
const int scim_number_of_initials =
	sizeof (scim_pinyin_initials) / sizeof (scim_pinyin_initials [0]);
const int scim_number_of_finals =
	sizeof (scim_pinyin_finals) / sizeof (scim_pinyin_finals [0]);

//////////////////////////////////////////////////////////////////////////////
// implementation of PinyinKey

/**
 * Write the key to a stream in its textual form, as produced by
 * get_key_string().
 *
 * @param os  destination stream.
 * @return    os, to allow chaining.
 */
std::ostream&
PinyinKey::output_text (std::ostream &os) const
{
	os << get_key_string ();
	return os;
}

std::istream&
PinyinKey::input_text (const PinyinValidator &validator, std::istream &is)
{
	String key;
	is >> key;
	set_key (validator, key.c_str());
	return is;
}
/*
std::ostream&
PinyinKey::output_binary (std::ostream &os) const
{
	unsigned char key [2];
	combine_to_bytes (key);
	os.write ((const char*) key, sizeof (char) * 2);
	return os;
}

std::istream&
PinyinKey::input_binary (const PinyinValidator &validator, std::istream &is)
{
	unsigned char key [2];
	is.read ((char*) key, sizeof (char) * 2);
	extract_from_bytes (key [0], key [1]);
	if (!validator (*this)) {
		m_tone = SCIM_PINYIN_ZeroTone;
		if (!validator (*this)) {
			m_final = SCIM_PINYIN_ZeroFinal;
			if (!validator (*this))
				m_initial = SCIM_PINYIN_ZeroInitial;
		}
	}
	return is;
}
*/
/**
 * Find the longest initial that is a prefix of key.
 *
 * Every entry of scim_pinyin_initials is tried; an entry is a candidate when
 * it is no longer than keylen, at least as long as the best match so far,
 * and equal to the start of key. The empty entry 0 always qualifies while
 * keylen is non-negative, so initial ends up as index 0 when nothing longer
 * matches.
 *
 * @param initial  receives the table index of the best match.
 * @param key      text to parse.
 * @param keylen   number of characters available at key.
 * @return         length of the matched initial (0 if only the empty one).
 */
int
PinyinKey::parse_initial (PinyinInitial &initial,
						  const char *key,
						  int keylen)
{
	int best = 0;

	for (int idx = 0; idx < scim_number_of_initials; ++idx) {
		const PinyinToken &tok = scim_pinyin_initials [idx];

		// Too long for the input, or shorter than what is already matched.
		if (keylen < tok.len || tok.len < best)
			continue;

		if (strncmp (tok.str, key, tok.len) != 0)
			continue;

		initial = static_cast<PinyinInitial>(idx);
		best = tok.len;
	}

	return best;
}

/**
 * Find the longest final that is a prefix of key.
 *
 * Every entry of scim_pinyin_finals is tried; an entry is a candidate when
 * it is no longer than keylen, at least as long as the best match so far,
 * and equal to the start of key. The empty entry 0 always qualifies while
 * keylen is non-negative, so final ends up as index 0 when nothing longer
 * matches.
 *
 * @param final   receives the table index of the best match.
 * @param key     text to parse.
 * @param keylen  number of characters available at key.
 * @return        length of the matched final (0 if only the empty one).
 */
int
PinyinKey::parse_final (PinyinFinal &final,
						const char *key,
						int keylen)
{
	int best = 0;

	for (int idx = 0; idx < scim_number_of_finals; ++idx) {
		const PinyinToken &tok = scim_pinyin_finals [idx];

		// Too long for the input, or shorter than what is already matched.
		if (keylen < tok.len || tok.len < best)
			continue;

		if (strncmp (tok.str, key, tok.len) != 0)
			continue;

		final = static_cast<PinyinFinal>(idx);
		best = tok.len;
	}

	return best;
}

/**
 * Parse a single tone digit.
 *
 * The first character of key is read as a decimal digit; it is accepted
 * when its value lies between SCIM_PINYIN_First and SCIM_PINYIN_LastTone
 * inclusive.
 *
 * @param tone  receives the tone on success; untouched otherwise.
 * @param key   text to parse; its first character is read.
 * @return      1 when a tone digit was consumed, 0 otherwise.
 */
int
PinyinKey::parse_tone (PinyinTone &tone,
					   const char *key)
{
	const int digit = (*key) - '0';

	if (digit < SCIM_PINYIN_First || digit > SCIM_PINYIN_LastTone)
		return 0;

	tone = static_cast<PinyinTone>(digit);
	return 1;
}

/**
 * Parse one pinyin syllable (initial, final, optional tone digit) from key.
 *
 * A final is tried first. Only when the text does not start with a final is
 * an initial parsed, followed by another attempt at a final if input is
 * left. A tone digit is then tried on whatever remains, and finally
 * apply_additional_rules() is run on the initial/final pair.
 *
 * @param initial  receives the parsed initial (zero initial if none).
 * @param final    receives the parsed final (zero final if none).
 * @param tone     receives the parsed tone (zero tone if none).
 * @param key      text to parse.
 * @param keylen   number of characters available at key.
 * @return         total number of characters consumed; 0 when keylen <= 0,
 *                 in which case the output arguments are not modified.
 */
int
PinyinKey::parse_key (PinyinInitial &initial,
					  PinyinFinal &final,
					  PinyinTone &tone,
					  const char *key,
					  int keylen)
{
	if (keylen <= 0)
		return 0;

	initial = SCIM_PINYIN_ZeroInitial;
	final = SCIM_PINYIN_ZeroFinal;
	tone = SCIM_PINYIN_ZeroTone;

	const char *cursor = key;
	int remaining = keylen;

	int initial_len = 0;
	int tone_len = 0;

	// Syllables without an initial start directly with a final.
	int final_len = parse_final (final, cursor, remaining);
	cursor += final_len;
	remaining -= final_len;

	// No leading final, so look for an initial and then a final after it.
	if (final == SCIM_PINYIN_ZeroFinal) {
		initial_len = parse_initial (initial, cursor, remaining);
		cursor += initial_len;
		remaining -= initial_len;

		if (remaining != 0) {
			final_len = parse_final (final, cursor, remaining);
			cursor += final_len;
			remaining -= final_len;
		}
	}

	if (remaining != 0)
		tone_len = parse_tone (tone, cursor);

	apply_additional_rules (initial, final);

	return initial_len + final_len + tone_len;
}

/**
 * Load this key from the start of a pinyin string.
 *
 * The text is parsed with parse_key(). While the validator rejects the
 * parsed combination, parsing is retried on an input one character shorter
 * than what the previous attempt consumed, until a combination is accepted
 * or nothing can be parsed.
 *
 * @param validator  decides whether a parsed combination is acceptable.
 * @param key        text to parse. When NULL or empty the function returns
 *                   0 and leaves this object unchanged.
 * @param keylen     number of characters available at key; a negative
 *                   value means strlen (key).
 * @return           number of characters consumed. When it is 0 (for a
 *                   non-empty key) the members are left at their zero
 *                   values.
 */
int
PinyinKey::set_key (const PinyinValidator &validator,
					const char *key,
					int keylen)
{
	if (key == NULL || key[0] == 0)
		return 0;

	m_initial = SCIM_PINYIN_ZeroInitial;
	m_final = SCIM_PINYIN_ZeroFinal;
	m_tone = SCIM_PINYIN_ZeroTone;

	PinyinInitial initial = SCIM_PINYIN_ZeroInitial;
	PinyinFinal final = SCIM_PINYIN_ZeroFinal;
	PinyinTone tone = SCIM_PINYIN_ZeroTone;

	if (keylen < 0)
		keylen = strlen (key);

	int consumed = parse_key (initial, final, tone, key, keylen);

	// Shrink the input until the validator accepts the parse.
	while (consumed > 0) {
		if (validator (PinyinKey (initial, final, tone)))
			break;
		consumed = parse_key (initial, final, tone, key, consumed - 1);
	}

	if (consumed != 0) {
		m_initial = initial;
		m_final = final;
		m_tone = tone;
	}

	return consumed;
}

/**
 * Build the textual form of this key: the initial string followed by the
 * final string, with the tone number appended when the tone is non-zero.
 *
 * @return  the formatted key.
 */
String
PinyinKey::get_key_string () const
{
	char text [16];

	if (!m_tone) {
		snprintf (text, 15, "%s%s", get_initial_string(), get_final_string());
		return String (text);
	}

	snprintf (text, 15, "%s%s%d", get_initial_string(), get_final_string(), m_tone);
	return String (text);
}
/*
WideString
PinyinKey::get_key_wide_string () const
{
	return WideString (get_initial_wide_string ()) + WideString (get_final_wide_string());
}
*/

void
PinyinKey::apply_additional_rules (PinyinInitial &initial, PinyinFinal &final)
{
	static struct ReplaceRulePair {
		PinyinInitial initial;
		PinyinFinal   final;
		PinyinInitial new_initial;
		PinyinFinal   new_final;
	} rules [] = 
	{
	/*
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_I,    SCIM_PINYIN_Yi, SCIM_PINYIN_I},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Ia,   SCIM_PINYIN_Yi, SCIM_PINYIN_A},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Ian,  SCIM_PINYIN_Yi, SCIM_PINYIN_An},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Iang, SCIM_PINYIN_Yi, SCIM_PINYIN_Ang},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Iao,  SCIM_PINYIN_Yi, SCIM_PINYIN_Ao},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Ie,   SCIM_PINYIN_Yi, SCIM_PINYIN_E},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_In,   SCIM_PINYIN_Yi, SCIM_PINYIN_In},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Ing,  SCIM_PINYIN_Yi, SCIM_PINYIN_Ing},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Iong, SCIM_PINYIN_Yi, SCIM_PINYIN_Ong},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Iou,  SCIM_PINYIN_Yi, SCIM_PINYIN_Ou},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Iu,   SCIM_PINYIN_Yi, SCIM_PINYIN_U},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_U,    SCIM_PINYIN_Wo, SCIM_PINYIN_U},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Ua,   SCIM_PINYIN_Wo, SCIM_PINYIN_A},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Uai,  SCIM_PINYIN_Wo, SCIM_PINYIN_Ai},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Uan,  SCIM_PINYIN_Wo, SCIM_PINYIN_An},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Uang, SCIM_PINYIN_Wo, SCIM_PINYIN_Ang},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Uei,  SCIM_PINYIN_Wo, SCIM_PINYIN_Ei},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Uen,  SCIM_PINYIN_Wo, SCIM_PINYIN_En},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Ueng, SCIM_PINYIN_Wo, SCIM_PINYIN_Eng},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Ui,   SCIM_PINYIN_Wo, SCIM_PINYIN_Ei},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Un,   SCIM_PINYIN_Wo, SCIM_PINYIN_En},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Uo,   SCIM_PINYIN_Wo, SCIM_PINYIN_O},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Ue,   SCIM_PINYIN_Yi, SCIM_PINYIN_Ue},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_V,    SCIM_PINYIN_Yi, SCIM_PINYIN_U},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Van,  SCIM_PINYIN_Yi, SCIM_PINYIN_Uan},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Ve,   SCIM_PINYIN_Yi, SCIM_PINYIN_Ue},
		{SCIM_PINYIN_ZeroInitial, SCIM_PINYIN_Vn,   SCIM_PINYIN_Yi, SCIM_PINYIN_Un},
	*/
		{SCIM_PINYIN_Ne,          SCIM_PINYIN_Ve,   SCIM_PINYIN_Ne, SCIM_PINYIN_Ue},
		{SCIM_PINYIN_Le,          SCIM_PINYIN_Ve,   SCIM_PINYIN_Le, SCIM_PINYIN_Ue},
		{SCIM_PINYIN_Ji,          SCIM_PINYIN_V,    SCIM_PINYIN_Ji, SCIM_PINYIN_U},
		{SCIM_PINYIN_Ji,          SCIM_PINYIN_Van,  SCIM_PINYIN_Ji, SCIM_PINYIN_Uan},
		{SCIM_PINYIN_Ji,          SCIM_PINYIN_Ve,   SCIM_PINYIN_Ji, SCIM_PINYIN_Ue},
		{SCIM_PINYIN_Ji,          SCIM_PINYIN_Vn,   SCIM_PINYIN_Ji, SCIM_PINYIN_Un},
		{SCIM_PINYIN_Qi,          SCIM_PINYIN_V,    SCIM_PINYIN_Qi, SCIM_PINYIN_U},
		{SCIM_PINYIN_Qi,          SCIM_PINYIN_Van,  SCIM_PINYIN_Qi, SCIM_PINYIN_Uan},
		{SCIM_PINYIN_Qi,          SCIM_PINYIN_Ve,   SCIM_PINYIN_Qi, SCIM_PINYIN_Ue},
		{SCIM_PINYIN_Qi,          SCIM_PINYIN_Vn,   SCIM_PINYIN_Qi, SCIM_PINYIN_Un},
		{SCIM_PINYIN_Xi,          SCIM_PINYIN_V,    SCIM_PINYIN_Xi, SCIM_PINYIN_U},
		{SCIM_PINYIN_Xi,          SCIM_PINYIN_Van,  SCIM_PINYIN_Xi, SCIM_PINYIN_Uan},
		{SCIM_PINYIN_Xi,          SCIM_PINYIN_Ve,   SCIM_PINYIN_Xi, SCIM_PINYIN_Ue},
		{SCIM_PINYIN_Xi,          SCIM_PINYIN_Vn,   SCIM_PINYIN_Xi, SCIM_PINYIN_Un}
	};

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -