⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 scim_pinyin.cpp

📁 拼音输入法,在LINUX上可以运行,感觉还可以,包括*.c和*.h.
💻 CPP
📖 第 1 页 / 共 3 页
字号:
	for (unsigned int i=0; i<sizeof(rules)/sizeof(ReplaceRulePair); i++) {
		if (rules[i].initial == initial && rules[i].final == final) {
			initial = rules[i].new_initial;
			final = rules[i].new_final;
			break;
		}
	}

	if (initial != SCIM_PINYIN_ZeroInitial && final == SCIM_PINYIN_Iou)
		final = SCIM_PINYIN_Iu;
	if (initial != SCIM_PINYIN_ZeroInitial && final == SCIM_PINYIN_Uei)
		final = SCIM_PINYIN_Ui;
	if (initial != SCIM_PINYIN_ZeroInitial && final == SCIM_PINYIN_Uen)
		final = SCIM_PINYIN_Un;
}

// Splits the pinyin string `key` into individual keys and stores them, in
// left-to-right order, in `vec`.
//
// The string is scanned from its end towards its beginning.  For the
// current end position the longest candidate (at most
// SCIM_PINYIN_KEY_MAXLEN characters) is tried first and is shortened from
// the left until set_key () consumes the whole candidate; candidates that
// do not start with a letter are skipped.  If no candidate ending at the
// current position is accepted, everything collected so far is discarded
// and the scan restarts with the input shortened by one character.  A
// single quote directly before the current end position is stepped over
// and is not counted.
//
// Every stored key records its offset inside `key` (set_pos) and its
// length (set_length).
//
// validator  forwarded to set_key ().
// vec        output vector; always cleared first.
// key        NUL-terminated input; a null pointer is treated as empty.
//
// Returns the summed length of the keys left in `vec`.
int
PinyinKey::parse_pinyin_key (const PinyinValidator &validator,
							 PinyinParsedKeyVector &vec,
							 const char *key)
{
	vec.clear ();

	// strlen () must not be handed a null pointer.
	if (!key) return 0;

	int keylen = strlen (key);
	if (keylen <= 0) return 0;

	PinyinParsedKey aKey;
	int usedlen = 0;
	int len;
	int window;
	bool found;

	const char *key_start, *key_end;

	key_end = key + keylen;

	while (key_end > key) {

		if (*(key_end-1) == '\'') {
			--key_end;
			--keylen;
			if (keylen == 0) break;
		}

		// Clamp the candidate length before subtracting, so that no pointer
		// in front of the start of the buffer is ever formed.
		window = static_cast<int> (key_end - key);
		if (window > SCIM_PINYIN_KEY_MAXLEN) window = SCIM_PINYIN_KEY_MAXLEN;
		key_start = key_end - window;
		found = false;

		while (key_start < key_end) {
			// isalpha () is only defined for unsigned char values and EOF,
			// so a plain (possibly negative) char has to be converted first.
			if (isalpha (static_cast<unsigned char> (*key_start))) {
				len = aKey.set_key (validator, key_start, key_end - key_start);
				if (len == key_end - key_start) {
					found = true;
					aKey.set_pos (key_start - key);
					aKey.set_length (len);
					usedlen += len;
					key_end = key_start;
					vec.push_back (aKey);
					break;
				}
			}
			++ key_start;
		}
		if (!found) {
			// Nothing ends at this position: drop all keys found so far and
			// retry with the input shortened by one character.
			-- keylen;
			key_end = key + keylen;
			usedlen = 0;
			vec.clear ();
		}
	}

	// Keys were collected back to front; restore input order.
	std::reverse (vec.begin (), vec.end ());

	return usedlen;
}

// Splits the pinyin string `key` into individual PinyinKey values and
// stores them, in left-to-right order, in `vec`.
//
// The string is scanned from its end towards its beginning.  For the
// current end position the longest candidate (at most
// SCIM_PINYIN_KEY_MAXLEN characters) is tried first and is shortened from
// the left until set_key () consumes the whole candidate; candidates that
// do not start with a letter are skipped.  If no candidate ending at the
// current position is accepted, everything collected so far is discarded
// and the scan restarts with the input shortened by one character.  A
// single quote directly before the current end position is stepped over
// and is not counted.
//
// validator  forwarded to set_key ().
// vec        output vector; always cleared first.
// key        NUL-terminated input; a null pointer is treated as empty.
//
// Returns the summed length of the keys left in `vec`.
int
PinyinKey::parse_pinyin_key (const PinyinValidator &validator,
							 PinyinKeyVector &vec,
							 const char *key)
{
	vec.clear ();

	// strlen () must not be handed a null pointer.
	if (!key) return 0;

	int keylen = strlen (key);
	if (keylen <= 0) return 0;

	PinyinKey aKey;
	int usedlen = 0;
	int len;
	int window;
	bool found;

	const char *key_start, *key_end;

	key_end = key + keylen;

	while (key_end > key) {

		if (*(key_end-1) == '\'') {
			--key_end;
			--keylen;
			if (keylen == 0) break;
		}

		// Clamp the candidate length before subtracting, so that no pointer
		// in front of the start of the buffer is ever formed.
		window = static_cast<int> (key_end - key);
		if (window > SCIM_PINYIN_KEY_MAXLEN) window = SCIM_PINYIN_KEY_MAXLEN;
		key_start = key_end - window;
		found = false;

		while (key_start < key_end) {
			// isalpha () is only defined for unsigned char values and EOF,
			// so a plain (possibly negative) char has to be converted first.
			if (isalpha (static_cast<unsigned char> (*key_start))) {
				len = aKey.set_key (validator, key_start, key_end - key_start);
				if (len == key_end - key_start) {
					found = true;
					usedlen += len;
					key_end = key_start;
					vec.push_back (aKey);
					break;
				}
			}
			++ key_start;
		}
		if (!found) {
			// Nothing ends at this position: drop all keys found so far and
			// retry with the input shortened by one character.
			-- keylen;
			key_end = key + keylen;
			usedlen = 0;
			vec.clear ();
		}
	}

	// Keys were collected back to front; restore input order.
	std::reverse (vec.begin (), vec.end ());

	return usedlen;
}

//////////////////////////////////////////////////////////////////////////////
// implementation of PinyinValidator
// Constructs a validator and fills it right away by handing `table` to
// initialize ().  The PinyinCustomSettings argument that used to precede
// `table` is disabled in this version of the file.
PinyinValidator::PinyinValidator (const PinyinTable *table)
{
	initialize (table);
}

void
PinyinValidator::initialize (/*const PinyinCustomSettings &custom,*/
							 const PinyinTable *table)
{
	memset (m_bitmap, 0, PinyinValidatorBitmapSize);

	if (!table || table->size() <=0) return;

	for (int i=0; i<SCIM_PINYIN_InitialNumber; i++) {
		for (int j=0; j<SCIM_PINYIN_FinalNumber; j++) {
			for (int k=0; k<SCIM_PINYIN_ToneNumber; k++) {
				PinyinKey key(static_cast<PinyinInitial>(i),
							  static_cast<PinyinFinal>(j),
							  static_cast<PinyinTone>(k));
				if (!table->has_key (key)) {
					int val = (k * SCIM_PINYIN_FinalNumber + j) * SCIM_PINYIN_InitialNumber + i;
					m_bitmap [val >> 3] |= (1 << (val % 8));
				}
			}
		}
	}
}

// Tells whether `key` is accepted by this validator.
//
// A key whose initial is SCIM_PINYIN_ZeroInitial and whose final is
// SCIM_PINYIN_ZeroFinal is always rejected.  Any other key is accepted
// exactly when its bit in the bitmap, at index
//     (tone * SCIM_PINYIN_FinalNumber + final) * SCIM_PINYIN_InitialNumber + initial
// is clear.
bool
PinyinValidator::operator () (PinyinKey key) const
{
	if (key.get_initial () != SCIM_PINYIN_ZeroInitial || key.get_final () != SCIM_PINYIN_ZeroFinal) {
		const int row = key.get_tone () * SCIM_PINYIN_FinalNumber + key.get_final ();
		const int bit = row * SCIM_PINYIN_InitialNumber + key.get_initial ();
		const int mask = 1 << (bit % 8);

		return (m_bitmap [bit >> 3] & mask) == 0;
	}

	return false;
}

/*
//////////////////////////////////////////////////////////////////////////////
// implementation of PinyinKey comparison classes
static int
__scim_pinyin_compare_initial (const PinyinCustomSettings &custom,
							   PinyinInitial lhs,
							   PinyinInitial rhs)
{
	// Ambiguity LeRi, NeLe, FoHe will break binary search
	// we treat them as special cases
	if (custom.use_ambiguities [SCIM_PINYIN_AmbLeRi]) {
		if (lhs == SCIM_PINYIN_Ri) lhs = SCIM_PINYIN_Le;
		if (rhs == SCIM_PINYIN_Ri) rhs = SCIM_PINYIN_Le;
	}

	if (custom.use_ambiguities [SCIM_PINYIN_AmbNeLe]) {
		if (lhs == SCIM_PINYIN_Ne) lhs = SCIM_PINYIN_Le;
		if (rhs == SCIM_PINYIN_Ne) rhs = SCIM_PINYIN_Le;
	}

	if (custom.use_ambiguities [SCIM_PINYIN_AmbFoHe]) {
		if (lhs == SCIM_PINYIN_He) lhs = SCIM_PINYIN_Fo;
		if (rhs == SCIM_PINYIN_He) rhs = SCIM_PINYIN_Fo;
	}

	if ((lhs == rhs) ||
		(custom.use_ambiguities [SCIM_PINYIN_AmbZhiZi] &&
		 ((lhs == SCIM_PINYIN_Zhi && rhs == SCIM_PINYIN_Zi) ||
		  (lhs == SCIM_PINYIN_Zi && rhs == SCIM_PINYIN_Zhi))) ||
			  
		(custom.use_ambiguities [SCIM_PINYIN_AmbChiCi] &&
		 ((lhs == SCIM_PINYIN_Chi && rhs == SCIM_PINYIN_Ci) ||
		  (lhs == SCIM_PINYIN_Ci && rhs == SCIM_PINYIN_Chi))) ||
			  
		(custom.use_ambiguities [SCIM_PINYIN_AmbShiSi] &&
		 ((lhs == SCIM_PINYIN_Shi && rhs == SCIM_PINYIN_Si) ||
		  (lhs == SCIM_PINYIN_Si && rhs == SCIM_PINYIN_Shi)))) 
		return 0;
	else if (lhs < rhs) return -1;
	return 1;
}

static int
__scim_pinyin_compare_final (const PinyinCustomSettings &custom,
							 PinyinFinal lhs,
							 PinyinFinal rhs)
{
	if(((lhs == rhs) ||
		(custom.use_ambiguities [SCIM_PINYIN_AmbAnAng] &&
		 ((lhs == SCIM_PINYIN_An && rhs == SCIM_PINYIN_Ang) ||
		  (lhs == SCIM_PINYIN_Ang && rhs == SCIM_PINYIN_An))) ||
			  
		(custom.use_ambiguities [SCIM_PINYIN_AmbEnEng] &&
		 ((lhs == SCIM_PINYIN_En && rhs == SCIM_PINYIN_Eng) ||
		  (lhs == SCIM_PINYIN_Eng && rhs == SCIM_PINYIN_En))) ||
			  
	 	(custom.use_ambiguities [SCIM_PINYIN_AmbInIng] &&
		 ((lhs == SCIM_PINYIN_In && rhs == SCIM_PINYIN_Ing) ||
		  (lhs == SCIM_PINYIN_Ing && rhs == SCIM_PINYIN_In)))))
		return 0;
	else if (custom.use_incomplete && (lhs == SCIM_PINYIN_ZeroFinal || rhs == SCIM_PINYIN_ZeroFinal))
		return 0;
	else if (lhs < rhs) return -1;
	return 1;
}

static int
__scim_pinyin_compare_tone (const PinyinCustomSettings &custom,
							PinyinTone lhs,
							PinyinTone rhs)
{
	if(lhs == rhs || lhs == SCIM_PINYIN_ZeroTone || rhs == SCIM_PINYIN_ZeroTone || !custom.use_tone)
		return 0;
	else if (lhs < rhs) return -1;
	return 1;
}

bool
PinyinKeyLessThan::operator () (PinyinKey lhs, PinyinKey rhs) const
{
	switch (__scim_pinyin_compare_initial (m_custom,
				static_cast<PinyinInitial>(lhs.m_initial),
				static_cast<PinyinInitial>(rhs.m_initial))) {
		case 0:
			switch (__scim_pinyin_compare_final (m_custom,
					  static_cast<PinyinFinal>(lhs.m_final),
					  static_cast<PinyinFinal>(rhs.m_final))) {
				case 0:
					switch (__scim_pinyin_compare_tone (m_custom,
							 static_cast<PinyinTone>(lhs.m_tone),
							 static_cast<PinyinTone>(rhs.m_tone))) {
						case -1:
							return true;
						default:
							return false;
					}
				case -1:
					return true;
				default:
					return false;
			}
		case -1:
			return true;
		default:
			return false;
	}
	return false;
}

bool
PinyinKeyEqualTo::operator () (PinyinKey lhs, PinyinKey rhs) const
{
	if (!__scim_pinyin_compare_initial (m_custom,
			static_cast<PinyinInitial>(lhs.m_initial),
			static_cast<PinyinInitial>(rhs.m_initial)) &&
		!__scim_pinyin_compare_final (m_custom,
			static_cast<PinyinFinal>(lhs.m_final),
			static_cast<PinyinFinal>(rhs.m_final)) &&
		!__scim_pinyin_compare_tone (m_custom,
			static_cast<PinyinTone>(lhs.m_tone),
			static_cast<PinyinTone>(rhs.m_tone)))
		return true;
	return false;
}
*/
//////////////////////////////////////////////////////////////////////////////
// implementation of PinyinEntry
std::ostream&
PinyinEntry::output_text (std::ostream &os) const
{
	m_key.output_text (os) << "\t" << size() << "\t";

	for (std::vector<CharFrequencyPair>::const_iterator i = m_chars.begin(); i != m_chars.end(); i++) {
		utf8_write_wchar (os, i->first);
		os << i->second << ' ';
	}

	os << '\n';

	return os;
}
/*
std::ostream&
PinyinEntry::output_binary (std::ostream &os) const
{
	unsigned char bytes [8];

	m_key.output_binary (os);

	scim_uint32tobytes (bytes, (uint32) size());

	os.write ((char*)bytes, sizeof (unsigned char) * 4);

	for (std::vector<CharFrequencyPair>::const_iterator i = m_chars.begin(); i != m_chars.end(); i++) {
		utf8_write_wchar (os, i->first);
		scim_uint32tobytes (bytes, i->second);
		os.write ((char*)bytes, sizeof (unsigned char) * 4);
	}

	return os;
}
*/
std::istream&
PinyinEntry::input_text (const PinyinValidator &validator, std::istream &is)
{
	m_chars.clear();
	String value;
	uint32 n, len, freq;
	ucs4_t wc;

	m_key.input_text (validator, is);
	is >> n;
	m_chars.reserve (n+1);

	for (uint32 i=0; i<n; i++) {
		is >> value;
		if(strcmp(value.c_str(),"0")==0){
			continue;
		}
		if ((len = utf8_mbtowc (&wc, (const unsigned char*)(value.c_str()), value.length())) > 0) {
			if (value.length () > len)
				freq = atoi (value.c_str() + len);
			else
				freq = 0;
			m_chars.push_back (CharFrequencyPair (wc,freq));
		}
	}
	sort ();

	std::vector <CharFrequencyPair> (m_chars).swap (m_chars);

	return is;
}
/*
std::istream&
PinyinEntry::input_binary (const PinyinValidator &validator, std::istream &is)
{
	m_chars.clear();
	uint32 n, freq;
	ucs4_t wc;

	unsigned char bytes [8];

	m_key.input_binary (validator, is);

	is.read ((char*)bytes, sizeof (unsigned char) * 4);
	n = scim_bytestouint32 (bytes);
	m_chars.reserve (n+1);

	for (uint32 i=0; i<n; i++) {
		if ((wc = utf8_read_wchar (is)) > 0) {
			is.read ((char*)bytes, sizeof (unsigned char) * 4);
			freq = scim_bytestouint32 (bytes);
			m_chars.push_back (CharFrequencyPair (wc, freq));
		}
	}
	sort ();

	std::vector <CharFrequencyPair> (m_chars).swap (m_chars);

	return is;
}
*/
//////////////////////////////////////////////////////////////////////////////
// implementation of PinyinTable
// Builds a table by reading its contents from `is` via input ().
//
// When `validator` is null, the address of scim_default_pinyin_validator
// is used instead.  The PinyinCustomSettings argument and the members that
// depended on it (m_revmap_ok, m_pinyin_key_less, m_pinyin_key_equal,
// m_custom) are disabled in this version of the file.
PinyinTable::PinyinTable (const PinyinValidator *validator,
						  std::istream &is)
	: m_validator (validator ? validator : &scim_default_pinyin_validator)
{
	input (is);
}

PinyinTable::PinyinTable (/*(const PinyinCustomSettings &custom,*/
						  const PinyinValidator *validator,
						  const char *tablefile)
	: /*m_revmap_ok (false),
	  m_pinyin_key_less (custom),
	  m_pinyin_key_equal (custom),*/

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -