📄 text.cpp

📁 一个不错的关于手机模块程序This page contains everything that has changed in the history of DC++. Read this to fin
💻 CPP
字号:
/*
 * Copyright (C) 2001-2006 Jacek Sieka, arnetheduck on gmail point com
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

#include "stdinc.h"
#include "DCPlusPlus.h"

#include "Text.h"

// Lower-case lookup table for the 128 ASCII code points; filled by Text::initialize().
char Text::asciiLower[128];
// Lower-case lookup table indexed by 16-bit character value (0..65535); filled by Text::initialize().
wchar_t Text::lower[65536];

/**
 * Fills the lower-case lookup tables Text::lower (all 65536 16-bit values)
 * and Text::asciiLower (the first 128 entries of Text::lower, narrowed to char).
 *
 * When using the GNU C library, setlocale should be called before
 * Text::initialize, because the non-Windows branch relies on towlower().
 */
void Text::initialize() {
	for(size_t i = 0; i < 65536; ++i) {
#ifdef _WIN32
		// With a zero high-order word, CharLowerW treats its argument as a single character.
		lower[i] = (wchar_t)CharLowerW((LPWSTR)i);
#else
		// Keep the full wide result: narrowing to char would mangle every
		// lower-case mapping that lies outside the 7-bit ASCII range.
		lower[i] = (wchar_t)towlower(i);
#endif
	}

	// Entries below 128 are copied into a char table.
	for(size_t i = 0; i < 128; ++i) {
		asciiLower[i] = (char)lower[i];
	}

}

/**
 * Decodes the UTF-8 sequence that starts at str into a single wide character.
 *
 * Only 1-3 byte sequences are decoded. Lead bytes announcing 4-6 byte
 * sequences are recognised but always reported as failures, as are stray
 * continuation bytes, overlong encodings and UTF-16 surrogates.
 *
 * @param str First byte of the sequence. Following bytes are inspected until
 *            one is not a continuation byte, so the buffer must be terminated
 *            (for example by '\0').
 * @param c   Receives the decoded character on success; untouched on failure.
 * @return The number of bytes consumed (1-3) on success. On failure, a
 *         negative value whose magnitude is the number of bytes belonging to
 *         the rejected sequence (callers skip abs(result) bytes).
 */
int Text::utf8ToWc(const char* str, wchar_t& c) {
	u_int8_t c0 = (u_int8_t)str[0];
	if(c0 & 0x80) {									// 1xxx xxxx
		if(c0 & 0x40) {								// 11xx xxxx
			if(c0 & 0x20) {							// 111x xxxx
				if(c0 & 0x10) {						// 1111 xxxx
					// 4-6 byte sequences are not decoded; n is minus the
					// length announced by the lead byte.
					int n = -4;
					if(c0 & 0x08) {					// 1111 1xxx
						n = -5;
						if(c0 & 0x04) {				// 1111 11xx
							if(c0 & 0x02) {			// 1111 111x
								return -1;
							}
							n = -6;
						}
					}
					// Count the lead byte plus the continuation bytes actually
					// present. Only 10xxxxxx bytes qualify: testing the top bit
					// alone would also swallow the lead byte of the next
					// character after a truncated sequence.
					int i = -1;
					while(i > n && ((u_int8_t)str[abs(i)] & (0x80 | 0x40)) == 0x80)
						--i;
					return i;
				} else {		// 1110xxxx
					u_int8_t c1 = (u_int8_t)str[1];
					if((c1 & (0x80 | 0x40)) != 0x80)
						return -1;

					u_int8_t c2 = (u_int8_t)str[2];
					if((c2 & (0x80 | 0x40)) != 0x80)
						return -2;

					// Ugly utf-16 surrogate catch
					if((c0 & 0x0f) == 0x0d && (c1 & 0x3c) >= (0x08 << 2))
						return -3;

					// Overlong encoding
					if(c0 == (0x80 | 0x40 | 0x20) && (c1 & (0x80 | 0x40 | 0x20)) == 0x80)
						return -3;

					c = (((wchar_t)c0 & 0x0f) << 12) |
						(((wchar_t)c1 & 0x3f) << 6) |
						((wchar_t)c2 & 0x3f);

					return 3;
				}
			} else {				// 110xxxxx
				u_int8_t c1 = (u_int8_t)str[1];
				if((c1 & (0x80 | 0x40)) != 0x80)
					return -1;

				// Overlong encoding
				if((c0 & ~1) == (0x80 | 0x40))
					return -2;

				c = (((wchar_t)c0 & 0x1f) << 6) |
					((wchar_t)c1 & 0x3f);
				return 2;
			}
		} else {					// 10xxxxxx
			// A continuation byte cannot start a sequence.
			return -1;
		}
	} else {						// 0xxxxxxx
		c = (unsigned char)str[0];
		return 1;
	}
	// Every branch above returns; reaching this point is a logic error.
	dcassert(0);
}

/**
 * Appends the UTF-8 encoding of one wide character to str.
 *
 * @param c   Character to encode.
 * @param str String that receives the 1-4 encoded bytes; existing content is kept.
 */
void Text::wcToUtf8(wchar_t c, string& str) {
	if(c >= 0x10000) {
		// Only reachable where wchar_t is wider than 16 bits. Without this
		// branch the three-byte lead byte below would overflow and the
		// output would not be valid UTF-8.
		str += (char)(0x80 | 0x40 | 0x20 | 0x10 | ((c >> 18) & 0x07));
		str += (char)(0x80 | ((c >> 12) & 0x3f));
		str += (char)(0x80 | ((c >> 6) & 0x3f));
		str += (char)(0x80 | (c & 0x3f));
	} else if(c >= 0x0800) {
		// 1110xxxx 10xxxxxx 10xxxxxx
		str += (char)(0x80 | 0x40 | 0x20  | (c >> 12));
		str += (char)(0x80 | ((c >> 6) & 0x3f));
		str += (char)(0x80 | (c & 0x3f));
	} else if(c >= 0x0080) {
		// 110xxxxx 10xxxxxx
		str += (char)(0x80 | 0x40 | (c >> 6));
		str += (char)(0x80 | (c & 0x3f));
	} else {
		// Plain 7-bit character
		str += (char)c;
	}
}

/**
 * Converts a string from the system multibyte encoding to UTF-8 by going
 * through an intermediate wide string.
 *
 * @param str Source text.
 * @param tmp Receives the UTF-8 bytes (appended by wideToUtf8).
 * @return Reference to tmp.
 */
string& Text::acpToUtf8(const string& str, string& tmp) throw() {
	wstring wide;
	acpToWide(str, wide);
	wideToUtf8(wide, tmp);
	return tmp;
}

/**
 * Converts a multibyte string to a wide string, using CP_ACP on Windows and
 * mbsrtowcs() elsewhere.
 *
 * @param str Source text.
 * @param tmp Receives the converted text. It is left untouched when str is
 *            empty or the size query fails, and cleared when the conversion
 *            itself fails.
 * @return Reference to tmp.
 */
wstring& Text::acpToWide(const string& str, wstring& tmp) throw() {
	if(!str.empty()) {
#ifdef _WIN32
		// First call only measures; second call performs the conversion.
		const int needed = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, str.c_str(), (int)str.length(), NULL, 0);
		if(needed != 0) {
			tmp.resize(needed);
			const int written = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, str.c_str(), (int)str.length(), &tmp[0], needed);
			if(written == 0)
				tmp.clear();
		}
#else
		// Convert from the current locale's multibyte encoding (equivalent to CP_ACP?) to wide char.
		const char* cursor = str.c_str();
		// With a NULL destination mbsrtowcs only reports the required length.
		const int needed = mbsrtowcs(NULL, &cursor, 0, NULL);
		if(needed >= 1) {
			tmp.resize(needed);
			const int written = mbsrtowcs(&tmp[0], &cursor, needed, NULL);
			if(written < 1)
				tmp.clear();
		}
#endif
	}
	return tmp;
}

/**
 * Encodes every character of a wide string as UTF-8.
 *
 * @param str Source wide string.
 * @param tgt Receives the encoded bytes; they are appended to its current content.
 * @return Reference to tgt.
 */
string& Text::wideToUtf8(const wstring& str, string& tgt) throw() {
	for(wstring::const_iterator it = str.begin(), last = str.end(); it != last; ++it) {
		wcToUtf8(*it, tgt);
	}
	return tgt;
}

/**
 * Converts a wide string to a multibyte string, using CP_ACP on Windows and
 * wcsrtombs() elsewhere.
 *
 * @param str Source wide string.
 * @param tmp Receives the converted text. It is left untouched when str is
 *            empty or the size query fails, and cleared when the conversion
 *            itself fails.
 * @return Reference to tmp.
 */
string& Text::wideToAcp(const wstring& str, string& tmp) throw() {
	if(!str.empty()) {
#ifdef _WIN32
		// First call only measures; second call performs the conversion.
		const int needed = WideCharToMultiByte(CP_ACP, 0, str.c_str(), (int)str.length(), NULL, 0, NULL, NULL);
		if(needed != 0) {
			tmp.resize(needed);
			const int written = WideCharToMultiByte(CP_ACP, 0, str.c_str(), (int)str.length(), &tmp[0], needed, NULL, NULL);
			if(written == 0)
				tmp.clear();
		}
#else
		const wchar_t* cursor = str.c_str();
		// With a NULL destination wcsrtombs only reports the required length.
		const int needed = wcsrtombs(NULL, &cursor, 0, NULL);
		if(needed >= 1) {
			tmp.resize(needed);
			const int written = wcsrtombs(&tmp[0], &cursor, needed, NULL);
			if(written < 1)
				tmp.clear();
		}
#endif
	}
	return tmp;
}

bool Text::validateUtf8(const string& str) throw() {
	string::size_type i = 0;
	while(i < str.length()) {
		wchar_t dummy = 0;
		int j = utf8ToWc(&str[i], dummy);
		if(j < 0)
			return false;
		i += j;
	}
	return true;
}

/**
 * Converts a UTF-8 string to the system multibyte encoding by going through
 * an intermediate wide string.
 *
 * @param str UTF-8 source text.
 * @param tmp Receives the result of wideToAcp().
 * @return Reference to tmp.
 */
string& Text::utf8ToAcp(const string& str, string& tmp) throw() {
	wstring wide;
	utf8ToWide(str, wide);
	wideToAcp(wide, tmp);
	return tmp;
}

/**
 * Decodes a UTF-8 string into wide characters.
 *
 * Every sequence utf8ToWc() rejects is replaced by a single '_' and skipped.
 *
 * @param str UTF-8 source text.
 * @param tgt Receives the decoded characters; they are appended to its current content.
 * @return Reference to tgt.
 */
wstring& Text::utf8ToWide(const string& str, wstring& tgt) throw() {
	const string::size_type len = str.length();
	tgt.reserve(len);
	string::size_type pos = 0;
	while(pos < len) {
		wchar_t decoded = 0;
		const int step = utf8ToWc(str.c_str() + pos, decoded);
		if(step >= 0) {
			tgt += decoded;
			pos += step;
		} else {
			// Negative result: its magnitude is the number of bytes to skip.
			tgt += '_';
			pos += abs(step);
		}
	}
	return tgt;
}

/**
 * Lower-cases a wide string one character at a time.
 *
 * @param str Source wide string.
 * @param tmp Receives the lower-cased characters; they are appended to its current content.
 * @return Reference to tmp.
 */
wstring& Text::toLower(const wstring& str, wstring& tmp) throw() {
	tmp.reserve(str.length());
	wstring::const_iterator it = str.begin();
	const wstring::const_iterator last = str.end();
	while(it != last) {
		tmp += toLower(*it);
		++it;
	}
	return tmp;
}

/**
 * Lower-cases a UTF-8 string: each sequence is decoded, lower-cased as a wide
 * character and re-encoded.
 *
 * Every sequence utf8ToWc() rejects is replaced by a single '_' and skipped.
 *
 * @param str UTF-8 source text.
 * @param tmp Receives the result (appended); returned untouched when str is empty.
 * @return Reference to tmp.
 */
string& Text::toLower(const string& str, string& tmp) throw() {
	if(str.empty())
		return tmp;
	tmp.reserve(str.length());
	const char* cur = &str[0];
	const char* const stop = cur + str.length();
	while(cur < stop) {
		wchar_t wc = 0;
		const int len = utf8ToWc(cur, wc);
		if(len >= 0) {
			wcToUtf8(toLower(wc), tmp);
			cur += len;
		} else {
			// Negative result: its magnitude is the number of bytes to skip.
			tmp += '_';
			cur += abs(len);
		}
	}
	return tmp;
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -