⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 multibyte.c

📁 postgresql-odbc,跨平台应用
💻 C
字号:
/*--------
 * Module :			multibyte.c
 *
 * Description:		New Multibyte related additional function.
 *
 *					Create 2001-03-03 Eiji Tokuya
 *					New Create 2001-09-16 Eiji Tokuya
 *--------
 */

#include "multibyte.h"
#include "misc.h"
#include "connection.h"
#include "pgapifunc.h"
#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>

#ifndef	TRUE
#define	TRUE	1
#endif

/*
 *	Encoding name -> code table.
 *	The entry whose code is OTHER terminates the table; every lookup loop
 *	in this file stops when it reaches it.
 */
static pg_CS CS_Table[] =
{
	{ "SQL_ASCII",	SQL_ASCII },
	{ "EUC_JP",	EUC_JP },
	{ "EUC_CN",	EUC_CN },
	{ "EUC_KR",	EUC_KR },
	{ "EUC_TW",	EUC_TW },
	{ "JOHAB",	JOHAB },	/* since 7.3 */
	{ "UTF8",	UTF8 },		/* since 7.2 */
	{ "MULE_INTERNAL",MULE_INTERNAL },
	{ "LATIN1",	LATIN1 },
	{ "LATIN2",	LATIN2 },
	{ "LATIN3",	LATIN3 },
	{ "LATIN4",	LATIN4 },
	{ "LATIN5",	LATIN5 },
	{ "LATIN6",	LATIN6 },
	{ "LATIN7",	LATIN7 },
	{ "LATIN8",	LATIN8 },
	{ "LATIN9",	LATIN9 },
	{ "LATIN10",	LATIN10 },
	{ "WIN1256",	WIN1256 },	/* Arabic since 7.3 */
	{ "WIN1258",	WIN1258 },	/* Vietnamese since 8.1 */
	{ "WIN866",	WIN866 },	/* since 8.1 */
	{ "WIN874",	WIN874 },	/* Thai since 7.3 */
	{ "KOI8",	KOI8R },
	{ "WIN1251",	WIN1251 },	/* Cyrillic */
	{ "WIN1252",	WIN1252 },	/* Western Europe since 8.1 */
	{ "ISO_8859_5", ISO_8859_5 },
	{ "ISO_8859_6", ISO_8859_6 },
	{ "ISO_8859_7", ISO_8859_7 },
	{ "ISO_8859_8", ISO_8859_8 },
	{ "WIN1250",	WIN1250 },	/* Central Europe */
	{ "WIN1253",	WIN1253 },	/* Greek since 8.2 */
	{ "WIN1254",	WIN1254 },	/* Turkish since 8.2 */
	{ "WIN1255",	WIN1255 },	/* Hebrew since 8.2 */
	{ "WIN1257",	WIN1257 },	/* Baltic(North Europe) since 8.2 */

	{ "EUC_JIS_2004", EUC_JIS_2004},	/* EUC for SHIFT-JIS-2004 Japanese, since 8.3 */
	{ "SJIS",	SJIS },
	{ "BIG5",	BIG5 },
	{ "GBK",	GBK },		/* since 7.3 */
	{ "UHC",	UHC },		/* since 7.3 */
	{ "GB18030",	GB18030 },	/* since 7.3 */
	{ "SHIFT_JIS_2004", SHIFT_JIS_2004 },	/* SHIFT-JIS-2004 Japanese, standard JIS X 0213, since 8.3 */
	{ "OTHER",	OTHER }
};

/*
 *	Alternative names accepted for some encodings.
 *	Terminated by an OTHER entry, like CS_Table.
 */
static pg_CS CS_Alias[] =
{
	{ "UNICODE",	UTF8 },
	{ "TCVN",	WIN1258 },
	{ "ALT",	WIN866 },
	{ "WIN",	WIN1251 },
	{ "OTHER",	OTHER }
};

CSTR	OTHER_STRING = "OTHER";

/*
 *	Translate an encoding name into its code.
 *
 *	The name is compared case-insensitively, first against CS_Table and
 *	then against CS_Alias. Returns OTHER when the name is NULL or matches
 *	neither table.
 */
int
pg_CS_code(const UCHAR *characterset_string)
{
	int i, c = -1;

	/* A missing name cannot match anything. */
	if (!characterset_string)
		return OTHER;

	for (i = 0; CS_Table[i].code != OTHER; i++)
	{
		if (0 == stricmp(characterset_string, CS_Table[i].name))
		{
			c = CS_Table[i].code;
			break;
		}
	}
	if (c < 0)
	{
		for (i = 0; CS_Alias[i].code != OTHER; i++)
		{
			if (0 == stricmp(characterset_string, CS_Alias[i].name))
			{
				c = CS_Alias[i].code;
				break;
			}
		}
	}
	if (c < 0)
		c = OTHER;
	return (c);
}

/*
 *	Scan a ';'-separated list of statements for
 *		set client_encoding to <value>
 *	and return a malloc'ed copy of <value> (the caller frees it).
 *
 *	<value> may be enclosed in LITERAL_QUOTE characters; otherwise it ends
 *	at the first whitespace, ';' or end of string. Keywords are matched
 *	case-insensitively. When several such statements are present the last
 *	one wins. Returns NULL when conn_settings is NULL, when no such
 *	statement is found, or when memory cannot be allocated.
 */
UCHAR *
check_client_encoding(const UCHAR *conn_settings)
{
	const UCHAR *cptr, *sptr = NULL;
	UCHAR	*rptr;
	BOOL	allowed_cmd = TRUE;
	int	step = 0;	/* number of tokens of the statement matched so far */
	size_t	len = 0;

	if (!conn_settings)
		return NULL;

	for (cptr = conn_settings; *cptr; cptr++)
	{
		if (';' == *cptr)
		{
			/* statement separator: start matching afresh */
			allowed_cmd = TRUE;
			step = 0;
			continue;
		}
		if (!allowed_cmd)
			continue;
		if (isspace(*cptr))
			continue;
		switch (step)
		{
			case 0:
				if (0 != strnicmp(cptr, "set", 3))
				{
					allowed_cmd = FALSE;
					continue;
				}
				step++;
				cptr += 3;
				break;
			case 1:
				if (0 != strnicmp(cptr, "client_encoding", 15))
				{
					allowed_cmd = FALSE;
					continue;
				}
				step++;
				cptr += 15;
				break;
			case 2:
				if (0 != strnicmp(cptr, "to", 2))
				{
					allowed_cmd = FALSE;
					continue;
				}
				step++;
				cptr += 2;
				break;
			case 3:
				if (LITERAL_QUOTE == *cptr)
				{
					cptr++;
					for (sptr = cptr; *cptr && *cptr != LITERAL_QUOTE; cptr++)
						;
				}
				else
				{
					/* an unquoted value also ends at the statement separator */
					for (sptr = cptr; *cptr && ';' != *cptr && !isspace(*cptr); cptr++)
						;
				}
				len = cptr - sptr;
				/*
				 * Step back so that the loop increment lands on the ';'
				 * and the next statement is parsed from a clean state.
				 * At least one value byte was consumed, so this cannot
				 * move before sptr.
				 */
				if (';' == *cptr)
					cptr--;
				step++;
				break;
		}
		/*
		 * A keyword or value may end exactly at the terminator; stop here
		 * so the loop increment does not step over it.
		 */
		if (!*cptr)
			break;
	}
	if (!sptr)
		return NULL;
	rptr = malloc(len + 1);
	if (!rptr)
		return NULL;
	memcpy(rptr, sptr, len);
	rptr[len] = '\0';
	mylog("extracted a client_encoding '%s' from conn_settings\n", rptr);
	return rptr;
}

/*
 *	Translate an encoding code into its name from CS_Table.
 *	Returns OTHER_STRING when the code is not in the table.
 */
const UCHAR *
pg_CS_name(int characterset_code)
{
	int i;

	for (i = 0; CS_Table[i].code != OTHER; i++)
	{
		if (CS_Table[i].code == characterset_code)
			return CS_Table[i].name;
	}
	return (OTHER_STRING);
}

/*
 *	Return the value stored in mb_maxbyte_per_char for an encoding code;
 *	any code not listed below yields 1.
 */
static int
pg_mb_maxlen(int characterset_code)
{
	switch (characterset_code)
	{
		case UTF8:
			return 6;
		case EUC_TW:
			return 4;
		case EUC_JIS_2004:
		case EUC_JP:
		case GB18030:
			return 3;
		case SHIFT_JIS_2004:
		case SJIS:
		case BIG5:
		case GBK:
		case UHC:
		case EUC_CN:
		case EUC_KR:
		case JOHAB:
			return 2;
		default:
			return 1;
	}
}

/*
 *	Advance the multibyte scan state by one byte.
 *
 *	stat			state returned for the previous byte (0 to start)
 *	character		value of the current byte
 *	characterset_code	encoding code (see CS_Table)
 *
 *	Returns the new state. Within this file pg_mbslen() counts a byte as
 *	one character when the state is below 2 and pg_mbschr() only compares
 *	a byte when the state is 0, so states >= 2 mark bytes inside a
 *	multibyte sequence. A zero byte always yields state 0, as does any
 *	encoding code not handled below.
 *
 *	NOTE(review): for UTF8 the last byte of a sequence yields 0 whereas
 *	the two-byte encodings below yield 1 for their trailing byte.
 */
int
pg_CS_stat(int stat,unsigned int character,int characterset_code)
{
	if (character == 0)
		stat = 0;
	switch (characterset_code)
	{
		case UTF8:
			{
				if (stat < 2 &&
					character >= 0x80)
				{
					if (character >= 0xfc)
						stat = 6;
					else if (character >= 0xf8)
						stat = 5;
					else if (character >= 0xf0)
						stat = 4;
					else if (character >= 0xe0)
						stat = 3;
					else if (character >= 0xc0)
						stat = 2;
				}
				else if (stat > 2 &&
					character > 0x7f)
					stat--;
				else
					stat=0;
			}
			break;
/* SHIFT_JIS_2004 Support. */
		case SHIFT_JIS_2004:
			{
				if (stat < 2 &&
					character >= 0x81 && character <= 0x9f)
					stat = 2;
				else if (stat < 2 &&
					character >= 0xe0 && character <= 0xef)
					stat = 2;
				else if (stat < 2 &&
					character >= 0xf0 && character <= 0xfc)
					stat = 2;
				else if (stat == 2)
					stat = 1;
				else
					stat = 0;
			}
			break;
/* Shift-JIS Support. */
		case SJIS:
			{
				if (stat < 2 &&
					character > 0x80 &&
					!(character > 0x9f &&
					character < 0xe0))
					stat = 2;
				else if (stat == 2)
					stat = 1;
				else
					stat = 0;
			}
			break;
/* Chinese Big5 Support. */
		case BIG5:
			{
				if (stat < 2 &&
					character > 0xA0)
					stat = 2;
				else if (stat == 2)
					stat = 1;
				else
					stat = 0;
			}
			break;
/* Chinese GBK Support. */
		case GBK:
			{
				if (stat < 2 &&
					character > 0x7F)
					stat = 2;
				else if (stat == 2)
					stat = 1;
				else
					stat = 0;
			}
			break;
/* Korean UHC Support. */
		case UHC:
			{
				if (stat < 2 &&
					character > 0x7F)
					stat = 2;
				else if (stat == 2)
					stat = 1;
				else
					stat = 0;
			}
			break;
		case EUC_JIS_2004:
			/* 0x8f is JIS X 0212 + JIS X 0213(2) 3 byte */
			/* 0x8e is JIS X 0201 2 byte */
			/* 0xa0-0xff is JIS X 0213(1) 2 byte */
		case EUC_JP:
			/* 0x8f is JIS X 0212 3 byte */
			/* 0x8e is JIS X 0201 2 byte */
			/* 0xa0-0xff is JIS X 0208 2 byte */
			{
				if (stat < 3 &&
					character == 0x8f)	/* JIS X 0212 */
					stat = 3;
				else
				if (stat != 2 &&
					(character == 0x8e ||
					character > 0xa0))	/* Half Katakana HighByte & Kanji HighByte */
					stat = 2;
				else if (stat == 2)
					stat = 1;
				else
					stat = 0;
			}
			break;
/* EUC_CN, EUC_KR, JOHAB Support */
		case EUC_CN:
		case EUC_KR:
		case JOHAB:
			{
				if (stat < 2 &&
					character > 0xa0)
					stat = 2;
				else if (stat == 2)
					stat = 1;
				else
					stat = 0;
			}
			break;
		case EUC_TW:
			{
				if (stat < 4 &&
					character == 0x8e)
					stat = 4;
				else if (stat == 4 &&
					character > 0xa0)
					stat = 3;
				else if ((stat == 3 ||
					stat < 2) &&
					character > 0xa0)
					stat = 2;
				else if (stat == 2)
					stat = 1;
				else
					stat = 0;
			}
			break;
			/*Chinese GB18030 support.Added by Bill Huang <bhuang@redhat.com> <bill_huanghb@ybb.ne.jp>*/
		case GB18030:
			{
				if (stat < 2 && character > 0x80)
					stat = 2;
				else if (stat == 2)
				{
					if (character >= 0x30 && character <= 0x39)
						stat = 3;
					else
						stat = 1;
				}
				else if (stat == 3)
				{
					if (character >= 0x30 && character <= 0x39)
						stat = 1;
					else
						stat = 3;
				}
				else
					stat = 0;
			}
			break;
		default:
			{
				stat = 0;
			}
			break;
	}
	return stat;
}

/*
 *	Find the first byte equal to 'character' at which the scan state of
 *	encoding 'csc' is 0, so that a matching byte value inside a multibyte
 *	sequence is not reported. Returns a pointer to that byte, or NULL when
 *	there is none.
 */
UCHAR *
pg_mbschr(int csc, const UCHAR *string, unsigned int character)
{
	int			mb_st = 0;
	const UCHAR *s, *rs = NULL;

	for (s = string; *s; s++)
	{
		mb_st = pg_CS_stat(mb_st, (UCHAR) *s, csc);
		if (mb_st == 0 && (*s == character))
		{
			rs = s;
			break;
		}
	}
	return ((UCHAR *) rs);
}

/*
 *	Count the characters of 'string' in encoding 'csc': every byte whose
 *	scan state is below 2 adds one to the result.
 */
size_t
pg_mbslen(int csc, const UCHAR *string)
{
	UCHAR *s;
	size_t	len;
	int	cs_stat;

	for (len = 0, cs_stat = 0, s = (UCHAR *) string; *s != 0; s++)
	{
		cs_stat = pg_CS_stat(cs_stat,(unsigned int) *s, csc);
		if (cs_stat < 2)
			len++;
	}
	return len;
}

/*
 *	Return a pointer to the character following the one that starts at
 *	'current', or NULL when 'current' points at the terminator.
 *
 *	The step is the state pg_CS_stat() reports for the first byte (1 when
 *	that state is 0). The result never lies beyond the terminating zero
 *	byte, even when the string ends in the middle of a multibyte sequence.
 */
UCHAR *
pg_mbsinc(int csc, const UCHAR *current )
{
	int	mb_stat, i;

	if (*current == 0)
		return NULL;

	mb_stat = (int) pg_CS_stat(0, *current, csc);
	if (mb_stat == 0)
		mb_stat = 1;
	/* Stop early on a truncated sequence instead of stepping over the end. */
	for (i = 1; i < mb_stat && current[i] != 0; i++)
		;
	return ((UCHAR *) current + i);
}

/*
 *	Ask the server for the client encoding with
 *	"select pg_client_encoding()".
 *	Returns a strdup'ed name the caller must free, or NULL on failure.
 */
static char *
CC_lookup_cs_new(ConnectionClass *self)
{
	char		*encstr = NULL;
	QResultClass	*res;

	res = CC_send_query(self, "select pg_client_encoding()", NULL, IGNORE_ABORT_ON_CONN | ROLLBACK_ON_ERROR, NULL);
	if (QR_command_maybe_successful(res))
	{
		const char 	*enc = QR_get_value_backend_text(res, 0, 0);

		if (enc)
			encstr = strdup(enc);
	}
	QR_Destructor(res);
	return encstr;
}

/*
 *	Obtain the client encoding with "Show Client_Encoding"; the caller
 *	uses this for servers older than 7.2. The name is taken from the sixth
 *	whitespace-separated word of the message returned with
 *	SQL_SUCCESS_WITH_INFO.
 *	Returns a strdup'ed name the caller must free, or NULL on failure.
 */
static char *
CC_lookup_cs_old(ConnectionClass *self)
{
	char		*encstr = NULL;
	HSTMT		hstmt;
	RETCODE		result;

	result = PGAPI_AllocStmt(self, &hstmt);
	if (!SQL_SUCCEEDED(result))
		return encstr;

	result = PGAPI_ExecDirect(hstmt, "Show Client_Encoding", SQL_NTS, 0);
	if (result == SQL_SUCCESS_WITH_INFO)
	{
		char sqlState[8], errormsg[128], enc[32];

		/* %31s: the field width keeps the word within enc[32]. */
		if (PGAPI_Error(NULL, NULL, hstmt, sqlState, NULL, errormsg,
			sizeof(errormsg), NULL) == SQL_SUCCESS &&
		    sscanf(errormsg, "%*s %*s %*s %*s %*s %31s", enc) > 0)
			encstr = strdup(enc);
	}
	PGAPI_FreeStmt(hstmt, SQL_DROP);
	return encstr;
}

/*
 *	This function works under Windows or Unicode case only.
 *	Simply returns NULL under other OSs.
 *
 *	conn		connection whose driver mode / server version is examined
 *	setenc		encoding requested explicitly, or NULL; returned as is
 *			unless it equals OTHER_STRING
 *	currenc		encoding currently in effect; may be NULL
 *	bStartup	TRUE when called at connection startup
 *
 *	Returns a pointer to a constant encoding name (or setenc itself), or
 *	NULL when no encoding should be forced.
 */
const char *
get_environment_encoding(const ConnectionClass *conn, const char *setenc, const char *currenc, BOOL bStartup)
{
	const char *wenc = NULL;
#ifdef	WIN32
	int	acp;
#endif /* WIN32 */

#ifdef	UNICODE_SUPPORT
	if (CC_is_in_unicode_driver(conn))
		return "UTF8";
#endif /* UNICODE_SUPPORT */
	if (setenc && stricmp(setenc, OTHER_STRING))
		return setenc;
#ifdef	WIN32
	acp = GetACP();
	if (acp >= 1251 && acp <= 1258)
	{
		/* currenc may be NULL when the lookup of the current encoding failed */
		if (bStartup ||
		    (currenc && stricmp(currenc, "SQL_ASCII") == 0))
			return wenc;
	}
	switch (acp)
	{
		case 932:
			wenc = "SJIS";
			break;
		case 936:
			if (!bStartup && PG_VERSION_GT(conn, 7.2))
				wenc = "GBK";
			break;
		case 949:
			if (!bStartup && PG_VERSION_GT(conn, 7.2))
				wenc = "UHC";
			break;
		case 950:
			wenc = "BIG5";
			break;
		case 1250:
			wenc = "WIN1250";
			break;
		case 1251:
			wenc = "WIN1251";
			break;
		case 1256:
			if (PG_VERSION_GE(conn, 7.3))
				wenc = "WIN1256";
			break;
		case 1252:
			if (currenc && strnicmp(currenc, "LATIN", 5) == 0)
				break;
			if (PG_VERSION_GE(conn, 8.1))
				wenc = "WIN1252";
			else
				wenc = "LATIN1";
			break;
		case 1258:
			if (PG_VERSION_GE(conn, 8.1))
				wenc = "WIN1258";
			break;
		case 1253:
			if (PG_VERSION_GE(conn, 8.2))
				wenc = "WIN1253";
			break;
		case 1254:
			if (PG_VERSION_GE(conn, 8.2))
				wenc = "WIN1254";
			break;
		case 1255:
			if (PG_VERSION_GE(conn, 8.2))
				wenc = "WIN1255";
			break;
		case 1257:
			if (PG_VERSION_GE(conn, 8.2))
				wenc = "WIN1257";
			break;
	}
#endif /* WIN32 */
	return wenc;
}

/*
 *	Determine the client encoding of a connection and store the result in
 *	self->original_client_encoding, self->ccsc and
 *	self->mb_maxbyte_per_char.
 *
 *	The encoding already recorded in original_client_encoding is preferred;
 *	otherwise current_client_encoding or, failing that, the value reported
 *	by the server is used. Without UNICODE_SUPPORT, and when no encoding
 *	was recorded yet, the encoding suggested by get_environment_encoding()
 *	is sent to the server with "set client_encoding" and adopted when that
 *	command succeeds.
 */
void
CC_lookup_characterset(ConnectionClass *self)
{
	char	*encspec = NULL, *currenc = NULL, *tencstr;
	CSTR func = "CC_lookup_characterset";

	mylog("%s: entering...\n", func);
	if (self->original_client_encoding)
		encspec = strdup(self->original_client_encoding);
	if (self->current_client_encoding)
		currenc = strdup(self->current_client_encoding);
	else if (PG_VERSION_LT(self, 7.2))
		currenc = CC_lookup_cs_old(self);
	else
		currenc = CC_lookup_cs_new(self);
	tencstr = encspec ? encspec : currenc;
	if (self->original_client_encoding)
	{
		/* tencstr is NULL only when the copies above could not be made */
		if (tencstr && stricmp(self->original_client_encoding, tencstr))
		{
			char msg[256];

			snprintf(msg, sizeof(msg), "The client_encoding '%s' was changed to '%s'", self->original_client_encoding, tencstr);
			CC_set_error(self, CONN_OPTION_VALUE_CHANGED, msg, func);
		}
		free(self->original_client_encoding);
	}
#ifndef	UNICODE_SUPPORT
	else
	{
		const char *wenc = get_environment_encoding(self, encspec, currenc, FALSE);

		if (wenc && (!tencstr || stricmp(tencstr, wenc)))
		{
			QResultClass	*res;
			char		query[64];
			int		errnum = CC_get_errornumber(self);
			BOOL		cmd_success;

			snprintf(query, sizeof(query), "set client_encoding to '%s'", wenc);
			res = CC_send_query(self, query, NULL, IGNORE_ABORT_ON_CONN | ROLLBACK_ON_ERROR, NULL);
			cmd_success = QR_command_maybe_successful(res);
			QR_Destructor(res);
			/* restore the error number saved before sending the query */
			CC_set_errornumber(self, errnum);
			if (cmd_success)
			{
				self->original_client_encoding = strdup(wenc);
				self->ccsc = pg_CS_code(wenc);
				/* keep the per-character byte limit in step with ccsc */
				self->mb_maxbyte_per_char = pg_mb_maxlen(self->ccsc);
				free(encspec);
				free(currenc);
				return;
			}
		}
	}
#endif /* UNICODE_SUPPORT */
	if (tencstr)
	{
		/* the connection takes ownership of tencstr */
		self->original_client_encoding = tencstr;
		if (encspec && currenc)
			free(currenc);
		self->ccsc = pg_CS_code(tencstr);
		qlog("    [ Client encoding = '%s' (code = %d) ]\n", self->original_client_encoding, self->ccsc);
		if (self->ccsc < 0)
		{
			char msg[256];

			snprintf(msg, sizeof(msg), "would handle the encoding '%s' like ASCII", tencstr);
			CC_set_error(self, CONN_OPTION_VALUE_CHANGED, msg, func);
		}
	}
	else
	{
		self->ccsc = SQL_ASCII;
		self->original_client_encoding = NULL;
	}
	self->mb_maxbyte_per_char = pg_mb_maxlen(self->ccsc);
}

/*
 *	Initialize an encoded_str cursor over 'str' for encoding 'ccsc'.
 *	The position starts at -1 so that the first encoded_nextchar() call
 *	reads the byte at index 0. 'str' is not copied.
 */
void encoded_str_constr(encoded_str *encstr, int ccsc, const char *str)
{
	encstr->ccsc = ccsc;
	encstr->encstr = str;
	encstr->pos = -1;
	encstr->ccst = 0;
}

/*
 *	Move the cursor to the next byte, update the scan state (ccst) for
 *	it and return the byte.
 */
int encoded_nextchar(encoded_str *encstr)
{
	int	chr;

	chr = encstr->encstr[++encstr->pos];
	/*
	 * Hand pg_CS_stat() the byte value in the range 0..255; this matters
	 * if the buffer element type is a signed char (TODO confirm against
	 * the encoded_str declaration). The returned value is left as read.
	 */
	encstr->ccst = pg_CS_stat(encstr->ccst, (unsigned int) (unsigned char) chr, encstr->ccsc);
	return chr;
}

/*
 *	Move the cursor by 'shift' bytes without updating the scan state.
 *	Returns the new position.
 */
ssize_t encoded_position_shift(encoded_str *encstr, size_t shift)
{
	encstr->pos += shift;
	return encstr->pos;
}

/*
 *	Set the cursor to the absolute position 'abspos', update the scan
 *	state (ccst) for the byte found there and return that byte.
 */
int encoded_byte_check(encoded_str *encstr, size_t abspos)
{
	int	chr;

	chr = encstr->encstr[encstr->pos = abspos];
	/* see encoded_nextchar() in this file for the reason of the narrowing cast */
	encstr->ccst = pg_CS_stat(encstr->ccst, (unsigned int) (unsigned char) chr, encstr->ccsc);
	return chr;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -