oracle_compat.c

来自「PostgreSQL 8.1.4的源码 适用于Linux下的开源数据库系统」· C语言 代码 · 共 1,269 行 · 第 1/2 页

C
1,269
字号
/*------------------------------------------------------------------------- * oracle_compat.c *	Oracle compatible functions. * * Copyright (c) 1996-2005, PostgreSQL Global Development Group * *	Author: Edmund Mergl <E.Mergl@bawue.de> *	Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org> * * * IDENTIFICATION *	$PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.64 2005/11/04 22:19:04 petere Exp $ * *------------------------------------------------------------------------- */#include "postgres.h"#include <ctype.h>#include <limits.h>/* * towlower() and friends should be in <wctype.h>, but some pre-C99 systems * declare them in <wchar.h>. */#ifdef HAVE_WCHAR_H#include <wchar.h>#endif#ifdef HAVE_WCTYPE_H#include <wctype.h>#endif#include "utils/builtins.h"#include "utils/pg_locale.h"#include "mb/pg_wchar.h"/* * If the system provides the needed functions for wide-character manipulation * (which are all standardized by C99), then we implement upper/lower/initcap * using wide-character functions.	Otherwise we use the traditional <ctype.h> * functions, which of course will not work as desired in multibyte character * sets.  Note that in either case we are effectively assuming that the * database character encoding matches the encoding implied by LC_CTYPE. * * We assume if we have these two functions, we have their friends too, and * can use the wide-character method. */#if defined(HAVE_WCSTOMBS) && defined(HAVE_TOWLOWER)#define USE_WIDE_UPPER_LOWER#endifstatic text *dotrim(const char *string, int stringlen,	   const char *set, int setlen,	   bool doltrim, bool dortrim);#ifdef USE_WIDE_UPPER_LOWER/* * Convert a TEXT value into a palloc'd wchar string. 
*/
/*
 * texttowcs
 *
 * txt is the TEXT value to convert.  The result is a freshly palloc'd
 * wchar_t array filled in by mbstowcs().  An ERROR is raised when the input
 * length is out of range or when mbstowcs() rejects the input bytes.
 */
static wchar_t *
texttowcs(const text *txt)
{
	int			srclen = VARSIZE(txt) - VARHDRSZ;
	int			maxlen = (int) (INT_MAX / sizeof(wchar_t)) - 1;
	char	   *cstr;
	wchar_t    *wide;
	size_t		nconv;

	/* Keep (srclen + 1) * sizeof(wchar_t), computed below, within range */
	if (srclen < 0 || srclen > maxlen)
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));

	/* mbstowcs() needs a NUL-terminated source, so build a terminated copy */
	cstr = (char *) palloc(srclen + 1);
	memcpy(cstr, VARDATA(txt), srclen);
	cstr[srclen] = '\0';

	/* There can never be more wide codes than there are input bytes */
	wide = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));

	/* Convert, leaving room for the terminating zero */
	nconv = mbstowcs(wide, cstr, srclen + 1);

	if (nconv == (size_t) -1)
	{
		/*
		 * mbstowcs() found an invalid multibyte character.  Let
		 * pg_verifymbstr have a look at the string first, since it can
		 * produce a more useful error message.  If it finds nothing wrong,
		 * the string is acceptable to us but not to mbstowcs(), which
		 * suggests that the LC_CTYPE locale does not match the database
		 * encoding; fall through to a generic error in that case.
		 */
		pg_verifymbstr(cstr, srclen, false);
		ereport(ERROR,
				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
				 errmsg("invalid multibyte character for locale"),
				 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
	}

	Assert(nconv <= (size_t) srclen);

	return wide;
}

/*
 * Convert a wchar string into a palloc'd TEXT value.  The wchar string
 * must be zero-terminated, but we also require the caller to pass the string
 * length, since it will know it anyway in current uses.
*/static text *wcstotext(const wchar_t *str, int ncodes){	text	   *result;	size_t		nbytes;	/* Overflow paranoia */	if (ncodes < 0 ||		ncodes > (int) ((INT_MAX - VARHDRSZ) / MB_CUR_MAX) - 1)		ereport(ERROR,				(errcode(ERRCODE_OUT_OF_MEMORY),				 errmsg("out of memory")));	/* Make workspace certainly large enough for result */	result = (text *) palloc((ncodes + 1) * MB_CUR_MAX + VARHDRSZ);	/* Do the conversion */	nbytes = wcstombs((char *) VARDATA(result), str,					  (ncodes + 1) * MB_CUR_MAX);	if (nbytes == (size_t) -1)	{		/* Invalid multibyte character encountered ... shouldn't happen */		ereport(ERROR,				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),				 errmsg("invalid multibyte character for locale")));	}	Assert(nbytes <= (size_t) (ncodes * MB_CUR_MAX));	VARATT_SIZEP(result) = nbytes + VARHDRSZ;	return result;}#endif   /* USE_WIDE_UPPER_LOWER *//* * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding. * To make use of the upper/lower functionality, we need to map UTF8 to * UTF16, which for some reason mbstowcs and wcstombs won't do for us. * This conversion layer takes care of it. 
*/#ifdef WIN32/* texttowcs for the case of UTF8 to UTF16 */static wchar_t *win32_utf8_texttowcs(const text *txt){	int			nbytes = VARSIZE(txt) - VARHDRSZ;	wchar_t    *result;	int			r;	/* Overflow paranoia */	if (nbytes < 0 ||		nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)		ereport(ERROR,				(errcode(ERRCODE_OUT_OF_MEMORY),				 errmsg("out of memory")));	/* Output workspace cannot have more codes than input bytes */	result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));	/* stupid Microsloth API does not work for zero-length input */	if (nbytes == 0)		r = 0;	else	{		/* Do the conversion */		r = MultiByteToWideChar(CP_UTF8, 0, VARDATA(txt), nbytes,								result, nbytes);		if (!r)					/* assume it's NO_UNICODE_TRANSLATION */		{			/* see notes above about error reporting */			pg_verifymbstr(VARDATA(txt), nbytes, false);			ereport(ERROR,					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),					 errmsg("invalid multibyte character for locale"),					 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));		}	}	Assert(r <= nbytes);	result[r] = 0;	return result;}/* wcstotext for the case of UTF16 to UTF8 */static text *win32_utf8_wcstotext(const wchar_t *str){	text	   *result;	int			nbytes;	int			r;	nbytes = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL);	if (nbytes == 0)			/* shouldn't happen */		ereport(ERROR,				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),				 errmsg("UTF-16 to UTF-8 translation failed: %lu",						GetLastError())));	result = palloc(nbytes + VARHDRSZ);	r = WideCharToMultiByte(CP_UTF8, 0, str, -1, VARDATA(result), nbytes,							NULL, NULL);	if (r == 0)					/* shouldn't happen */		ereport(ERROR,				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),				 errmsg("UTF-16 to UTF-8 translation failed: %lu",						GetLastError())));	VARATT_SIZEP(result) = nbytes + VARHDRSZ - 1;		/* -1 to ignore null */	return result;}/* interface layer to check which encoding is in use */static wchar_t *win32_texttowcs(const 
text *txt){	if (GetDatabaseEncoding() == PG_UTF8)		return win32_utf8_texttowcs(txt);	else		return texttowcs(txt);}static text *win32_wcstotext(const wchar_t *str, int ncodes){	if (GetDatabaseEncoding() == PG_UTF8)		return win32_utf8_wcstotext(str);	else		return wcstotext(str, ncodes);}/* use macros to cause routines below to call interface layer */#define texttowcs	win32_texttowcs#define wcstotext	win32_wcstotext#endif   /* WIN32 *//******************************************************************** * * lower * * Syntax: * *	 text lower(text string) * * Purpose: * *	 Returns string, with all letters forced to lowercase. * ********************************************************************/Datumlower(PG_FUNCTION_ARGS){#ifdef USE_WIDE_UPPER_LOWER	/*	 * Use wide char code only when max encoding length > 1 and ctype != C.	 * Some operating systems fail with multi-byte encodings and a C locale.	 * Also, for a C locale there is no need to process as multibyte.	 */	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())	{		text	   *string = PG_GETARG_TEXT_P(0);		text	   *result;		wchar_t    *workspace;		int			i;		workspace = texttowcs(string);		for (i = 0; workspace[i] != 0; i++)			workspace[i] = towlower(workspace[i]);		result = wcstotext(workspace, i);		pfree(workspace);		PG_RETURN_TEXT_P(result);	}	else#endif   /* USE_WIDE_UPPER_LOWER */	{		text	   *string = PG_GETARG_TEXT_P_COPY(0);		char	   *ptr;		int			m;		/*		 * Since we copied the string, we can scribble directly on the value		 */		ptr = VARDATA(string);		m = VARSIZE(string) - VARHDRSZ;		while (m-- > 0)		{			*ptr = tolower((unsigned char) *ptr);			ptr++;		}		PG_RETURN_TEXT_P(string);	}}/******************************************************************** * * upper * * Syntax: * *	 text upper(text string) * * Purpose: * *	 Returns string, with all letters forced to uppercase. 
* ********************************************************************/Datumupper(PG_FUNCTION_ARGS){#ifdef USE_WIDE_UPPER_LOWER	/*	 * Use wide char code only when max encoding length > 1 and ctype != C.	 * Some operating systems fail with multi-byte encodings and a C locale.	 * Also, for a C locale there is no need to process as multibyte.	 */	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())	{		text	   *string = PG_GETARG_TEXT_P(0);		text	   *result;		wchar_t    *workspace;		int			i;		workspace = texttowcs(string);		for (i = 0; workspace[i] != 0; i++)			workspace[i] = towupper(workspace[i]);		result = wcstotext(workspace, i);		pfree(workspace);		PG_RETURN_TEXT_P(result);	}	else#endif   /* USE_WIDE_UPPER_LOWER */	{		text	   *string = PG_GETARG_TEXT_P_COPY(0);		char	   *ptr;		int			m;		/*		 * Since we copied the string, we can scribble directly on the value		 */		ptr = VARDATA(string);		m = VARSIZE(string) - VARHDRSZ;		while (m-- > 0)		{			*ptr = toupper((unsigned char) *ptr);			ptr++;		}		PG_RETURN_TEXT_P(string);	}}/******************************************************************** * * initcap * * Syntax: * *	 text initcap(text string) * * Purpose: * *	 Returns string, with first letter of each word in uppercase, all *	 other letters in lowercase. A word is defined as a sequence of *	 alphanumeric characters, delimited by non-alphanumeric *	 characters. * ********************************************************************/Datuminitcap(PG_FUNCTION_ARGS){#ifdef USE_WIDE_UPPER_LOWER	/*	 * Use wide char code only when max encoding length > 1 and ctype != C.	 * Some operating systems fail with multi-byte encodings and a C locale.	 * Also, for a C locale there is no need to process as multibyte.	 
*/	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())	{		text	   *string = PG_GETARG_TEXT_P(0);		text	   *result;		wchar_t    *workspace;		int			wasalnum = 0;		int			i;		workspace = texttowcs(string);		for (i = 0; workspace[i] != 0; i++)		{			if (wasalnum)				workspace[i] = towlower(workspace[i]);			else				workspace[i] = towupper(workspace[i]);			wasalnum = iswalnum(workspace[i]);		}		result = wcstotext(workspace, i);		pfree(workspace);		PG_RETURN_TEXT_P(result);	}	else#endif   /* USE_WIDE_UPPER_LOWER */	{		text	   *string = PG_GETARG_TEXT_P_COPY(0);		int			wasalnum = 0;		char	   *ptr;		int			m;		/*		 * Since we copied the string, we can scribble directly on the value		 */		ptr = VARDATA(string);		m = VARSIZE(string) - VARHDRSZ;		while (m-- > 0)		{			if (wasalnum)				*ptr = tolower((unsigned char) *ptr);			else				*ptr = toupper((unsigned char) *ptr);			wasalnum = isalnum((unsigned char) *ptr);			ptr++;		}		PG_RETURN_TEXT_P(string);	}}/******************************************************************** * * lpad * * Syntax: * *	 text lpad(text string1, int4 len, text string2) * * Purpose: * *	 Returns string1, left-padded to length len with the sequence of *	 characters in string2.  If len is less than the length of string1, *	 instead truncate (on the right) to len. 
*
 ********************************************************************/

/*
 * Arguments: 0 = string1 (text), 1 = len (int4, counted in characters),
 * 2 = string2 (text, the fill pattern).
 *
 * The result buffer is sized for the worst case of
 * pg_database_encoding_max_length() bytes per character; the length word
 * is set afterwards from the number of bytes actually written.
 *
 * Raises ERRCODE_PROGRAM_LIMIT_EXCEEDED when that worst-case size plus the
 * header would not fit in an int.
 */
Datum
lpad(PG_FUNCTION_ARGS)
{
	text	   *string1 = PG_GETARG_TEXT_P(0);
	int32		len = PG_GETARG_INT32(1);
	text	   *string2 = PG_GETARG_TEXT_P(2);
	text	   *ret;
	char	   *ptr1,
			   *ptr2,
			   *ptr2end,
			   *ptr_ret;
	int			m,
				s1len,
				s2len;
	int			maxlen;
	int			bytelen;

	/* Negative len is silently taken as zero */
	if (len < 0)
		len = 0;

	s1len = VARSIZE(string1) - VARHDRSZ;
	if (s1len < 0)
		s1len = 0;				/* shouldn't happen */

	s2len = VARSIZE(string2) - VARHDRSZ;
	if (s2len < 0)
		s2len = 0;				/* shouldn't happen */

	/* From here on s1len counts characters, while s2len stays in bytes */
	s1len = pg_mbstrlen_with_len(VARDATA(string1), s1len);

	if (s1len > len)
		s1len = len;			/* truncate string1 to len chars */

	if (s2len <= 0)
		len = s1len;			/* nothing to pad with, so don't pad */

	maxlen = pg_database_encoding_max_length();

	/*
	 * Check for integer overflow BEFORE multiplying: signed overflow is
	 * undefined behavior, so testing the product afterwards is not
	 * reliable.  The bound also leaves room for the VARHDRSZ added in the
	 * allocation request below.
	 */
	if (len != 0 && len > (INT_MAX - VARHDRSZ) / maxlen)
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("requested length too large")));

	bytelen = maxlen * len;

	ret = (text *) palloc(VARHDRSZ + bytelen);

	/* Number of fill characters that go in front of string1 */
	m = len - s1len;

	ptr2 = VARDATA(string2);
	ptr2end = ptr2 + s2len;
	ptr_ret = VARDATA(ret);

	/* Emit the padding, cycling through string2 one character at a time */
	while (m--)
	{
		int			mlen = pg_mblen(ptr2);

		memcpy(ptr_ret, ptr2, mlen);
		ptr_ret += mlen;
		ptr2 += mlen;
		if (ptr2 == ptr2end)	/* wrap around at end of s2 */
			ptr2 = VARDATA(string2);
	}

	ptr1 = VARDATA(string1);

	/* Then append the (possibly truncated) string1, character by character */
	while (s1len--)
	{
		int			mlen = pg_mblen(ptr1);

		memcpy(ptr_ret, ptr1, mlen);
		ptr_ret += mlen;
		ptr1 += mlen;
	}

	/* Total size, header included, is the distance written from ret */
	VARATT_SIZEP(ret) = ptr_ret - (char *) ret;

	PG_RETURN_TEXT_P(ret);
}


/********************************************************************
 *
 * rpad
 *
 * Syntax:
 *
 *	 text rpad(text string1, int4 len, text string2)
 *
 * Purpose:
 *
 *	 Returns string1, right-padded to length len with the sequence of
 *	 characters in string2.  If len is less than the length of string1,
 *	 instead truncate (on the right) to len.
* ********************************************************************/Datumrpad(PG_FUNCTION_ARGS){	text	   *string1 = PG_GETARG_TEXT_P(0);	int32		len = PG_GETARG_INT32(1);	text	   *string2 = PG_GETARG_TEXT_P(2);	text	   *ret;	char	   *ptr1,			   *ptr2,			   *ptr2end,			   *ptr_ret;	int			m,				s1len,				s2len;	int			bytelen;	/* Negative len is silently taken as zero */	if (len < 0)		len = 0;	s1len = VARSIZE(string1) - VARHDRSZ;	if (s1len < 0)		s1len = 0;				/* shouldn't happen */	s2len = VARSIZE(string2) - VARHDRSZ;	if (s2len < 0)		s2len = 0;				/* shouldn't happen */	s1len = pg_mbstrlen_with_len(VARDATA(string1), s1len);	if (s1len > len)		s1len = len;			/* truncate string1 to len chars */	if (s2len <= 0)		len = s1len;			/* nothing to pad with, so don't pad */	bytelen = pg_database_encoding_max_length() * len;	/* Check for integer overflow */	if (len != 0 && bytelen / pg_database_encoding_max_length() != len)		ereport(ERROR,				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),				 errmsg("requested length too large")));	ret = (text *) palloc(VARHDRSZ + bytelen);	m = len - s1len;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?