oracle_compat.c
来自「postgresql8.3.4源码,开源数据库」· C语言 代码 · 共 1,498 行 · 第 1/3 页
C
1,498 行
/*------------------------------------------------------------------------- * oracle_compat.c * Oracle compatible functions. * * Copyright (c) 1996-2008, PostgreSQL Global Development Group * * Author: Edmund Mergl <E.Mergl@bawue.de> * Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org> * * * IDENTIFICATION * $PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.77 2008/01/01 19:45:52 momjian Exp $ * *------------------------------------------------------------------------- */#include "postgres.h"#include <ctype.h>#include <limits.h>/* * towlower() and friends should be in <wctype.h>, but some pre-C99 systems * declare them in <wchar.h>. */#ifdef HAVE_WCHAR_H#include <wchar.h>#endif#ifdef HAVE_WCTYPE_H#include <wctype.h>#endif#include "utils/builtins.h"#include "utils/pg_locale.h"#include "mb/pg_wchar.h"/* * If the system provides the needed functions for wide-character manipulation * (which are all standardized by C99), then we implement upper/lower/initcap * using wide-character functions. Otherwise we use the traditional <ctype.h> * functions, which of course will not work as desired in multibyte character * sets. Note that in either case we are effectively assuming that the * database character encoding matches the encoding implied by LC_CTYPE. * * We assume if we have these two functions, we have their friends too, and * can use the wide-character method. */#if defined(HAVE_WCSTOMBS) && defined(HAVE_TOWLOWER)#define USE_WIDE_UPPER_LOWERchar *wstring_lower(char *str);char *wstring_upper(char *str);#endifstatic text *dotrim(const char *string, int stringlen, const char *set, int setlen, bool doltrim, bool dortrim);#ifdef USE_WIDE_UPPER_LOWER/* * Convert a TEXT value into a palloc'd wchar string. 
*/
/*
 * texttowcs
 *
 * txt: the text value to convert; its payload is read with
 *		VARSIZE_ANY_EXHDR / VARDATA_ANY.
 *
 * Returns a palloc'd array of (input bytes + 1) wchar_t holding the
 * zero-terminated result of mbstowcs().  The caller is responsible for
 * pfree'ing it.
 *
 * Raises ERROR if the input is too large for the workspace computation, or
 * if mbstowcs() reports an invalid multibyte sequence.
 *
 * The temporary null-terminated copy of the input is released before
 * returning, so only the result array remains allocated.
 */
static wchar_t *
texttowcs(const text *txt)
{
	int			nbytes = VARSIZE_ANY_EXHDR(txt);
	char	   *workstr;
	wchar_t    *result;
	size_t		ncodes;

	/* Overflow paranoia */
	if (nbytes < 0 ||
		nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));

	/* Need a null-terminated version of the input */
	workstr = (char *) palloc(nbytes + 1);
	memcpy(workstr, VARDATA_ANY(txt), nbytes);
	workstr[nbytes] = '\0';

	/* Output workspace cannot have more codes than input bytes */
	result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));

	/* Do the conversion */
	ncodes = mbstowcs(result, workstr, nbytes + 1);

	if (ncodes == (size_t) -1)
	{
		/*
		 * Invalid multibyte character encountered.  We try to give a useful
		 * error message by letting pg_verifymbstr check the string.  But it's
		 * possible that the string is OK to us, and not OK to mbstowcs ---
		 * this suggests that the LC_CTYPE locale is different from the
		 * database encoding.  Give a generic error message if verifymbstr
		 * can't find anything wrong.
		 */
		pg_verifymbstr(workstr, nbytes, false);
		ereport(ERROR,
				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
				 errmsg("invalid multibyte character for locale"),
				 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
	}

	Assert(ncodes <= (size_t) nbytes);

	/*
	 * The null-terminated copy was only needed as input to mbstowcs (and to
	 * pg_verifymbstr on the error path above); release it now rather than
	 * leaving it allocated until the memory context is reset.
	 */
	pfree(workstr);

	return result;
}

/*
 * Convert a wchar string into a palloc'd TEXT value.  The wchar string
 * must be zero-terminated, but we also require the caller to pass the string
 * length, since it will know it anyway in current uses.
*/static text *wcstotext(const wchar_t *str, int ncodes){ text *result; size_t nbytes; /* Overflow paranoia */ if (ncodes < 0 || ncodes > (int) ((INT_MAX - VARHDRSZ) / MB_CUR_MAX) - 1) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"))); /* Make workspace certainly large enough for result */ result = (text *) palloc((ncodes + 1) * MB_CUR_MAX + VARHDRSZ); /* Do the conversion */ nbytes = wcstombs((char *) VARDATA(result), str, (ncodes + 1) * MB_CUR_MAX); if (nbytes == (size_t) -1) { /* Invalid multibyte character encountered ... shouldn't happen */ ereport(ERROR, (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), errmsg("invalid multibyte character for locale"))); } Assert(nbytes <= (size_t) (ncodes * MB_CUR_MAX)); SET_VARSIZE(result, nbytes + VARHDRSZ); return result;}#endif /* USE_WIDE_UPPER_LOWER *//* * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding. * To make use of the upper/lower functionality, we need to map UTF8 to * UTF16, which for some reason mbstowcs and wcstombs won't do for us. * This conversion layer takes care of it. 
*/#ifdef WIN32/* texttowcs for the case of UTF8 to UTF16 */static wchar_t *win32_utf8_texttowcs(const text *txt){ int nbytes = VARSIZE_ANY_EXHDR(txt); wchar_t *result; int r; /* Overflow paranoia */ if (nbytes < 0 || nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"))); /* Output workspace cannot have more codes than input bytes */ result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); /* stupid Microsloth API does not work for zero-length input */ if (nbytes == 0) r = 0; else { /* Do the conversion */ r = MultiByteToWideChar(CP_UTF8, 0, VARDATA_ANY(txt), nbytes, result, nbytes); if (r <= 0) /* assume it's NO_UNICODE_TRANSLATION */ { /* see notes above about error reporting */ pg_verifymbstr(VARDATA_ANY(txt), nbytes, false); ereport(ERROR, (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), errmsg("invalid multibyte character for locale"), errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding."))); } } /* Append trailing null wchar (MultiByteToWideChar won't have) */ Assert(r <= nbytes); result[r] = 0; return result;}/* wcstotext for the case of UTF16 to UTF8 */static text *win32_utf8_wcstotext(const wchar_t *str){ text *result; int nbytes; int r; /* Compute size of output string (this *will* include trailing null) */ nbytes = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL); if (nbytes <= 0) /* shouldn't happen */ ereport(ERROR, (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), errmsg("UTF-16 to UTF-8 translation failed: %lu", GetLastError()))); result = palloc(nbytes + VARHDRSZ); r = WideCharToMultiByte(CP_UTF8, 0, str, -1, VARDATA(result), nbytes, NULL, NULL); if (r != nbytes) /* shouldn't happen */ ereport(ERROR, (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), errmsg("UTF-16 to UTF-8 translation failed: %lu", GetLastError()))); SET_VARSIZE(result, nbytes + VARHDRSZ - 1); /* -1 to ignore null */ return result;}/* interface layer to check which 
encoding is in use */static wchar_t *win32_texttowcs(const text *txt){ if (GetDatabaseEncoding() == PG_UTF8) return win32_utf8_texttowcs(txt); else return texttowcs(txt);}static text *win32_wcstotext(const wchar_t *str, int ncodes){ if (GetDatabaseEncoding() == PG_UTF8) return win32_utf8_wcstotext(str); else return wcstotext(str, ncodes);}/* use macros to cause routines below to call interface layer */#define texttowcs win32_texttowcs#define wcstotext win32_wcstotext#endif /* WIN32 */#ifdef USE_WIDE_UPPER_LOWER/* * string_upper and string_lower are used for correct multibyte upper/lower * transformations localized strings. Returns pointers to transformated * string. */char *wstring_upper(char *str){ wchar_t *workspace; text *in_text; text *out_text; char *result; int nbytes = strlen(str); int i; in_text = palloc(nbytes + VARHDRSZ); memcpy(VARDATA(in_text), str, nbytes); SET_VARSIZE(in_text, nbytes + VARHDRSZ); workspace = texttowcs(in_text); for (i = 0; workspace[i] != 0; i++) workspace[i] = towupper(workspace[i]); out_text = wcstotext(workspace, i); nbytes = VARSIZE(out_text) - VARHDRSZ; result = palloc(nbytes + 1); memcpy(result, VARDATA(out_text), nbytes); result[nbytes] = '\0'; pfree(workspace); pfree(in_text); pfree(out_text); return result;}char *wstring_lower(char *str){ wchar_t *workspace; text *in_text; text *out_text; char *result; int nbytes = strlen(str); int i; in_text = palloc(nbytes + VARHDRSZ); memcpy(VARDATA(in_text), str, nbytes); SET_VARSIZE(in_text, nbytes + VARHDRSZ); workspace = texttowcs(in_text); for (i = 0; workspace[i] != 0; i++) workspace[i] = towlower(workspace[i]); out_text = wcstotext(workspace, i); nbytes = VARSIZE(out_text) - VARHDRSZ; result = palloc(nbytes + 1); memcpy(result, VARDATA(out_text), nbytes); result[nbytes] = '\0'; pfree(workspace); pfree(in_text); pfree(out_text); return result;}#endif /* USE_WIDE_UPPER_LOWER *//******************************************************************** * * lower * * Syntax: * * text 
lower(text string) * * Purpose: * * Returns string, with all letters forced to lowercase. * ********************************************************************/Datumlower(PG_FUNCTION_ARGS){#ifdef USE_WIDE_UPPER_LOWER /* * Use wide char code only when max encoding length > 1 and ctype != C. * Some operating systems fail with multi-byte encodings and a C locale. * Also, for a C locale there is no need to process as multibyte. */ if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) { text *string = PG_GETARG_TEXT_PP(0); text *result; wchar_t *workspace; int i; workspace = texttowcs(string); for (i = 0; workspace[i] != 0; i++) workspace[i] = towlower(workspace[i]); result = wcstotext(workspace, i); pfree(workspace); PG_RETURN_TEXT_P(result); } else#endif /* USE_WIDE_UPPER_LOWER */ { text *string = PG_GETARG_TEXT_P_COPY(0); char *ptr; int m; /* * Since we copied the string, we can scribble directly on the value */ ptr = VARDATA(string); m = VARSIZE(string) - VARHDRSZ; while (m-- > 0) { *ptr = tolower((unsigned char) *ptr); ptr++; } PG_RETURN_TEXT_P(string); }}/******************************************************************** * * upper * * Syntax: * * text upper(text string) * * Purpose: * * Returns string, with all letters forced to uppercase. * ********************************************************************/Datumupper(PG_FUNCTION_ARGS){#ifdef USE_WIDE_UPPER_LOWER /* * Use wide char code only when max encoding length > 1 and ctype != C. * Some operating systems fail with multi-byte encodings and a C locale. * Also, for a C locale there is no need to process as multibyte. 
*/ if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) { text *string = PG_GETARG_TEXT_PP(0); text *result; wchar_t *workspace; int i; workspace = texttowcs(string); for (i = 0; workspace[i] != 0; i++) workspace[i] = towupper(workspace[i]); result = wcstotext(workspace, i); pfree(workspace); PG_RETURN_TEXT_P(result); } else#endif /* USE_WIDE_UPPER_LOWER */ { text *string = PG_GETARG_TEXT_P_COPY(0); char *ptr; int m; /* * Since we copied the string, we can scribble directly on the value */ ptr = VARDATA(string); m = VARSIZE(string) - VARHDRSZ; while (m-- > 0) { *ptr = toupper((unsigned char) *ptr); ptr++; } PG_RETURN_TEXT_P(string); }}/******************************************************************** * * initcap * * Syntax: * * text initcap(text string) * * Purpose: * * Returns string, with first letter of each word in uppercase, all * other letters in lowercase. A word is defined as a sequence of * alphanumeric characters, delimited by non-alphanumeric * characters. * ********************************************************************/Datuminitcap(PG_FUNCTION_ARGS){#ifdef USE_WIDE_UPPER_LOWER /* * Use wide char code only when max encoding length > 1 and ctype != C. * Some operating systems fail with multi-byte encodings and a C locale. * Also, for a C locale there is no need to process as multibyte. */ if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) {
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?