📄 varlena.c
字号:
/*------------------------------------------------------------------------- * * varlena.c * Functions for the variable-length built-in types. * * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.139.2.3 2006/05/21 20:05:48 tgl Exp $ * *------------------------------------------------------------------------- */#include "postgres.h"#include <ctype.h>#include "access/tuptoaster.h"#include "catalog/pg_type.h"#include "lib/stringinfo.h"#include "libpq/crypt.h"#include "libpq/pqformat.h"#include "mb/pg_wchar.h"#include "miscadmin.h"#include "parser/scansup.h"#include "regex/regex.h"#include "utils/array.h"#include "utils/builtins.h"#include "utils/lsyscache.h"#include "utils/pg_locale.h"typedef struct varlena unknown;#define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))#define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))#define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))#define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))#define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)#define PG_TEXTARG_GET_STR(arg_) \ DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))#define PG_TEXT_GET_STR(textp_) \ DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))#define PG_STR_GET_TEXT(str_) \ DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))#define TEXTLEN(textp) \ text_length(PointerGetDatum(textp))#define TEXTPOS(buf_text, from_sub_text) \ text_position(buf_text, from_sub_text, 1)#define LEFT(buf_text, from_sub_text) \ text_substring(PointerGetDatum(buf_text), \ 1, \ TEXTPOS(buf_text, from_sub_text) - 1, false)static int text_cmp(text *arg1, text *arg2);static int32 text_length(Datum str);static int32 text_position(text *t1, text *t2, int matchnum);static text *text_substring(Datum str, int32 start, int32 
length, bool length_not_specified);static void appendStringInfoText(StringInfo str, const text *t);/***************************************************************************** * USER I/O ROUTINES * *****************************************************************************/#define VAL(CH) ((CH) - '0')#define DIG(VAL) ((VAL) + '0')/* * byteain - converts from printable representation of byte array * * Non-printable characters must be passed as '\nnn' (octal) and are * converted to internal form. '\' must be passed as '\\'. * ereport(ERROR, ...) if bad form. * * BUGS: * The input is scaned twice. * The error checking of input is minimal. */Datumbyteain(PG_FUNCTION_ARGS){ char *inputText = PG_GETARG_CSTRING(0); char *tp; char *rp; int byte; bytea *result; for (byte = 0, tp = inputText; *tp != '\0'; byte++) { if (tp[0] != '\\') tp++; else if ((tp[0] == '\\') && (tp[1] >= '0' && tp[1] <= '3') && (tp[2] >= '0' && tp[2] <= '7') && (tp[3] >= '0' && tp[3] <= '7')) tp += 4; else if ((tp[0] == '\\') && (tp[1] == '\\')) tp += 2; else { /* * one backslash, not followed by 0 or ### valid octal */ ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type bytea"))); } } byte += VARHDRSZ; result = (bytea *) palloc(byte); VARATT_SIZEP(result) = byte; /* set varlena length */ tp = inputText; rp = VARDATA(result); while (*tp != '\0') { if (tp[0] != '\\') *rp++ = *tp++; else if ((tp[0] == '\\') && (tp[1] >= '0' && tp[1] <= '3') && (tp[2] >= '0' && tp[2] <= '7') && (tp[3] >= '0' && tp[3] <= '7')) { byte = VAL(tp[1]); byte <<= 3; byte += VAL(tp[2]); byte <<= 3; *rp++ = byte + VAL(tp[3]); tp += 4; } else if ((tp[0] == '\\') && (tp[1] == '\\')) { *rp++ = '\\'; tp += 2; } else { /* * We should never get here. The first pass should not allow it. 
*/ ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type bytea"))); } } PG_RETURN_BYTEA_P(result);}/* * byteaout - converts to printable representation of byte array * * Non-printable characters are inserted as '\nnn' (octal) and '\' as * '\\'. * * NULL vlena should be an error--returning string with NULL for now. */Datumbyteaout(PG_FUNCTION_ARGS){ bytea *vlena = PG_GETARG_BYTEA_P(0); char *result; char *vp; char *rp; int val; /* holds unprintable chars */ int i; int len; len = 1; /* empty string has 1 char */ vp = VARDATA(vlena); for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++) { if (*vp == '\\') len += 2; else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) len += 4; else len++; } rp = result = (char *) palloc(len); vp = VARDATA(vlena); for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++) { if (*vp == '\\') { *rp++ = '\\'; *rp++ = '\\'; } else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) { val = *vp; rp[0] = '\\'; rp[3] = DIG(val & 07); val >>= 3; rp[2] = DIG(val & 07); val >>= 3; rp[1] = DIG(val & 03); rp += 4; } else *rp++ = *vp; } *rp = '\0'; PG_RETURN_CSTRING(result);}/* * bytearecv - converts external binary format to bytea */Datumbytearecv(PG_FUNCTION_ARGS){ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); bytea *result; int nbytes; nbytes = buf->len - buf->cursor; result = (bytea *) palloc(nbytes + VARHDRSZ); VARATT_SIZEP(result) = nbytes + VARHDRSZ; pq_copymsgbytes(buf, VARDATA(result), nbytes); PG_RETURN_BYTEA_P(result);}/* * byteasend - converts bytea to binary format * * This is a special case: just copy the input... */Datumbyteasend(PG_FUNCTION_ARGS){ bytea *vlena = PG_GETARG_BYTEA_P_COPY(0); PG_RETURN_BYTEA_P(vlena);}/* * textin - converts "..." 
to internal representation */Datumtextin(PG_FUNCTION_ARGS){ char *inputText = PG_GETARG_CSTRING(0); text *result; int len; len = strlen(inputText); result = (text *) palloc(len + VARHDRSZ); VARATT_SIZEP(result) = len + VARHDRSZ; memcpy(VARDATA(result), inputText, len); PG_RETURN_TEXT_P(result);}/* * textout - converts internal representation to "..." */Datumtextout(PG_FUNCTION_ARGS){ text *t = PG_GETARG_TEXT_P(0); int len; char *result; len = VARSIZE(t) - VARHDRSZ; result = (char *) palloc(len + 1); memcpy(result, VARDATA(t), len); result[len] = '\0'; PG_RETURN_CSTRING(result);}/* * textrecv - converts external binary format to text */Datumtextrecv(PG_FUNCTION_ARGS){ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); text *result; char *str; int nbytes; str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); result = (text *) palloc(nbytes + VARHDRSZ); VARATT_SIZEP(result) = nbytes + VARHDRSZ; memcpy(VARDATA(result), str, nbytes); pfree(str); PG_RETURN_TEXT_P(result);}/* * textsend - converts text to binary format */Datumtextsend(PG_FUNCTION_ARGS){ text *t = PG_GETARG_TEXT_P(0); StringInfoData buf; pq_begintypsend(&buf); pq_sendtext(&buf, VARDATA(t), VARSIZE(t) - VARHDRSZ); PG_RETURN_BYTEA_P(pq_endtypsend(&buf));}/* * unknownin - converts "..." to internal representation */Datumunknownin(PG_FUNCTION_ARGS){ char *str = PG_GETARG_CSTRING(0); /* representation is same as cstring */ PG_RETURN_CSTRING(pstrdup(str));}/* * unknownout - converts internal representation to "..." 
*/Datumunknownout(PG_FUNCTION_ARGS){ /* representation is same as cstring */ char *str = PG_GETARG_CSTRING(0); PG_RETURN_CSTRING(pstrdup(str));}/* * unknownrecv - converts external binary format to unknown */Datumunknownrecv(PG_FUNCTION_ARGS){ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); char *str; int nbytes; str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); /* representation is same as cstring */ PG_RETURN_CSTRING(str);}/* * unknownsend - converts unknown to binary format */Datumunknownsend(PG_FUNCTION_ARGS){ /* representation is same as cstring */ char *str = PG_GETARG_CSTRING(0); StringInfoData buf; pq_begintypsend(&buf); pq_sendtext(&buf, str, strlen(str)); PG_RETURN_BYTEA_P(pq_endtypsend(&buf));}/* ========== PUBLIC ROUTINES ========== *//* * textlen - * returns the logical length of a text* * (which is less than the VARSIZE of the text*) */Datumtextlen(PG_FUNCTION_ARGS){ Datum str = PG_GETARG_DATUM(0); /* try to avoid decompressing argument */ PG_RETURN_INT32(text_length(str));}/* * text_length - * Does the real work for textlen() * * This is broken out so it can be called directly by other string processing * functions. Note that the argument is passed as a Datum, to indicate that * it may still be in compressed form. We can avoid decompressing it at all * in some cases. */static int32text_length(Datum str){ /* fastpath when max encoding length is one */ if (pg_database_encoding_max_length() == 1) PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ); else { text *t = DatumGetTextP(str); PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t), VARSIZE(t) - VARHDRSZ)); }}/* * textoctetlen - * returns the physical length of a text* * (which is less than the VARSIZE of the text*) */Datumtextoctetlen(PG_FUNCTION_ARGS){ Datum str = PG_GETARG_DATUM(0); /* We need not detoast the input at all */ PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);}/* * textcat - * takes two text* and returns a text* that is the concatenation of * the two. 
*
 * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
 * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
 * Allocate space for output in all cases.
 * XXX - thomas 1997-07-10
 */
/*
 * Each input's data length is VARSIZE minus the header, clamped to zero
 * if that comes out negative.  The result is always freshly allocated,
 * even when one or both inputs are empty.
 */
Datum
textcat(PG_FUNCTION_ARGS)
{
	text	   *left = PG_GETARG_TEXT_P(0);
	text	   *right = PG_GETARG_TEXT_P(1);
	int			leftlen = VARSIZE(left) - VARHDRSZ;
	int			rightlen = VARSIZE(right) - VARHDRSZ;
	int			total;
	text	   *result;
	char	   *out;

	/* defend against a nonsensical (negative) data length */
	leftlen = (leftlen < 0) ? 0 : leftlen;
	rightlen = (rightlen < 0) ? 0 : rightlen;

	/* header plus both payloads */
	total = leftlen + rightlen + VARHDRSZ;
	result = (text *) palloc(total);
	VARATT_SIZEP(result) = total;

	/* copy the left payload, then the right one directly after it */
	out = VARDATA(result);
	if (leftlen > 0)
		memcpy(out, VARDATA(left), leftlen);
	if (rightlen > 0)
		memcpy(out + leftlen, VARDATA(right), rightlen);

	PG_RETURN_TEXT_P(result);
}

/*
 * text_substr()
 * Return a substring starting at the specified position.
 * - thomas 1997-12-31
 *
 * Input:
 *	- string
 *	- starting position (is one-based)
 *	- string length
 *
 * If the starting position is zero or less, then return from the start of the string
 *	adjusting the length to be consistent with the "negative start" per SQL92.
 * If the length is less than zero, return the remaining string.
 *
 * Added multibyte support.
 * - Tatsuo Ishii 1998-4-21
 * Changed behavior if starting position is less than one to conform to SQL92 behavior.
 * Formerly returned the entire string; now returns a portion.
 * - Thomas Lockhart 1998-12-10
 * Now uses faster TOAST-slicing interface
 * - John Gray 2002-02-22
 * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
 * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
 * error; if E < 1, return '', not entire string). Fixed MB related bug when
 * S > LC and < LC + 4 sometimes garbage characters are returned.
* - Joe Conway 2002-08-10 */Datumtext_substr(PG_FUNCTION_ARGS){ PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0), PG_GETARG_INT32(1), PG_GETARG_INT32(2), false));}/* * text_substr_no_len - * Wrapper to avoid opr_sanity failure due to * one function accepting a different number of args. */Datumtext_substr_no_len(PG_FUNCTION_ARGS){ PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0), PG_GETARG_INT32(1), -1, true));}/* * text_substring - * Does the real work for text_substr() and text_substr_no_len() * * This is broken out so it can be called directly by other string processing * functions. Note that the argument is passed as a Datum, to indicate that * it may still be in compressed/toasted form. We can avoid detoasting all * of it in some cases. */static text *text_substring(Datum str, int32 start, int32 length, bool length_not_specified){ int32 eml = pg_database_encoding_max_length(); int32 S = start; /* start position */ int32 S1; /* adjusted start position */ int32 L1; /* adjusted substring length */ /* life is easy if the encoding max length is 1 */ if (eml == 1) { S1 = Max(S, 1); if (length_not_specified) /* special case - get length to end of
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -