📄 xml.c
字号:
/*-------------------------------------------------------------------------
 *
 * xml.c
 *	  XML data type support.
 *
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.68.2.4 2008/09/16 00:49:49 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */

/*
 * Generally, XML type support is only available when libxml use was
 * configured during the build.  But even if that is not done, the
 * type and all the functions are available, but most of them will
 * fail.  For one thing, this avoids having to manage variant catalog
 * installations.  But it also has nice effects such as that you can
 * dump a database containing XML type data even if the server is not
 * linked with libxml.  Thus, make sure xml_out() works even if nothing
 * else does.
 */

/*
 * Notes on memory management:
 *
 * Via callbacks, libxml is told to use palloc and friends for memory
 * management, within a context that we reset at transaction end (and also at
 * subtransaction abort) to prevent memory leaks.  Resetting at transaction or
 * subtransaction abort is necessary since we might have thrown a longjmp
 * while some data structures were not linked from anywhere persistent.
 * Resetting at transaction commit might not be necessary, but seems a good
 * idea to forestall long-term leaks.
 *
 * Sometimes libxml allocates global structures in the hope that it can reuse
 * them later on.  Therefore, before resetting LibxmlContext, we must tell
 * libxml to discard any global data it has.  The libxml API documentation is
 * not very good about specifying this, but for now we assume that
 * xmlCleanupParser() will get rid of anything we need to worry about.
 *
 * We use palloc --- which will throw a longjmp on error --- for allocation
 * callbacks that officially should act like malloc, ie, return NULL on
 * out-of-memory.  This is a bit risky since there is a chance of leaving
 * persistent libxml data structures in an inconsistent partially-constructed
 * state, perhaps leading to crash in xmlCleanupParser().  However, as of
 * early 2008 it is *known* that libxml can crash on out-of-memory due to
 * inadequate checks for NULL returns, so this behavior seems the lesser
 * of two evils.
 */

#include "postgres.h"

#ifdef USE_LIBXML
#include <libxml/chvalid.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/uri.h>
#include <libxml/xmlerror.h>
#include <libxml/xmlwriter.h>
#include <libxml/xpath.h>
#include <libxml/xpathInternals.h>
#endif   /* USE_LIBXML */

#include "catalog/namespace.h"
#include "catalog/pg_type.h"
#include "commands/dbcommands.h"
#include "executor/executor.h"
#include "executor/spi.h"
#include "fmgr.h"
#include "lib/stringinfo.h"
#include "libpq/pqformat.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "nodes/execnodes.h"
#include "parser/parse_expr.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/date.h"
#include "utils/datetime.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "access/tupmacs.h"
#include "utils/xml.h"


/* GUC variables */
XmlBinaryType xmlbinary;
XmlOptionType xmloption;

#ifdef USE_LIBXML

static StringInfo xml_err_buf = NULL;
static MemoryContext LibxmlContext = NULL;

static void xml_init(void);
static void xml_memory_init(void);
static void xml_memory_cleanup(void);
static void *xml_palloc(size_t size);
static void *xml_repalloc(void *ptr, size_t size);
static void xml_pfree(void *ptr);
static char *xml_pstrdup(const char *string);
static void xml_ereport(int level, int sqlcode, const char *msg);
static void xml_errorHandler(void *ctxt, const char *msg,...);
static void xml_ereport_by_code(int level, int sqlcode,
					const char *msg, int errcode);
static xmlChar *xml_text2xmlChar(text *in);
static int parse_xml_decl(const xmlChar * str, size_t *lenp,
			   xmlChar ** version, xmlChar ** encoding, int *standalone);
static bool print_xml_decl(StringInfo buf, const xmlChar * version,
			   pg_enc encoding, int standalone);
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
		  bool preserve_whitespace, xmlChar * encoding);
static text *xml_xmlnodetoxmltype(xmlNodePtr cur);
#endif   /* USE_LIBXML */

static StringInfo query_to_xml_internal(const char *query, char *tablename,
					  const char *xmlschema, bool nulls, bool tableforest,
					  const char *targetns, bool top_level);
static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid,
						 bool nulls, bool tableforest, const char *targetns);
static const char *map_sql_schema_to_xmlschema_types(Oid nspid,
								  List *relid_list, bool nulls,
								  bool tableforest, const char *targetns);
static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list,
								   bool nulls, bool tableforest,
								   const char *targetns);
static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod);
static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list);
static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod);
static void SPI_sql_row_to_xmlelement(int rownum, StringInfo result,
						  char *tablename, bool nulls, bool tableforest,
						  const char *targetns, bool top_level);

/* Raise the standard "built without libxml" error. */
#define NO_XML_SUPPORT() \
	ereport(ERROR, \
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
			 errmsg("unsupported XML feature"), \
			 errdetail("This functionality requires the server to be built with libxml support."), \
			 errhint("You need to rebuild PostgreSQL using --with-libxml.")))


/* Convenience wrappers around the text type's input/output functions. */
#define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
#define _textout(x) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(x)))


/* from SQL/XML:2003 section 4.7 */
#define NAMESPACE_XSD "http://www.w3.org/2001/XMLSchema"
#define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance"
#define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml"


#ifdef USE_LIBXML

/*
 * xmlChar_to_encoding
 *
 * Look up an encoding name (as found in an XML declaration) with
 * pg_char_to_encoding() and return the resulting encoding ID.  Raises ERROR
 * (ERRCODE_INVALID_PARAMETER_VALUE) if the lookup yields a negative value.
 */
static int
xmlChar_to_encoding(xmlChar * encoding_name)
{
	int			encoding = pg_char_to_encoding((char *) encoding_name);

	if (encoding < 0)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid encoding name \"%s\"",
						(char *) encoding_name)));
	return encoding;
}
#endif


/*
 * xml_in
 *
 * Text input function for the xml type.  The C string argument is copied
 * verbatim into a new varlena and then run through xml_parse() under the
 * current xmloption setting purely as a well-formedness check; the parsed
 * document is discarded.
 *
 * Without libxml this raises the NO_XML_SUPPORT() error.
 */
Datum
xml_in(PG_FUNCTION_ARGS)
{
#ifdef USE_LIBXML
	char	   *s = PG_GETARG_CSTRING(0);
	size_t		len;
	xmltype    *vardata;
	xmlDocPtr	doc;

	len = strlen(s);
	vardata = palloc(len + VARHDRSZ);
	SET_VARSIZE(vardata, len + VARHDRSZ);
	memcpy(VARDATA(vardata), s, len);

	/*
	 * Parse the data to check if it is well-formed XML data.  Assume that
	 * ERROR occurred if parsing failed.
	 */
	doc = xml_parse(vardata, xmloption, true, NULL);
	xmlFreeDoc(doc);

	PG_RETURN_XML_P(vardata);
#else
	NO_XML_SUPPORT();
	return 0;
#endif
}


#define PG_XML_DEFAULT_VERSION "1.0"


/*
 * xml_out_internal
 *
 * Produce a palloc'd, NUL-terminated C string for an xml value.
 *
 * With libxml, the stored value's XML declaration is parsed off and a fresh
 * one is emitted through print_xml_decl() for target_encoding, followed by
 * the remainder of the value.  If the declaration cannot be parsed, a
 * WARNING is issued and the stored bytes are returned unchanged.
 *
 * Without libxml the stored bytes are always returned unchanged, which is
 * what keeps xml_out() usable in such builds.
 */
static char *
xml_out_internal(xmltype *x, pg_enc target_encoding)
{
	char	   *str;
	size_t		len;

#ifdef USE_LIBXML
	xmlChar    *version;
	int			standalone;
	int			res_code;
#endif

	len = VARSIZE(x) - VARHDRSZ;
	str = palloc(len + 1);
	memcpy(str, VARDATA(x), len);
	str[len] = '\0';

#ifdef USE_LIBXML
	/* on success, len is used below as the offset of the content in str */
	if ((res_code = parse_xml_decl((xmlChar *) str,
								   &len, &version, NULL, &standalone)) == 0)
	{
		StringInfoData buf;

		initStringInfo(&buf);

		if (!print_xml_decl(&buf, version, target_encoding, standalone))
		{
			/*
			 * If we are not going to produce an XML declaration, eat a single
			 * newline in the original string to prevent empty first lines in
			 * the output.
			 */
			if (*(str + len) == '\n')
				len += 1;
		}
		appendStringInfoString(&buf, str + len);

		if (version)
			xmlFree(version);
		pfree(str);

		return buf.data;
	}

	xml_ereport_by_code(WARNING, ERRCODE_INTERNAL_ERROR,
						"could not parse XML declaration in stored value",
						res_code);
#endif
	return str;
}


/*
 * xml_out
 *
 * Text output function for the xml type.
 */
Datum
xml_out(PG_FUNCTION_ARGS)
{
	xmltype    *x = PG_GETARG_XML_P(0);

	/*
	 * xml_out removes the encoding property in all cases.  This is because we
	 * cannot control from here whether the datum will be converted to a
	 * different client encoding, so we'd do more harm than good by including
	 * it.
	 */
	PG_RETURN_CSTRING(xml_out_internal(x, 0));
}


/*
 * xml_recv
 *
 * Binary receive function for the xml type.  The raw message bytes are
 * checked for well-formedness in the encoding named by their own XML
 * declaration and then converted to the database encoding.
 *
 * Without libxml this raises the NO_XML_SUPPORT() error.
 */
Datum
xml_recv(PG_FUNCTION_ARGS)
{
#ifdef USE_LIBXML
	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
	xmltype    *result;
	char	   *str;
	char	   *newstr;
	int			nbytes;
	xmlDocPtr	doc;
	xmlChar    *encoding = NULL;

	/*
	 * Read the data in raw format. We don't know yet what the encoding is, as
	 * that information is embedded in the xml declaration; so we have to
	 * parse that before converting to server encoding.
	 */
	nbytes = buf->len - buf->cursor;
	str = (char *) pq_getmsgbytes(buf, nbytes);

	/*
	 * We need a null-terminated string to pass to parse_xml_decl().  Rather
	 * than make a separate copy, make the temporary result one byte bigger
	 * than it needs to be.
	 */
	result = palloc(nbytes + 1 + VARHDRSZ);
	SET_VARSIZE(result, nbytes + VARHDRSZ);
	memcpy(VARDATA(result), str, nbytes);
	str = VARDATA(result);
	str[nbytes] = '\0';

	/* only the declared encoding is of interest here */
	parse_xml_decl((xmlChar *) str, NULL, NULL, &encoding, NULL);

	/*
	 * Parse the data to check if it is well-formed XML data.  Assume that
	 * xml_parse will throw ERROR if not.
	 */
	doc = xml_parse(result, xmloption, true, encoding);
	xmlFreeDoc(doc);

	/*
	 * Now that we know what we're dealing with, convert to server encoding.
	 * UTF-8 is used as the source encoding when the declaration named none.
	 */
	newstr = (char *) pg_do_encoding_conversion((unsigned char *) str,
												nbytes,
												encoding ?
											  xmlChar_to_encoding(encoding) :
												PG_UTF8,
												GetDatabaseEncoding());

	/* a different pointer means the data was converted; rebuild the result */
	if (newstr != str)
	{
		pfree(result);

		nbytes = strlen(newstr);

		result = palloc(nbytes + VARHDRSZ);
		SET_VARSIZE(result, nbytes + VARHDRSZ);
		memcpy(VARDATA(result), newstr, nbytes);

		pfree(newstr);
	}

	PG_RETURN_XML_P(result);
#else
	NO_XML_SUPPORT();
	return 0;
#endif
}


/*
 * xml_send
 *
 * Binary send function for the xml type.  The value is rendered with an XML
 * declaration naming the client encoding and handed to pq_sendtext().
 */
Datum
xml_send(PG_FUNCTION_ARGS)
{
	xmltype    *x = PG_GETARG_XML_P(0);
	char	   *outval;
	StringInfoData buf;

	/*
	 * xml_out_internal doesn't convert the encoding, it just prints the right
	 * declaration. pq_sendtext will do the conversion.
	 */
	outval = xml_out_internal(x, pg_get_client_encoding());

	pq_begintypsend(&buf);
	pq_sendtext(&buf, outval, strlen(outval));
	pfree(outval);
	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
}


#ifdef USE_LIBXML

/*
 * appendStringInfoText
 *
 * Append the payload bytes of a text datum to a StringInfo.
 */
static void
appendStringInfoText(StringInfo str, const text *t)
{
	appendBinaryStringInfo(str, VARDATA(t), VARSIZE(t) - VARHDRSZ);
}
#endif


/*
 * stringinfo_to_xmltype
 *
 * Copy the current contents of a StringInfo into a newly palloc'd xml
 * varlena.  The StringInfo itself is left untouched.
 */
static xmltype *
stringinfo_to_xmltype(StringInfo buf)
{
	int32		len;
	xmltype    *result;

	len = buf->len + VARHDRSZ;
	result = palloc(len);
	SET_VARSIZE(result, len);
	memcpy(VARDATA(result), buf->data, buf->len);

	return result;
}


/*
 * cstring_to_xmltype
 *
 * Copy a NUL-terminated string (without its terminator) into a newly
 * palloc'd xml varlena.
 */
static xmltype *
cstring_to_xmltype(const char *string)
{
	int32		len;
	xmltype    *result;

	len = strlen(string) + VARHDRSZ;
	result = palloc(len);
	SET_VARSIZE(result, len);
	memcpy(VARDATA(result), string, len - VARHDRSZ);

	return result;
}


#ifdef USE_LIBXML

/*
 * xmlBuffer_to_xmltype
 *
 * Copy the contents of a libxml buffer into a newly palloc'd xml varlena.
 */
static xmltype *
xmlBuffer_to_xmltype(xmlBufferPtr buf)
{
	int32		len;
	xmltype    *result;

	len = xmlBufferLength(buf) + VARHDRSZ;
	result = palloc(len);
	SET_VARSIZE(result, len);
	memcpy(VARDATA(result), xmlBufferContent(buf), len - VARHDRSZ);

	return result;
}
#endif


/*
 * xmlcomment
 *
 * Wrap the text argument in "<!--" ... "-->" and return it as an xml value.
 * Raises ERROR (ERRCODE_INVALID_XML_COMMENT) if the text contains "--" or
 * ends with "-".
 *
 * Without libxml this raises the NO_XML_SUPPORT() error.
 */
Datum
xmlcomment(PG_FUNCTION_ARGS)
{
#ifdef USE_LIBXML
	text	   *arg = PG_GETARG_TEXT_P(0);
	char	   *argdata = VARDATA(arg);
	int			len = VARSIZE(arg) - VARHDRSZ;
	StringInfoData buf;
	int			i;

	/* check for "--" in string or "-" at the end */
	for (i = 1; i < len; i++)
	{
		if (argdata[i] == '-' && argdata[i - 1] == '-')
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_XML_COMMENT),
					 errmsg("invalid XML comment")));
	}
	if (len > 0 && argdata[len - 1] == '-')
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_XML_COMMENT),
				 errmsg("invalid XML comment")));

	initStringInfo(&buf);
	/* constant strings: no need for the printf-style appendStringInfo */
	appendStringInfoString(&buf, "<!--");
	appendStringInfoText(&buf, arg);
	appendStringInfoString(&buf, "-->");

	PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
#else
	NO_XML_SUPPORT();
	return 0;
#endif
}


/*
 * xmlconcat
 *
 * Concatenate a list of xml values into one.  Each argument's XML
 * declaration is stripped; the contents are appended in list order.  A
 * single declaration is emitted in front of the result if either all
 * arguments agree on a version or the combined standalone value is
 * non-negative:
 *	- the version is kept only if every argument has one and they are all
 *	  equal;
 *	- standalone starts at 1, drops to 0 if any argument reports 0, and
 *	  becomes -1 if any argument reports a negative value.
 *
 * Without libxml this raises the NO_XML_SUPPORT() error.
 *
 * TODO: xmlconcat needs to merge the notations and unparsed entities
 * of the argument values.	Not very important in practice, though.
 */
xmltype *
xmlconcat(List *args)
{
#ifdef USE_LIBXML
	int			global_standalone = 1;
	xmlChar    *global_version = NULL;
	bool		global_version_no_value = false;
	StringInfoData buf;
	ListCell   *v;

	initStringInfo(&buf);
	foreach(v, args)
	{
		xmltype    *x = DatumGetXmlP(PointerGetDatum(lfirst(v)));
		size_t		len;

		/*
		 * The return code of parse_xml_decl() is not checked here, so give
		 * its outputs defined "nothing found" values up front rather than
		 * relying on the callee to have stored something.
		 */
		xmlChar    *version = NULL;
		int			standalone = -1;
		char	   *str;

		len = VARSIZE(x) - VARHDRSZ;
		str = palloc(len + 1);
		memcpy(str, VARDATA(x), len);
		str[len] = '\0';

		parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);

		if (standalone == 0 && global_standalone == 1)
			global_standalone = 0;
		if (standalone < 0)
			global_standalone = -1;

		if (!version)
			global_version_no_value = true;
		else if (!global_version)
			global_version = xmlStrdup(version);
		else if (xmlStrcmp(version, global_version) != 0)
			global_version_no_value = true;

		/* len is used as the offset of the content following the decl */
		appendStringInfoString(&buf, str + len);

		/*
		 * Release the per-argument version string, as xml_out_internal()
		 * does; global_version holds its own copy.  This function runs once
		 * per row under XMLAGG, so leaving these for end-of-transaction
		 * cleanup can add up.
		 */
		if (version)
			xmlFree(version);
		pfree(str);
	}

	if (!global_version_no_value || global_standalone >= 0)
	{
		StringInfoData buf2;

		initStringInfo(&buf2);

		print_xml_decl(&buf2,
					   (!global_version_no_value) ? global_version : NULL,
					   0,
					   global_standalone);

		appendStringInfoString(&buf2, buf.data);
		buf = buf2;
	}

	/* the declaration (if any) has been printed; drop our version copy */
	if (global_version)
		xmlFree(global_version);

	return stringinfo_to_xmltype(&buf);
#else
	NO_XML_SUPPORT();
	return NULL;
#endif
}


/*
 * XMLAGG support
 *
 * xmlconcat2
 *
 * Two-argument, NULL-tolerant concatenation: returns NULL if both inputs
 * are NULL, the non-NULL input if exactly one is NULL, and otherwise
 * xmlconcat() of the two.
 */
Datum
xmlconcat2(PG_FUNCTION_ARGS)
{
	if (PG_ARGISNULL(0))
	{
		if (PG_ARGISNULL(1))
			PG_RETURN_NULL();
		else
			PG_RETURN_XML_P(PG_GETARG_XML_P(1));
	}
	else if (PG_ARGISNULL(1))
		PG_RETURN_XML_P(PG_GETARG_XML_P(0));
	else
		PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0),
											 PG_GETARG_XML_P(1))));
}


/*
 * texttoxml
 *
 * Convert a text value to xml by passing it to xmlparse() with the current
 * xmloption setting and whitespace preservation enabled.
 */
Datum
texttoxml(PG_FUNCTION_ARGS)
{
	text	   *data = PG_GETARG_TEXT_P(0);

	PG_RETURN_XML_P(xmlparse(data, xmloption, true));
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -