📄 apr_xml.c
字号:
/* Copyright 2000-2005 The Apache Software Foundation or its licensors, as
* applicable.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "apr.h"
#include "apr_strings.h"
#define APR_WANT_STDIO /* for sprintf() */
#define APR_WANT_STRFUNC
#include "apr_want.h"
#include "apr_xml.h"
#include "apu_config.h"
#ifdef APR_HAVE_OLD_EXPAT
#include "xmlparse.h"
#else
#include "expat.h"
#endif
/* CR/LF line terminator string. Not referenced in the visible part of this
   file -- presumably used by debug/output code elsewhere (TODO confirm). */
#define DEBUG_CR "\r\n"
/* Errors related to namespace processing. These are stored in the parser
   context's "error" field by start_handler(): UNKNOWN_PREFIX when
   find_prefix() cannot resolve a prefix, INVALID_DECL when an "xmlns:foo"
   declaration has an empty value. apr_xml_parser_geterror() maps them to
   messages. */
#define APR_XML_NS_ERROR_UNKNOWN_PREFIX (-1000)
#define APR_XML_NS_ERROR_INVALID_DECL (-1001)
/*
 * Test for a name whose first three characters are [Xx][Mm][Ll].
 *
 * Evaluates to non-zero when "name" starts with "xml" in any letter case,
 * zero otherwise. The && chain short-circuits, so a string shorter than
 * three characters is never read past its terminating NUL.
 *
 * The argument is fully parenthesized so that pointer expressions such as
 * `p + 1` or `&s[1]` expand correctly. Note that "name" is evaluated more
 * than once; do not pass an expression with side effects.
 */
#define APR_XML_NS_IS_RESERVED(name) \
    ( ((name)[0] == 'X' || (name)[0] == 'x') && \
      ((name)[1] == 'M' || (name)[1] == 'm') && \
      ((name)[2] == 'L' || (name)[2] == 'l') )
/*
 * The real (internal) definition of the parser context.
 *
 * One of these is created by apr_xml_parser_create() and handed to Expat as
 * the user data for the element and character-data callbacks.
 */
struct apr_xml_parser {
apr_xml_doc *doc; /* the doc we're parsing; returned by apr_xml_parser_done() */
apr_pool_t *p; /* the pool we allocate from */
apr_xml_elem *cur_elem; /* innermost open element; NULL when no element is open */
int error; /* non-zero once an error has occurred; the callbacks then do nothing */
/* Expat reported a parse failure; the Expat code is kept in xp_err */
#define APR_XML_ERROR_EXPAT 1
/* a parse was attempted after the Expat parser had been freed */
#define APR_XML_ERROR_PARSE_DONE 2
/* also: public APR_XML_NS_ERROR_* values (if any) */
XML_Parser xp; /* the actual (Expat) XML parser; set to NULL by cleanup_parser() */
enum XML_Error xp_err; /* stored Expat error code (from XML_GetErrorCode) */
};
/* struct for scoping namespace declarations */
typedef struct apr_xml_ns_scope {
const char *prefix; /* prefix used for this ns */
int ns; /* index into namespace table */
int emptyURI; /* the namespace URI is the empty string */
struct apr_xml_ns_scope *next; /* next scoped namespace */
} apr_xml_ns_scope;
/*
 * Return the namespace table index for a given prefix.
 *
 * The search starts at the parser's current element and proceeds towards
 * the root, examining every namespace scope attached to each element; the
 * first scope whose prefix matches wins.
 *
 * Returns:
 *   - APR_XML_NS_NONE if the matching scope maps the prefix to the empty
 *     URI (a default namespace reset to "no namespace");
 *   - the scope's namespace index otherwise;
 *   - APR_XML_NS_NONE if nothing matched and the prefix is "" (no default
 *     namespace is in effect);
 *   - APR_XML_NS_ERROR_UNKNOWN_PREFIX if nothing matched and the prefix is
 *     non-empty.
 */
static int find_prefix(apr_xml_parser *parser, const char *prefix)
{
    apr_xml_elem *node;
    apr_xml_ns_scope *scope;

    /* innermost declaration wins, so walk from the current element upward */
    for (node = parser->cur_elem; node != NULL; node = node->parent) {
        for (scope = node->ns_scope; scope != NULL; scope = scope->next) {
            if (strcmp(prefix, scope->prefix) != 0) {
                continue;
            }

            /*
             * A default namespace may be declared with an empty URI, which
             * resets it to "no namespace"; report that instead of an index.
             */
            return scope->emptyURI ? APR_XML_NS_NONE : scope->ns;
        }
    }

    /*
     * Nothing matched. An empty prefix means the element/attribute did not
     * specify one and no default namespace has been declared, which is
     * simply "no namespace". A non-empty prefix was never declared.
     */
    return (prefix[0] == '\0') ? APR_XML_NS_NONE
                               : APR_XML_NS_ERROR_UNKNOWN_PREFIX;
}
/*
 * Expat start-element callback (registered in apr_xml_parser_create()).
 *
 * userdata: the apr_xml_parser context.
 * name:     the element name; may carry a "prefix:" part, which is split
 *           off below.
 * attrs:    NULL-terminated array of alternating attribute name / value
 *           strings.
 *
 * Work done, in order:
 *   1. allocate the element and copy its name and attributes into the pool;
 *   2. link the element into the tree and make it the current element;
 *   3. pull "xmlns" / "xmlns:foo" declarations and "xml:lang" out of the
 *      attribute list, recording them on the element;
 *   4. inherit the parent's language if none was given;
 *   5. resolve the namespace of the element and of each remaining attribute.
 *
 * On a namespace error, parser->error is set and the function returns
 * early; the element has already been linked into the tree by then.
 * Does nothing if an error was recorded earlier.
 */
static void start_handler(void *userdata, const char *name, const char **attrs)
{
apr_xml_parser *parser = userdata;
apr_xml_elem *elem;
apr_xml_attr *attr;
apr_xml_attr *prev;
char *colon;
const char *quoted;
char *elem_name;
/* punt once we find an error */
if (parser->error)
return;
elem = apr_pcalloc(parser->p, sizeof(*elem));
/* prep the element (elem_name is a writable alias of the same copy, so
   the prefix can be split off in place further down) */
elem->name = elem_name = apr_pstrdup(parser->p, name);
/* fill in the attributes (note: ends up in reverse order, since each new
   attribute is pushed onto the head of the list) */
while (*attrs) {
attr = apr_palloc(parser->p, sizeof(*attr));
attr->name = apr_pstrdup(parser->p, *attrs++);
attr->value = apr_pstrdup(parser->p, *attrs++);
attr->next = elem->attr;
elem->attr = attr;
}
/* hook the element into the tree */
if (parser->cur_elem == NULL) {
/* no current element; this also becomes the root */
parser->cur_elem = parser->doc->root = elem;
}
else {
/* this element appeared within the current elem */
elem->parent = parser->cur_elem;
/* set up the child/sibling links */
if (elem->parent->last_child == NULL) {
/* no first child either */
elem->parent->first_child = elem->parent->last_child = elem;
}
else {
/* hook onto the end of the parent's children */
elem->parent->last_child->next = elem;
elem->parent->last_child = elem;
}
/* this element is now the current element */
parser->cur_elem = elem;
}
/* scan the attributes for namespace declarations; "prev" trails "attr"
   and always names the last attribute that was kept, so that a removed
   attribute can be unlinked from the list */
for (prev = NULL, attr = elem->attr;
attr;
attr = attr->next) {
if (strncmp(attr->name, "xmlns", 5) == 0) {
/* points just past "xmlns" inside the attribute name */
const char *prefix = &attr->name[5];
apr_xml_ns_scope *ns_scope;
/* test for xmlns:foo= form and xmlns= form */
if (*prefix == ':') {
/* a namespace prefix declaration must have a
non-empty value. */
if (attr->value[0] == '\0') {
parser->error = APR_XML_NS_ERROR_INVALID_DECL;
return;
}
++prefix;
}
else if (*prefix != '\0') {
/* the name merely starts with "xmlns" (e.g. "xmlnsfoo"); it is an
   ordinary attribute, not a declaration */
/* advance "prev" since "attr" is still present */
prev = attr;
continue;
}
/* quote the URI before we ever start working with it */
quoted = apr_xml_quote_string(parser->p, attr->value, 1);
/* build and insert the new scope; "prefix" is "" here for the
   xmlns= (default namespace) form */
ns_scope = apr_pcalloc(parser->p, sizeof(*ns_scope));
ns_scope->prefix = prefix;
ns_scope->ns = apr_xml_insert_uri(parser->doc->namespaces, quoted);
ns_scope->emptyURI = *quoted == '\0';
ns_scope->next = elem->ns_scope;
elem->ns_scope = ns_scope;
/* remove this attribute from the element */
if (prev == NULL)
elem->attr = attr->next;
else
prev->next = attr->next;
/* Note: prev will not be advanced since we just removed "attr" */
}
else if (strcmp(attr->name, "xml:lang") == 0) {
/* save away the language (in quoted form) */
elem->lang = apr_xml_quote_string(parser->p, attr->value, 1);
/* remove this attribute from the element */
if (prev == NULL)
elem->attr = attr->next;
else
prev->next = attr->next;
/* Note: prev will not be advanced since we just removed "attr" */
}
else {
/* advance "prev" since "attr" is still present */
prev = attr;
}
}
/*
** If an xml:lang attribute didn't exist (lang==NULL), then copy the
** language from the parent element (if present).
**
** NOTE: elem_size() *depends* upon this pointer equality.
*/
if (elem->lang == NULL && elem->parent != NULL)
elem->lang = elem->parent->lang;
/* adjust the element's namespace */
colon = strchr(elem_name, ':');
if (colon == NULL) {
/*
* The element is using the default namespace, which will always
* be found. Either it will be "no namespace", or a default
* namespace URI has been specified at some point.
*/
elem->ns = find_prefix(parser, "");
}
else if (APR_XML_NS_IS_RESERVED(elem->name)) {
/* names starting with [Xx][Mm][Ll] are left whole (prefix included)
   and placed in "no namespace" */
elem->ns = APR_XML_NS_NONE;
}
else {
/* split "prefix:local" in place: terminate the prefix at the colon,
   resolve it, then point the name at the local part */
*colon = '\0';
elem->ns = find_prefix(parser, elem->name);
elem->name = colon + 1;
if (APR_XML_NS_IS_ERROR(elem->ns)) {
parser->error = elem->ns;
return;
}
}
/* adjust all remaining attributes' namespaces */
for (attr = elem->attr; attr; attr = attr->next) {
/*
* apr_xml_attr defines this as "const" but we dup'd it, so we
* know that we can change it. a bit hacky, but the existing
* structure def is best.
*/
char *attr_name = (char *)attr->name;
colon = strchr(attr_name, ':');
if (colon == NULL) {
/*
* Attributes do NOT use the default namespace. Therefore,
* we place them into the "no namespace" category.
*/
attr->ns = APR_XML_NS_NONE;
}
else if (APR_XML_NS_IS_RESERVED(attr->name)) {
/* same treatment as for reserved element names above */
attr->ns = APR_XML_NS_NONE;
}
else {
/* same in-place "prefix:local" split as for the element name */
*colon = '\0';
attr->ns = find_prefix(parser, attr->name);
attr->name = colon + 1;
if (APR_XML_NS_IS_ERROR(attr->ns)) {
parser->error = attr->ns;
return;
}
}
}
}
static void end_handler(void *userdata, const char *name)
{
apr_xml_parser *parser = userdata;
/* punt once we find an error */
if (parser->error)
return;
/* pop up one level */
parser->cur_elem = parser->cur_elem->parent;
}
static void cdata_handler(void *userdata, const char *data, int len)
{
apr_xml_parser *parser = userdata;
apr_xml_elem *elem;
apr_text_header *hdr;
const char *s;
/* punt once we find an error */
if (parser->error)
return;
elem = parser->cur_elem;
s = apr_pstrndup(parser->p, data, len);
if (elem->last_child == NULL) {
/* no children yet. this cdata follows the start tag */
hdr = &elem->first_cdata;
}
else {
/* child elements exist. this cdata follows the last child. */
hdr = &elem->last_child->following_cdata;
}
apr_text_append(parser->p, hdr, s);
}
static apr_status_t cleanup_parser(void *ctx)
{
apr_xml_parser *parser = ctx;
XML_ParserFree(parser->xp);
parser->xp = NULL;
return APR_SUCCESS;
}
/*
 * Create an XML parser whose context, document and parse tree are all
 * allocated from "pool".
 *
 * pool: the pool to allocate from. A cleanup is registered on it that frees
 *       the underlying Expat parser.
 *
 * Returns the new parser, or NULL if the Expat parser could not be created.
 * In that case the pool's abort function is invoked with APR_ENOMEM first,
 * if the pool has one.
 */
APU_DECLARE(apr_xml_parser *) apr_xml_parser_create(apr_pool_t *pool)
{
    apr_xml_parser *parser = apr_pcalloc(pool, sizeof(*parser));

    parser->p = pool;
    parser->doc = apr_pcalloc(pool, sizeof(*parser->doc));

    parser->doc->namespaces = apr_array_make(pool, 5, sizeof(const char *));

    /* ### is there a way to avoid hard-coding this? */
    apr_xml_insert_uri(parser->doc->namespaces, "DAV:");

    parser->xp = XML_ParserCreate(NULL);
    if (parser->xp == NULL) {
        /*
         * A pool is not required to have an abort function; calling
         * through a NULL pointer here would crash instead of reporting
         * the failure to the caller, so only call it when it is set.
         */
        if (apr_pool_get_abort(pool) != NULL) {
            (*apr_pool_get_abort(pool))(APR_ENOMEM);
        }
        return NULL;
    }

    /* make sure the Expat parser is freed along with the pool */
    apr_pool_cleanup_register(pool, parser, cleanup_parser,
                              apr_pool_cleanup_null);

    XML_SetUserData(parser->xp, parser);
    XML_SetElementHandler(parser->xp, start_handler, end_handler);
    XML_SetCharacterDataHandler(parser->xp, cdata_handler);

    return parser;
}
/*
 * Push a chunk of input through the Expat parser.
 *
 * parser:   the parser context.
 * data/len: the input bytes and their count.
 * is_final: non-zero when this is the last chunk of the document.
 *
 * Records APR_XML_ERROR_PARSE_DONE if the Expat parser has already been
 * freed, or APR_XML_ERROR_EXPAT (plus the Expat error code) if Expat
 * rejects the input. Returns APR_EGENERAL whenever the context holds an
 * error -- including one recorded by a callback during this call or by an
 * earlier call -- and APR_SUCCESS otherwise.
 */
static apr_status_t do_parse(apr_xml_parser *parser,
                             const char *data, apr_size_t len,
                             int is_final)
{
    if (parser->xp == NULL) {
        /* the Expat parser is gone; nothing more can be parsed */
        parser->error = APR_XML_ERROR_PARSE_DONE;
        return APR_EGENERAL;
    }

    if (XML_Parse(parser->xp, data, len, is_final) == 0) {
        parser->error = APR_XML_ERROR_EXPAT;
        parser->xp_err = XML_GetErrorCode(parser->xp);
    }

    /* ### better error code? */
    return parser->error ? APR_EGENERAL : APR_SUCCESS;
}
/*
 * Feed a (non-final) chunk of input to the parser.
 *
 * parser:   the parser context.
 * data/len: the input bytes and their count.
 *
 * Returns APR_SUCCESS, or APR_EGENERAL if the parser context holds an
 * error after the chunk has been processed.
 */
APU_DECLARE(apr_status_t) apr_xml_parser_feed(apr_xml_parser *parser,
                                              const char *data,
                                              apr_size_t len)
{
    apr_status_t rv;

    /* more input may follow, so this chunk is not marked as final */
    rv = do_parse(parser, data, len, 0);
    return rv;
}
/*
 * Finish parsing and hand back the resulting document.
 *
 * parser: the parser context.
 * pdoc:   if non-NULL, receives the parsed document on success.
 *
 * Tells the parser that no more input will follow, then runs the pool
 * cleanup that frees the Expat parser, whether or not the parse succeeded.
 * Returns the status of the final parse step; *pdoc is left untouched when
 * that status is not APR_SUCCESS.
 */
APU_DECLARE(apr_status_t) apr_xml_parser_done(apr_xml_parser *parser,
                                              apr_xml_doc **pdoc)
{
    char dummy;     /* only its address is passed; the length given is zero */
    apr_status_t rv;

    rv = do_parse(parser, &dummy, 0, 1);

    /* get rid of the parser */
    (void) apr_pool_cleanup_run(parser->p, parser, cleanup_parser);

    if (rv == APR_SUCCESS && pdoc != NULL) {
        *pdoc = parser->doc;
    }
    return rv;
}
APU_DECLARE(char *) apr_xml_parser_geterror(apr_xml_parser *parser,
char *errbuf,
apr_size_t errbufsize)
{
int error = parser->error;
const char *msg;
/* clear our record of an error */
parser->error = 0;
switch (error) {
case 0:
msg = "No error.";
break;
case APR_XML_NS_ERROR_UNKNOWN_PREFIX:
msg = "An undefined namespace prefix was used.";
break;
case APR_XML_NS_ERROR_INVALID_DECL:
msg = "A namespace prefix was defined with an empty URI.";
break;
case APR_XML_ERROR_EXPAT:
(void) apr_snprintf(errbuf, errbufsize,
"XML parser error code: %s (%d)",
XML_ErrorString(parser->xp_err), parser->xp_err);
return errbuf;
case APR_XML_ERROR_PARSE_DONE:
msg = "The parser is not active.";
break;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -