⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 apr_xml.c

📁 Apache 2.0.63 is the current stable version of the 2.0 series, and is recommended over any previous
💻 C
📖 第 1 页 / 共 2 页
字号:
/* Copyright 2000-2005 The Apache Software Foundation or its licensors, as
 * applicable.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "apr.h"
#include "apr_strings.h"

#define APR_WANT_STDIO          /* for sprintf() */
#define APR_WANT_STRFUNC
#include "apr_want.h"

#include "apr_xml.h"

#include "apu_config.h"

#ifdef APR_HAVE_OLD_EXPAT
#include "xmlparse.h"
#else
#include "expat.h"
#endif

#define DEBUG_CR "\r\n"

/* errors related to namespace processing */
#define APR_XML_NS_ERROR_UNKNOWN_PREFIX (-1000)
#define APR_XML_NS_ERROR_INVALID_DECL (-1001)

/* test for a namespace prefix that begins with [Xx][Mm][Ll] */
#define APR_XML_NS_IS_RESERVED(name) \
	( (name[0] == 'X' || name[0] == 'x') && \
	  (name[1] == 'M' || name[1] == 'm') && \
	  (name[2] == 'L' || name[2] == 'l') )


/* the real (internal) definition of the parser context */
struct apr_xml_parser {
    apr_xml_doc *doc;		/* the doc we're parsing */
    apr_pool_t *p;		/* the pool we allocate from */
    apr_xml_elem *cur_elem;	/* current element */

    int error;			/* an error has occurred */
#define APR_XML_ERROR_EXPAT             1
#define APR_XML_ERROR_PARSE_DONE        2
/* also: public APR_XML_NS_ERROR_* values (if any) */

    XML_Parser xp;              /* the actual (Expat) XML parser */
    enum XML_Error xp_err;      /* stored Expat error code */
};

/* struct for scoping namespace declarations */
typedef struct apr_xml_ns_scope {
    const char *prefix;		/* prefix used for this ns */
    int ns;			/* index into namespace table */
    int emptyURI;		/* the namespace URI is the empty string */
    struct apr_xml_ns_scope *next;	/* next scoped namespace */
} apr_xml_ns_scope;


/* return namespace table index for a given prefix */
static int find_prefix(apr_xml_parser *parser, const char *prefix)
{
    apr_xml_elem *elem = parser->cur_elem;

    /*
    ** Walk up the tree, looking for a namespace scope that defines this
    ** prefix.
    */
    for (; elem; elem = elem->parent) {
	apr_xml_ns_scope *ns_scope = elem->ns_scope;

	for (ns_scope = elem->ns_scope; ns_scope; ns_scope = ns_scope->next) {
	    if (strcmp(prefix, ns_scope->prefix) == 0) {
		if (ns_scope->emptyURI) {
		    /*
		    ** It is possible to set the default namespace to an
		    ** empty URI string; this resets the default namespace
		    ** to mean "no namespace." We just found the prefix
		    ** refers to an empty URI, so return "no namespace."
		    */
		    return APR_XML_NS_NONE;
		}

		return ns_scope->ns;
	    }
	}
    }

    /*
     * If the prefix is empty (""), this means that a prefix was not
     * specified in the element/attribute. The search that was performed
     * just above did not locate a default namespace URI (which is stored
     * into ns_scope with an empty prefix). This means the element/attribute
     * has "no namespace". We have a reserved value for this.
     */
    if (*prefix == '\0') {
	return APR_XML_NS_NONE;
    }

    /* not found */
    return APR_XML_NS_ERROR_UNKNOWN_PREFIX;
}

static void start_handler(void *userdata, const char *name, const char **attrs)
{
    apr_xml_parser *parser = userdata;
    apr_xml_elem *elem;
    apr_xml_attr *attr;
    apr_xml_attr *prev;
    char *colon;
    const char *quoted;
    char *elem_name;

    /* punt once we find an error */
    if (parser->error)
	return;

    elem = apr_pcalloc(parser->p, sizeof(*elem));

    /* prep the element */
    elem->name = elem_name = apr_pstrdup(parser->p, name);

    /* fill in the attributes (note: ends up in reverse order) */
    while (*attrs) {
	attr = apr_palloc(parser->p, sizeof(*attr));
	attr->name = apr_pstrdup(parser->p, *attrs++);
	attr->value = apr_pstrdup(parser->p, *attrs++);
	attr->next = elem->attr;
	elem->attr = attr;
    }

    /* hook the element into the tree */
    if (parser->cur_elem == NULL) {
	/* no current element; this also becomes the root */
	parser->cur_elem = parser->doc->root = elem;
    }
    else {
	/* this element appeared within the current elem */
	elem->parent = parser->cur_elem;

	/* set up the child/sibling links */
	if (elem->parent->last_child == NULL) {
	    /* no first child either */
	    elem->parent->first_child = elem->parent->last_child = elem;
	}
	else {
	    /* hook onto the end of the parent's children */
	    elem->parent->last_child->next = elem;
	    elem->parent->last_child = elem;
	}

	/* this element is now the current element */
	parser->cur_elem = elem;
    }

    /* scan the attributes for namespace declarations */
    for (prev = NULL, attr = elem->attr;
	 attr;
	 attr = attr->next) {
	if (strncmp(attr->name, "xmlns", 5) == 0) {
	    const char *prefix = &attr->name[5];
	    apr_xml_ns_scope *ns_scope;

	    /* test for xmlns:foo= form and xmlns= form */
	    if (*prefix == ':') {
                /* a namespace prefix declaration must have a
                   non-empty value. */
                if (attr->value[0] == '\0') {
                    parser->error = APR_XML_NS_ERROR_INVALID_DECL;
                    return;
                }
		++prefix;
            }
	    else if (*prefix != '\0') {
		/* advance "prev" since "attr" is still present */
		prev = attr;
		continue;
	    }

	    /* quote the URI before we ever start working with it */
	    quoted = apr_xml_quote_string(parser->p, attr->value, 1);

	    /* build and insert the new scope */
	    ns_scope = apr_pcalloc(parser->p, sizeof(*ns_scope));
	    ns_scope->prefix = prefix;
	    ns_scope->ns = apr_xml_insert_uri(parser->doc->namespaces, quoted);
	    ns_scope->emptyURI = *quoted == '\0';
	    ns_scope->next = elem->ns_scope;
	    elem->ns_scope = ns_scope;

	    /* remove this attribute from the element */
	    if (prev == NULL)
		elem->attr = attr->next;
	    else
		prev->next = attr->next;

	    /* Note: prev will not be advanced since we just removed "attr" */
	}
	else if (strcmp(attr->name, "xml:lang") == 0) {
	    /* save away the language (in quoted form) */
	    elem->lang = apr_xml_quote_string(parser->p, attr->value, 1);

	    /* remove this attribute from the element */
	    if (prev == NULL)
		elem->attr = attr->next;
	    else
		prev->next = attr->next;

	    /* Note: prev will not be advanced since we just removed "attr" */
	}
	else {
	    /* advance "prev" since "attr" is still present */
	    prev = attr;
	}
    }

    /*
    ** If an xml:lang attribute didn't exist (lang==NULL), then copy the
    ** language from the parent element (if present).
    **
    ** NOTE: elem_size() *depends* upon this pointer equality.
    */
    if (elem->lang == NULL && elem->parent != NULL)
	elem->lang = elem->parent->lang;

    /* adjust the element's namespace */
    colon = strchr(elem_name, ':');
    if (colon == NULL) {
	/*
	 * The element is using the default namespace, which will always
	 * be found. Either it will be "no namespace", or a default
	 * namespace URI has been specified at some point.
	 */
	elem->ns = find_prefix(parser, "");
    }
    else if (APR_XML_NS_IS_RESERVED(elem->name)) {
	elem->ns = APR_XML_NS_NONE;
    }
    else {
	*colon = '\0';
	elem->ns = find_prefix(parser, elem->name);
	elem->name = colon + 1;

	if (APR_XML_NS_IS_ERROR(elem->ns)) {
	    parser->error = elem->ns;
	    return;
	}
    }

    /* adjust all remaining attributes' namespaces */
    for (attr = elem->attr; attr; attr = attr->next) {
        /*
         * apr_xml_attr defines this as "const" but we dup'd it, so we
         * know that we can change it. a bit hacky, but the existing
         * structure def is best.
         */
        char *attr_name = (char *)attr->name;

	colon = strchr(attr_name, ':');
	if (colon == NULL) {
	    /*
	     * Attributes do NOT use the default namespace. Therefore,
	     * we place them into the "no namespace" category.
	     */
	    attr->ns = APR_XML_NS_NONE;
	}
	else if (APR_XML_NS_IS_RESERVED(attr->name)) {
	    attr->ns = APR_XML_NS_NONE;
	}
	else {
	    *colon = '\0';
	    attr->ns = find_prefix(parser, attr->name);
	    attr->name = colon + 1;

	    if (APR_XML_NS_IS_ERROR(attr->ns)) {
		parser->error = attr->ns;
		return;
	    }
	}
    }
}

static void end_handler(void *userdata, const char *name)
{
    apr_xml_parser *parser = userdata;

    /* punt once we find an error */
    if (parser->error)
	return;

    /* pop up one level */
    parser->cur_elem = parser->cur_elem->parent;
}

static void cdata_handler(void *userdata, const char *data, int len)
{
    apr_xml_parser *parser = userdata;
    apr_xml_elem *elem;
    apr_text_header *hdr;
    const char *s;

    /* punt once we find an error */
    if (parser->error)
	return;

    elem = parser->cur_elem;
    s = apr_pstrndup(parser->p, data, len);

    if (elem->last_child == NULL) {
	/* no children yet. this cdata follows the start tag */
	hdr = &elem->first_cdata;
    }
    else {
	/* child elements exist. this cdata follows the last child. */
	hdr = &elem->last_child->following_cdata;
    }

    apr_text_append(parser->p, hdr, s);
}

static apr_status_t cleanup_parser(void *ctx)
{
    apr_xml_parser *parser = ctx;

    XML_ParserFree(parser->xp);
    parser->xp = NULL;

    return APR_SUCCESS;
}

APU_DECLARE(apr_xml_parser *) apr_xml_parser_create(apr_pool_t *pool)
{
    apr_xml_parser *parser = apr_pcalloc(pool, sizeof(*parser));

    parser->p = pool;
    parser->doc = apr_pcalloc(pool, sizeof(*parser->doc));

    parser->doc->namespaces = apr_array_make(pool, 5, sizeof(const char *));

    /* ### is there a way to avoid hard-coding this? */
    apr_xml_insert_uri(parser->doc->namespaces, "DAV:");

    parser->xp = XML_ParserCreate(NULL);
    if (parser->xp == NULL) {
        (*apr_pool_get_abort(pool))(APR_ENOMEM);
        return NULL;
    }

    apr_pool_cleanup_register(pool, parser, cleanup_parser,
                              apr_pool_cleanup_null);

    XML_SetUserData(parser->xp, parser);
    XML_SetElementHandler(parser->xp, start_handler, end_handler);
    XML_SetCharacterDataHandler(parser->xp, cdata_handler);

    return parser;
}

static apr_status_t do_parse(apr_xml_parser *parser,
                             const char *data, apr_size_t len,
                             int is_final)
{
    if (parser->xp == NULL) {
        parser->error = APR_XML_ERROR_PARSE_DONE;
    }
    else {
        int rv = XML_Parse(parser->xp, data, len, is_final);

        if (rv == 0) {
            parser->error = APR_XML_ERROR_EXPAT;
            parser->xp_err = XML_GetErrorCode(parser->xp);
        }
    }

    /* ### better error code? */
    return parser->error ? APR_EGENERAL : APR_SUCCESS;
}

APU_DECLARE(apr_status_t) apr_xml_parser_feed(apr_xml_parser *parser,
                                              const char *data,
                                              apr_size_t len)
{
    return do_parse(parser, data, len, 0 /* is_final */);
}

APU_DECLARE(apr_status_t) apr_xml_parser_done(apr_xml_parser *parser,
                                              apr_xml_doc **pdoc)
{
    char end;
    apr_status_t status = do_parse(parser, &end, 0, 1 /* is_final */);

    /* get rid of the parser */
    (void) apr_pool_cleanup_run(parser->p, parser, cleanup_parser);

    if (status)
        return status;

    if (pdoc != NULL)
        *pdoc = parser->doc;
    return APR_SUCCESS;
}

APU_DECLARE(char *) apr_xml_parser_geterror(apr_xml_parser *parser,
                                            char *errbuf,
                                            apr_size_t errbufsize)
{
    int error = parser->error;
    const char *msg;

    /* clear our record of an error */
    parser->error = 0;

    switch (error) {
    case 0:
        msg = "No error.";
        break;

    case APR_XML_NS_ERROR_UNKNOWN_PREFIX:
        msg = "An undefined namespace prefix was used.";
        break;

    case APR_XML_NS_ERROR_INVALID_DECL:
        msg = "A namespace prefix was defined with an empty URI.";
        break;

    case APR_XML_ERROR_EXPAT:
        (void) apr_snprintf(errbuf, errbufsize,
                            "XML parser error code: %s (%d)",
                            XML_ErrorString(parser->xp_err), parser->xp_err);
        return errbuf;

    case APR_XML_ERROR_PARSE_DONE:
        msg = "The parser is not active.";
        break;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -