⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 wbxml_parser.c

📁 WAP Binary XML 简单地说
💻 C
📖 第 1 页 / 共 5 页
字号:
/*
 * libwbxml, the WBXML Library.
 * Copyright (C) 2002-2005 Aymerick Jehanne <aymerick@jehanne.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * LGPL v2.1: http://www.gnu.org/copyleft/lesser.txt
 *
 * Contact: libwbxml@aymerick.com
 * Home: http://libwbxml.aymerick.com
 */

/**
 * @file wbxml_parser.c
 * @ingroup wbxml_parser
 *
 * @author Aymerick Jehanne <libwbxml@aymerick.com>
 * @date 02/03/12
 *
 * @brief WBXML Parser - Parse a WBXML document and call user defined Callbacks
 *
 * @todo Handle correctly charset
 *
 * @note WBXML Versions Differences:
 *            - WBXML 1.2: - No differences with WBXML 1.3, except a clarification in BNF for 'LITERAL' handling
 *            - WBXML 1.1: - No Switch Page mechanism
 *                         - An Attribute value can't be "opaque"
 *            - WBXML 1.0: - No 'charset' handling
 *                         - No 'opaque' support
 *                         - A strange 'body' rule description in BNF (just forget it).
 */

#include "wbxml.h"

/* Memory management related defines */
#define WBXML_PARSER_MALLOC_BLOCK 5000
#define WBXML_PARSER_STRING_TABLE_MALLOC_BLOCK 200
#define WBXML_PARSER_ATTR_VALUE_MALLOC_BLOCK 100

/** Set it to '1' for Best Effort mode */
#define WBXML_PARSER_BEST_EFFORT 1

/** For unknown Tag Name or Attribute Name (in Best Effort Mode) */
#define WBXML_PARSER_UNKNOWN_STRING ((WB_UTINY *)"unknown")

/** If you want to modify this define, change the 'entcode' variable length in parse_entity() too please */
#define WBXML_PARSER_MAX_ENTITY_CODE 999999


/**
 * @brief The WBXML Application Token types
 */
typedef enum WBXMLTokenType_e {
    WBXML_TAG_TOKEN,        /**< Tag token */
    WBXML_ATTR_TOKEN        /**< Attribute token */
} WBXMLTokenType;


/**
 * @brief The WBXML Parser
 * @warning For now 'current_tag' field is only used for WV Content Parsing. And for this use, it works.
 *          But this field is reset after End Tag, and as there is no Linked List mechanism, this is bad for
 *          cascading elements: we don't fill this field with parent Tag when parsing End Tag.
 */
struct WBXMLParser_s {
    void                 *user_data;       /**< User Data */
    WBXMLContentHandler  *content_hdl;     /**< Content Handlers Callbacks */
    WBXMLBuffer          *wbxml;           /**< The wbxml we are parsing */
    WBXMLBuffer          *strstbl;         /**< String Table specified in WBXML document */
    const WBXMLLangEntry *langTable;       /**< Current document Language Table */
    const WBXMLLangEntry *mainTable;       /**< Main WBXML Languages Table */
    const WBXMLTagEntry  *current_tag;     /**< Current Tag */

    WBXMLLanguage         lang_forced;     /**< Language forced by User */
    WB_ULONG              public_id;       /**< Public ID specified in WBXML document */
    WB_LONG               public_id_index; /**< If Public ID is a String Table reference,
                                                this is the index defined in the strtbl */
    WBXMLCharsetMIBEnum   charset;         /**< Charset of WBXML document */
    WBXMLCharsetMIBEnum   meta_charset;    /**< Meta-info provided by user: only used if
                                                Charset is not specified in WBXML document */
    WB_ULONG              pos;             /**< Position of parsing cursor in wbxml */
    WBXMLVersion          version;         /**< WBXML Version field specified in WBXML document */
    WB_UTINY              tagCodePage;     /**< Current Tag Code Page */
    WB_UTINY              attrCodePage;    /**< Current Attribute Code Page */
};


/***************************************************
 *    Private Functions prototypes
 */

/* WBXML Parser functions */
static void wbxml_parser_reinit(WBXMLParser *parser);

/* Check functions */
static WB_BOOL is_token(WBXMLParser *parser, WB_UTINY token);
static WB_BOOL is_literal(WBXMLParser *parser);
static WB_BOOL is_attr_value(WBXMLParser *parser);
static WB_BOOL is_string(WBXMLParser *parser);
static WB_BOOL is_extension(WBXMLParser *parser);
static WB_BOOL check_public_id(WBXMLParser *parser);

/* Parse functions */
static WBXMLError parse_version(WBXMLParser *parser);
static WBXMLError parse_publicid(WBXMLParser *parser);
static WBXMLError parse_charset(WBXMLParser *parser);
static WBXMLError parse_strtbl(WBXMLParser *parser);
static WBXMLError parse_body(WBXMLParser *parser);

static WBXMLError parse_pi(WBXMLParser *parser);

static WBXMLError parse_element(WBXMLParser *parser);
static void free_attrs_table(WBXMLAttribute **attrs);

static WBXMLError parse_switch_page(WBXMLParser *parser, WBXMLTokenType  code_space);
static WBXMLError parse_stag(WBXMLParser *parser, WB_UTINY *tag, WBXMLTag **element);
static WBXMLError parse_tag(WBXMLParser *parser, WB_UTINY *tag, WBXMLTag **element);
static WBXMLError parse_attribute(WBXMLParser *parser, WBXMLAttribute **attr);
static WBXMLError parse_content(WBXMLParser *parser, WBXMLBuffer **result);

static WBXMLError parse_string(WBXMLParser *parser, WBXMLBuffer **result);
static WBXMLError parse_extension(WBXMLParser *parser, WBXMLTokenType code_space, WBXMLBuffer **result);
static WBXMLError parse_entity(WBXMLParser *parser, WBXMLBuffer **result);
static WBXMLError parse_opaque(WBXMLParser *parser, WBXMLBuffer **result);

static WBXMLError parse_literal(WBXMLParser *parser, WB_UTINY *tag, WBXMLBuffer **result);

static WBXMLError parse_attr_start(WBXMLParser *parser, WBXMLAttributeName **name, const WB_UTINY **value);
static WBXMLError parse_attr_value(WBXMLParser *parser, WBXMLBuffer **result);

static WBXMLError parse_termstr(WBXMLParser *parser, WBXMLBuffer **result);
static WBXMLError parse_inline(WBXMLParser *parser, WBXMLBuffer **result);
static WBXMLError parse_tableref(WBXMLParser *parser, WBXMLBuffer **result);
static WBXMLError parse_entcode(WBXMLParser *parser, WB_ULONG *result);

static WBXMLError get_strtbl_reference(WBXMLParser *parser, WB_ULONG index, WBXMLBuffer **result);

/* Basic Types Parse functions */
static WBXMLError parse_uint8(WBXMLParser *parser, WB_UTINY *result);
static WBXMLError parse_mb_uint32(WBXMLParser *parser,
WB_ULONG *result);/* Language Specific Decoding Functions */#if defined( WBXML_SUPPORT_SI ) || defined( WBXML_SUPPORT_EMN )static WBXMLError decode_datetime(WBXMLBuffer *buff);#endif /* WBXML_SUPPORT_SI || WBXML_SUPPORT_EMN */static WBXMLError decode_opaque_content(WBXMLParser *parser, WBXMLBuffer **data);#if defined( WBXML_SUPPORT_WV )static WBXMLError decode_wv_content(WBXMLParser *parser, WBXMLBuffer **data);static WBXMLError decode_wv_integer(WBXMLBuffer **data);static WBXMLError decode_wv_datetime(WBXMLBuffer **data);#endif /* WBXML_SUPPORT_WV */#if defined( WBXML_SUPPORT_DRMREL )static WBXMLError decode_drmrel_keyvalue(WBXMLBuffer **data);#endif /* WBXML_SUPPORT_DRMREL *//* Macro for error handling */#define CHECK_ERROR if (ret != WBXML_OK) return ret;/*************************************************** *    Public Functions */WBXML_DECLARE(WBXMLParser *) wbxml_parser_create(void){    WBXMLParser *parser = NULL;    parser = (WBXMLParser *) wbxml_malloc(sizeof(WBXMLParser));    if (parser == NULL) {        return NULL;    }        parser->wbxml = NULL;    parser->user_data = NULL;    parser->content_hdl = NULL;    parser->strstbl = NULL;    parser->langTable = NULL;    /* Default Main WBXML Languages Table */    parser->mainTable = wbxml_tables_get_main();    parser->current_tag = NULL;    parser->lang_forced = WBXML_LANG_UNKNOWN;    parser->public_id = WBXML_PUBLIC_ID_UNKNOWN;        parser->public_id_index = -1;    parser->charset = WBXML_CHARSET_UNKNOWN;    parser->meta_charset = WBXML_CHARSET_UNKNOWN;    parser->version = WBXML_VERSION_UNKNOWN;            parser->pos = 0;    parser->tagCodePage = 0;    parser->attrCodePage = 0;    return parser;}WBXML_DECLARE(void) wbxml_parser_destroy(WBXMLParser *parser){    if (parser == NULL)        return;        wbxml_buffer_destroy(parser->wbxml);    wbxml_buffer_destroy(parser->strstbl);    wbxml_free(parser);}WBXML_DECLARE(WBXMLError) wbxml_parser_parse(WBXMLParser *parser, WB_UTINY *wbxml, WB_ULONG wbxml_len){    
WBXMLError ret = WBXML_OK;    if (parser == NULL)        return WBXML_ERROR_NULL_PARSER;    if ((wbxml == NULL) || (wbxml_len <= 0))        return WBXML_ERROR_EMPTY_WBXML;    /* Reinitialize WBXML Parser */    wbxml_parser_reinit(parser);    parser->wbxml = wbxml_buffer_create(wbxml, wbxml_len, WBXML_PARSER_MALLOC_BLOCK);    if (parser->wbxml == NULL)        return WBXML_ERROR_NOT_ENOUGH_MEMORY;    /* WBXML Version */    ret = parse_version(parser);    CHECK_ERROR    if ((WB_UTINY)parser->version > WBXML_VERSION_13)        WBXML_WARNING((WBXML_PARSER, "This library only supports WBXML " WBXML_VERSION_TEXT_13));    /* WBXML Public ID */    ret = parse_publicid(parser);    CHECK_ERROR    /* Ignore Document Public ID if user has forced use of another Public ID */    if (parser->lang_forced != WBXML_LANG_UNKNOWN)        parser->public_id = wbxml_tables_get_wbxml_publicid(wbxml_tables_get_main(), parser->lang_forced);    /* No charset in WBXML 1.0 */    if (parser->version != WBXML_VERSION_10) {        ret = parse_charset(parser);        CHECK_ERROR    }    /* Check charset */    if (parser->charset == WBXML_CHARSET_UNKNOWN) {        if (parser->meta_charset != WBXML_CHARSET_UNKNOWN) {            /* Use meta-information provided by user */            parser->charset = parser->meta_charset;                  WBXML_DEBUG((WBXML_PARSER,                        "Using provided meta charset: %ld",                        parser->meta_charset));        }        else {            /* Default Charset Encoding: UTF-8 */            parser->charset = WBXML_PARSER_DEFAULT_CHARSET;                  WBXML_WARNING((WBXML_PARSER,                           "No charset information found, using default : %s",                           WBXML_PARSER_DEFAULT_CHARSET));        }    }    /* WBXML String Table */    ret = parse_strtbl(parser);    CHECK_ERROR    /* Now that we have parsed String Table, we can check Public ID */    if (!check_public_id(parser)) {        WBXML_ERROR((WBXML_PARSER, 
"PublicID not found"));        return WBXML_ERROR_UNKNOWN_PUBLIC_ID;    }    /* Call to WBXMLStartDocumentHandler */    if ((parser->content_hdl != NULL) && (parser->content_hdl->start_document_clb != NULL))        parser->content_hdl->start_document_clb(parser->user_data, parser->charset, parser->langTable);    /* WBXML Body */    ret = parse_body(parser);    CHECK_ERROR    /* Call to WBXMLEndDocumentHandler */    if ((parser->content_hdl != NULL) && (parser->content_hdl->end_document_clb != NULL))        parser->content_hdl->end_document_clb(parser->user_data);    return ret;}WBXML_DECLARE(void) wbxml_parser_set_user_data(WBXMLParser *parser, void *user_data){    if (parser != NULL)        parser->user_data = user_data;}WBXML_DECLARE(void) wbxml_parser_set_content_handler(WBXMLParser *parser, WBXMLContentHandler *content_handler){    if (parser != NULL)        parser->content_hdl = content_handler;}WBXML_DECLARE(void) wbxml_parser_set_main_table(WBXMLParser *parser, const WBXMLLangEntry *main_table){    if (parser != NULL)        parser->mainTable = main_table;}WBXML_DECLARE(WB_BOOL) wbxml_parser_set_language(WBXMLParser *parser, WBXMLLanguage lang){    if (parser != NULL) {        parser->lang_forced = lang;        return TRUE;    }    return FALSE;}WBXML_DECLARE(WB_BOOL) wbxml_parser_set_meta_charset(WBXMLParser *parser,                                                     WBXMLCharsetMIBEnum charset){    if ( parser != NULL ) {        parser->meta_charset = charset;        return TRUE;    }      return FALSE;}WBXML_DECLARE(WB_ULONG) wbxml_parser_get_wbxml_public_id(WBXMLParser *parser){    if ((parser != NULL) && (parser->langTable != NULL) && (parser->langTable->publicID != NULL))        return parser->langTable->publicID->wbxmlPublicID;    else        return WBXML_PUBLIC_ID_UNKNOWN;}WBXML_DECLARE(const WB_UTINY *) wbxml_parser_get_xml_public_id(WBXMLParser *parser){    if ((parser != NULL) && (parser->langTable != NULL) && (parser->langTable->publicID != 
NULL))        return (const WB_UTINY *) parser->langTable->publicID->xmlPublicID;    else        return NULL;}WBXML_DECLARE(WBXMLVersion) wbxml_parser_get_wbxml_version(WBXMLParser *parser){    if (parser != NULL)        return parser->version;    else        return WBXML_VERSION_UNKNOWN;}WBXML_DECLARE(WB_LONG) wbxml_parser_get_current_byte_index(WBXMLParser *parser){    if (parser != NULL)        return parser->pos - 1;    else        return 0;}/*************************************************** *    Private Functions *//************************** * WBXML Parser functions *//** * @brief Reinitialize a WBXML Parser * @param parser The WBXMLParser to reinitialize * @note Only reinitialize internal fields of parser, and so keep User Data *         and Content Handler pointers. */static void wbxml_parser_reinit(WBXMLParser *parser){    if (parser == NULL)        return;    wbxml_buffer_destroy(parser->wbxml);    parser->wbxml           = NULL;      wbxml_buffer_destroy(parser->strstbl);    parser->strstbl         = NULL;      parser->langTable       = NULL;    parser->current_tag     = NULL;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -