📄 wbxml_parser.c
字号:
/*
 * libwbxml, the WBXML Library.
 * Copyright (C) 2002-2005 Aymerick Jehanne <aymerick@jehanne.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * LGPL v2.1: http://www.gnu.org/copyleft/lesser.txt
 *
 * Contact: libwbxml@aymerick.com
 * Home: http://libwbxml.aymerick.com
 */

/**
 * @file wbxml_parser.c
 * @ingroup wbxml_parser
 *
 * @author Aymerick Jehanne <libwbxml@aymerick.com>
 * @date 02/03/12
 *
 * @brief WBXML Parser - Parse a WBXML document and call user defined Callbacks
 *
 * @todo Handle correctly charset
 *
 * @note WBXML Versions Differences:
 *          - WBXML 1.2: - No differences with WBXML 1.3, except a clarification in BNF for 'LITERAL' handling
 *          - WBXML 1.1: - No Switch Page mechanism
 *                       - An Attribute value can't be "opaque"
 *          - WBXML 1.0: - No 'charset' handling
 *                       - No 'opaque' support
 *                       - A strange 'body' rule description in BNF (just forget it).
 */

#include "wbxml.h"


/* Memory management related defines */
#define WBXML_PARSER_MALLOC_BLOCK 5000
#define WBXML_PARSER_STRING_TABLE_MALLOC_BLOCK 200
#define WBXML_PARSER_ATTR_VALUE_MALLOC_BLOCK 100

/** Set it to '1' for Best Effort mode */
#define WBXML_PARSER_BEST_EFFORT 1

/** For unknown Tag Name or Attribute Name (in Best Effort Mode) */
#define WBXML_PARSER_UNKNOWN_STRING ((WB_UTINY *)"unknown")

/** If you want to modify this define, change the 'entcode' variable length in parse_entity() too please */
#define WBXML_PARSER_MAX_ENTITY_CODE 999999


/**
 * @brief The WBXML Application Token types
 */
typedef enum WBXMLTokenType_e {
    WBXML_TAG_TOKEN,        /**< Tag token */
    WBXML_ATTR_TOKEN        /**< Attribute token */
} WBXMLTokenType;


/**
 * @brief The WBXML Parser
 * @warning For now 'current_tag' field is only used for WV Content Parsing. And for this use, it works.
 *          But this field is reset after End Tag, and as there is no Linked List mechanism, this is bad for
 *          cascading elements: we don't fill this field with parent Tag when parsing End Tag.
 */
struct WBXMLParser_s {
    void                 *user_data;       /**< User Data */
    WBXMLContentHandler  *content_hdl;     /**< Content Handlers Callbacks */
    WBXMLBuffer          *wbxml;           /**< The wbxml we are parsing */
    WBXMLBuffer          *strstbl;         /**< String Table specified in WBXML document */
    const WBXMLLangEntry *langTable;       /**< Current document Language Table */
    const WBXMLLangEntry *mainTable;       /**< Main WBXML Languages Table */
    const WBXMLTagEntry  *current_tag;     /**< Current Tag */
    WBXMLLanguage         lang_forced;     /**< Language forced by User */
    WB_ULONG              public_id;       /**< Public ID specified in WBXML document */
    WB_LONG               public_id_index; /**< If Public ID is a String Table reference, this is the index defined in the strtbl */
    WBXMLCharsetMIBEnum   charset;         /**< Charset of WBXML document */
    WBXMLCharsetMIBEnum   meta_charset;    /**< Meta-info provided by user: only used if Charset is not specified in WBXML document */
    WB_ULONG              pos;             /**< Position of parsing cursor in wbxml */
    WBXMLVersion          version;         /**< WBXML Version field specified in WBXML document */
    WB_UTINY              tagCodePage;     /**< Current Tag Code Page */
    WB_UTINY              attrCodePage;    /**< Current Attribute Code Page */
};


/***************************************************
 *    Private Functions prototypes
 */

/* WBXML Parser functions */
static void wbxml_parser_reinit(WBXMLParser *parser);

/* Check functions */
static WB_BOOL is_token(WBXMLParser *parser, WB_UTINY token);
static WB_BOOL is_literal(WBXMLParser *parser);
static WB_BOOL is_attr_value(WBXMLParser *parser);
static WB_BOOL is_string(WBXMLParser *parser);
static WB_BOOL is_extension(WBXMLParser *parser);
static WB_BOOL check_public_id(WBXMLParser *parser);

/* Parse functions */
static WBXMLError parse_version(WBXMLParser *parser);
static WBXMLError parse_publicid(WBXMLParser *parser);
static WBXMLError parse_charset(WBXMLParser *parser);
static WBXMLError parse_strtbl(WBXMLParser *parser);
static WBXMLError parse_body(WBXMLParser *parser);

static WBXMLError parse_pi(WBXMLParser *parser);
static WBXMLError parse_element(WBXMLParser *parser);
static void free_attrs_table(WBXMLAttribute **attrs);

static WBXMLError parse_switch_page(WBXMLParser *parser, WBXMLTokenType code_space);
static WBXMLError parse_stag(WBXMLParser *parser, WB_UTINY *tag, WBXMLTag **element);
static WBXMLError parse_tag(WBXMLParser *parser, WB_UTINY *tag, WBXMLTag **element);
static WBXMLError parse_attribute(WBXMLParser *parser, WBXMLAttribute **attr);
static WBXMLError parse_content(WBXMLParser *parser, WBXMLBuffer **result);

static WBXMLError parse_string(WBXMLParser *parser, WBXMLBuffer **result);
static WBXMLError parse_extension(WBXMLParser *parser, WBXMLTokenType code_space, WBXMLBuffer **result);
static WBXMLError parse_entity(WBXMLParser *parser, WBXMLBuffer **result);
static WBXMLError parse_opaque(WBXMLParser *parser, WBXMLBuffer **result);

static WBXMLError parse_literal(WBXMLParser *parser, WB_UTINY *tag, WBXMLBuffer **result);

static WBXMLError parse_attr_start(WBXMLParser *parser, WBXMLAttributeName **name, const WB_UTINY **value);
static WBXMLError parse_attr_value(WBXMLParser *parser, WBXMLBuffer **result);

static WBXMLError parse_termstr(WBXMLParser *parser, WBXMLBuffer **result);
static WBXMLError parse_inline(WBXMLParser *parser, WBXMLBuffer **result);
static WBXMLError parse_tableref(WBXMLParser *parser, WBXMLBuffer **result);
static WBXMLError parse_entcode(WBXMLParser *parser, WB_ULONG *result);

static WBXMLError get_strtbl_reference(WBXMLParser *parser, WB_ULONG index, WBXMLBuffer **result);

/* Basic Types Parse functions */
static WBXMLError parse_uint8(WBXMLParser *parser, WB_UTINY *result);
static WBXMLError parse_mb_uint32(WBXMLParser *parser, WB_ULONG *result);

/* Language Specific Decoding Functions */
#if defined( WBXML_SUPPORT_SI ) || defined( WBXML_SUPPORT_EMN )
static WBXMLError decode_datetime(WBXMLBuffer *buff);
#endif /* WBXML_SUPPORT_SI || WBXML_SUPPORT_EMN */

static WBXMLError decode_opaque_content(WBXMLParser *parser, WBXMLBuffer **data);

#if defined( WBXML_SUPPORT_WV )
static WBXMLError decode_wv_content(WBXMLParser *parser, WBXMLBuffer **data);
static WBXMLError decode_wv_integer(WBXMLBuffer **data);
static WBXMLError decode_wv_datetime(WBXMLBuffer **data);
#endif /* WBXML_SUPPORT_WV */

#if defined( WBXML_SUPPORT_DRMREL )
static WBXMLError decode_drmrel_keyvalue(WBXMLBuffer **data);
#endif /* WBXML_SUPPORT_DRMREL */

/*
 * Macro for error handling.
 * Expands to a complete statement (including its own ';') that returns the
 * local variable 'ret' from the enclosing function when it is not WBXML_OK.
 * It is therefore written without a trailing semicolon at its use sites.
 */
#define CHECK_ERROR if (ret != WBXML_OK) return ret;


/***************************************************
 *    Public Functions
 */

/**
 * @brief Create a new WBXML Parser
 * @return The newly allocated parser, or NULL if the allocation failed
 * @note Every field is set to its initial value; the Main Languages Table
 *       defaults to the one returned by wbxml_tables_get_main().
 */
WBXML_DECLARE(WBXMLParser *) wbxml_parser_create(void)
{
    WBXMLParser *parser = NULL;

    parser = (WBXMLParser *) wbxml_malloc(sizeof(WBXMLParser));
    if (parser == NULL) {
        return NULL;
    }

    parser->wbxml = NULL;
    parser->user_data = NULL;
    parser->content_hdl = NULL;
    parser->strstbl = NULL;
    parser->langTable = NULL;

    /* Default Main WBXML Languages Table */
    parser->mainTable = wbxml_tables_get_main();

    parser->current_tag = NULL;

    parser->lang_forced = WBXML_LANG_UNKNOWN;
    parser->public_id = WBXML_PUBLIC_ID_UNKNOWN;
    parser->public_id_index = -1;
    parser->charset = WBXML_CHARSET_UNKNOWN;
    parser->meta_charset = WBXML_CHARSET_UNKNOWN;
    parser->version = WBXML_VERSION_UNKNOWN;

    parser->pos = 0;
    parser->tagCodePage = 0;
    parser->attrCodePage = 0;

    return parser;
}


/**
 * @brief Destroy a WBXML Parser
 * @param parser The parser to destroy (a NULL parser is ignored)
 * @note Destroys the parser's document buffer and String Table buffer, then
 *       frees the parser itself. User Data and Content Handler are not touched.
 */
WBXML_DECLARE(void) wbxml_parser_destroy(WBXMLParser *parser)
{
    if (parser == NULL)
        return;

    /* NOTE(review): both buffers may still be NULL here (e.g. right after
     * wbxml_parser_create()); this presumes wbxml_buffer_destroy() accepts
     * NULL - confirm against the buffer implementation. */
    wbxml_buffer_destroy(parser->wbxml);
    wbxml_buffer_destroy(parser->strstbl);

    wbxml_free(parser);
}


/**
 * @brief Parse a WBXML document, invoking the registered Content Handler callbacks
 * @param parser    The parser to use
 * @param wbxml     The WBXML document bytes
 * @param wbxml_len Length of the WBXML document
 * @return WBXML_OK on success, otherwise:
 *         - WBXML_ERROR_NULL_PARSER if 'parser' is NULL
 *         - WBXML_ERROR_EMPTY_WBXML if 'wbxml' is NULL or 'wbxml_len' is zero
 *         - WBXML_ERROR_NOT_ENOUGH_MEMORY if the document buffer can't be created
 *         - WBXML_ERROR_UNKNOWN_PUBLIC_ID if check_public_id() fails
 *         - the error returned by the failing parse step
 * @note The parser is reinitialized first, so the same parser can be reused
 *       for several documents. Steps are: version, Public ID, charset (skipped
 *       for WBXML 1.0), String Table, Public ID check, Start Document callback,
 *       body, End Document callback.
 * @note If parsing the body fails, the End Document callback is not called.
 */
WBXML_DECLARE(WBXMLError) wbxml_parser_parse(WBXMLParser *parser, WB_UTINY *wbxml, WB_ULONG wbxml_len)
{
    WBXMLError ret = WBXML_OK;

    if (parser == NULL)
        return WBXML_ERROR_NULL_PARSER;

    if ((wbxml == NULL) || (wbxml_len <= 0))
        return WBXML_ERROR_EMPTY_WBXML;

    /* Reinitialize WBXML Parser */
    wbxml_parser_reinit(parser);

    parser->wbxml = wbxml_buffer_create(wbxml, wbxml_len, WBXML_PARSER_MALLOC_BLOCK);
    if (parser->wbxml == NULL)
        return WBXML_ERROR_NOT_ENOUGH_MEMORY;

    /* WBXML Version */
    ret = parse_version(parser);
    CHECK_ERROR

    if ((WB_UTINY)parser->version > WBXML_VERSION_13)
        WBXML_WARNING((WBXML_PARSER, "This library only supports WBXML " WBXML_VERSION_TEXT_13));

    /* WBXML Public ID */
    ret = parse_publicid(parser);
    CHECK_ERROR

    /* Ignore Document Public ID if user has forced use of another Public ID */
    /* NOTE(review): this looks the forced language up in wbxml_tables_get_main()
     * rather than in parser->mainTable, which wbxml_parser_set_main_table() can
     * override - confirm this is intended. */
    if (parser->lang_forced != WBXML_LANG_UNKNOWN)
        parser->public_id = wbxml_tables_get_wbxml_publicid(wbxml_tables_get_main(), parser->lang_forced);

    /* No charset in WBXML 1.0 */
    if (parser->version != WBXML_VERSION_10) {
        ret = parse_charset(parser);
        CHECK_ERROR
    }

    /* Check charset */
    if (parser->charset == WBXML_CHARSET_UNKNOWN) {
        if (parser->meta_charset != WBXML_CHARSET_UNKNOWN) {
            /* Use meta-information provided by user */
            parser->charset = parser->meta_charset;

            /* The enum is cast so that the argument really matches '%ld' */
            WBXML_DEBUG((WBXML_PARSER,
                         "Using provided meta charset: %ld",
                         (long) parser->meta_charset));
        }
        else {
            /* Default Charset Encoding: UTF-8 */
            parser->charset = WBXML_PARSER_DEFAULT_CHARSET;

            /* The default charset is a MIBenum value (it is assigned to
             * parser->charset just above), not a string: print it as a number. */
            WBXML_WARNING((WBXML_PARSER,
                           "No charset information found, using default : %ld",
                           (long) WBXML_PARSER_DEFAULT_CHARSET));
        }
    }

    /* WBXML String Table */
    ret = parse_strtbl(parser);
    CHECK_ERROR

    /* Now that we have parsed String Table, we can check Public ID */
    if (!check_public_id(parser)) {
        WBXML_ERROR((WBXML_PARSER, "PublicID not found"));
        return WBXML_ERROR_UNKNOWN_PUBLIC_ID;
    }

    /* Call to WBXMLStartDocumentHandler */
    if ((parser->content_hdl != NULL) && (parser->content_hdl->start_document_clb != NULL))
        parser->content_hdl->start_document_clb(parser->user_data, parser->charset, parser->langTable);

    /* WBXML Body */
    ret = parse_body(parser);
    CHECK_ERROR

    /* Call to WBXMLEndDocumentHandler */
    if ((parser->content_hdl != NULL) && (parser->content_hdl->end_document_clb != NULL))
        parser->content_hdl->end_document_clb(parser->user_data);

    return ret;
}


/**
 * @brief Set the User Data passed as first argument to the Content Handler callbacks
 * @param parser    The parser (ignored if NULL)
 * @param user_data The User Data pointer to store
 */
WBXML_DECLARE(void) wbxml_parser_set_user_data(WBXMLParser *parser, void *user_data)
{
    if (parser != NULL)
        parser->user_data = user_data;
}


/**
 * @brief Set the Content Handler whose callbacks are invoked while parsing
 * @param parser          The parser (ignored if NULL)
 * @param content_handler The Content Handler to store (the pointer is kept, not copied)
 */
WBXML_DECLARE(void) wbxml_parser_set_content_handler(WBXMLParser *parser, WBXMLContentHandler *content_handler)
{
    if (parser != NULL)
        parser->content_hdl = content_handler;
}


/**
 * @brief Replace the Main WBXML Languages Table used by the parser
 * @param parser     The parser (ignored if NULL)
 * @param main_table The Languages Table to store (the pointer is kept, not copied)
 */
WBXML_DECLARE(void) wbxml_parser_set_main_table(WBXMLParser *parser, const WBXMLLangEntry *main_table)
{
    if (parser != NULL)
        parser->mainTable = main_table;
}


/**
 * @brief Force the language to use, instead of the Public ID found in the document
 * @param parser The parser
 * @param lang   The language to force
 * @return TRUE if the language was stored, FALSE if 'parser' is NULL
 */
WBXML_DECLARE(WB_BOOL) wbxml_parser_set_language(WBXMLParser *parser, WBXMLLanguage lang)
{
    if (parser != NULL) {
        parser->lang_forced = lang;
        return TRUE;
    }

    return FALSE;
}


/**
 * @brief Provide a charset to use when the document does not specify one
 * @param parser  The parser
 * @param charset The charset MIBenum to fall back on
 * @return TRUE if the charset was stored, FALSE if 'parser' is NULL
 */
WBXML_DECLARE(WB_BOOL) wbxml_parser_set_meta_charset(WBXMLParser *parser, WBXMLCharsetMIBEnum charset)
{
    if ( parser != NULL ) {
        parser->meta_charset = charset;
        return TRUE;
    }

    return FALSE;
}


/**
 * @brief Get the WBXML Public ID of the current document Language Table
 * @param parser The parser
 * @return The WBXML Public ID, or WBXML_PUBLIC_ID_UNKNOWN if the parser, its
 *         Language Table or that table's Public ID entry is NULL
 */
WBXML_DECLARE(WB_ULONG) wbxml_parser_get_wbxml_public_id(WBXMLParser *parser)
{
    if ((parser != NULL) && (parser->langTable != NULL) && (parser->langTable->publicID != NULL))
        return parser->langTable->publicID->wbxmlPublicID;
    else
        return WBXML_PUBLIC_ID_UNKNOWN;
}


/**
 * @brief Get the XML Public ID of the current document Language Table
 * @param parser The parser
 * @return The XML Public ID string, or NULL if the parser, its Language Table
 *         or that table's Public ID entry is NULL
 */
WBXML_DECLARE(const WB_UTINY *) wbxml_parser_get_xml_public_id(WBXMLParser *parser)
{
    if ((parser != NULL) && (parser->langTable != NULL) && (parser->langTable->publicID != NULL))
        return (const WB_UTINY *) parser->langTable->publicID->xmlPublicID;
    else
        return NULL;
}


/**
 * @brief Get the WBXML Version stored in the parser
 * @param parser The parser
 * @return The stored version, or WBXML_VERSION_UNKNOWN if 'parser' is NULL
 */
WBXML_DECLARE(WBXMLVersion) wbxml_parser_get_wbxml_version(WBXMLParser *parser)
{
    if (parser != NULL)
        return parser->version;
    else
        return WBXML_VERSION_UNKNOWN;
}


/**
 * @brief Get the index of the current byte in the document being parsed
 * @param parser The parser
 * @return The parsing position minus one, or 0 if 'parser' is NULL
 */
WBXML_DECLARE(WB_LONG) wbxml_parser_get_current_byte_index(WBXMLParser *parser)
{
    /* NOTE(review): 'pos' is a WB_ULONG, so with pos == 0 the subtraction is
     * done in that type before conversion to WB_LONG; presumably this ends up
     * as -1 - confirm on the supported platforms. */
    if (parser != NULL)
        return parser->pos - 1;
    else
        return 0;
}


/***************************************************
 *    Private Functions
 */

/**************************
 * WBXML Parser functions
 */

/**
 * @brief Reinitialize a WBXML Parser
 * @param parser The WBXMLParser to reinitialize
 * @note Only reinitialize internal fields of parser, and so keep User Data
 *       and Content Handler pointers.
 */
static void wbxml_parser_reinit(WBXMLParser *parser)
{
    if (parser == NULL)
        return;

    wbxml_buffer_destroy(parser->wbxml);
    parser->wbxml = NULL;

    wbxml_buffer_destroy(parser->strstbl);
    parser->strstbl = NULL;

    parser->langTable = NULL;

    parser->current_tag = NULL;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -