⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 wbxml_encoder.c

📁 WAP Binary XML 简单地说
💻 C
📖 第 1 页 / 共 5 页
字号:
/* * libwbxml, the WBXML Library. * Copyright (C) 2002-2005 Aymerick Jehanne <aymerick@jehanne.org> *  * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. *  * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU * Lesser General Public License for more details. *  * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA *  * LGPL v2.1: http://www.gnu.org/copyleft/lesser.txt *  * Contact: libwbxml@aymerick.com * Home: http://libwbxml.aymerick.com *//** * @file wbxml_encoder.c * @ingroup wbxml_encoder * * @author Aymerick Jehanne <libwbxml@aymerick.com> * @date 11/11/02 * * @brief WBXML Encoder - Encodes a WBXML Tree to WBXML or to XML * * @note Inspired from kannel WML Encoder (http://www.kannel.org) * * @note [OMA WV 1.1] : OMA-WV-CSP_WBXML-V1_1-20021001-A.pdf * * @todo Parse CDDATA * @todo Parse PI * @todo Handle Charsets Encoding * @todo Really generate ENTITY tokens * @todo Handle Namespaces ! * @todo For canonical XML output: Sort the Attributes * @todo When adding string to String Table, check that this is not a Content Text that will be tokenized * @todo For Wireless-Village CSP : *              - Encode "Date and Time" in OPAQUE (OMA-WV-CSP_WBXML-V1_1-20021001-A.pdf - 6.6) * * @todo Review the canonical XML generation: *       - http://www.jclark.com/xml/canonxml.html*        - http://www.w3.org/TR/2004/REC-xml-20040204/ */#include <ctype.h> /* For isdigit() */#include "wbxml.h"/** * Compilation Flag: WBXML_ENCODER_USE_STRTBL * ----------------- * Do We Use String Table when Encoding to WBXML ? * (NOTE: We still use String Table for Unknown Public ID, even if this flag is not set) *//* WBXML Header:    version     publicid    charset     length *                  u_int8      mb_u_int32  mb_u_int32  mb_u_int32 *                  1 octet     5 octets    5 octets    5 octets   :  16 octets * mb_u_int32: 5 octets (to handle continuation bits) */#define WBXML_HEADER_MAX_LEN 16/* Memory management related defines */#define WBXML_ENCODER_XML_DOC_MALLOC_BLOCK 5000#define WBXML_ENCODER_WBXML_DOC_MALLOC_BLOCK 1000#define WBXML_ENCODER_XML_HEADER_MALLOC_BLOCK 250#define WBXML_ENCODER_WBXML_HEADER_MALLOC_BLOCK WBXML_HEADER_MAX_LEN/* WBXML Default Charset: UTF-8 (106) */#define WBXML_ENCODER_DEFAULT_CHARSET 0x6a/* String Terminating NULL Char */#define WBXML_STR_END '\0'/* Minimum String Size needed for String Table - @note Set to '3' for Prov 1.0 */#define WBXML_ENCODER_STRING_TABLE_MIN 3/** * Default charset of the outputed WBXML document. Used only in this case : *  - No charset was indicated thanks to the function 'wbxml_encoder_set_output_charset()' *  - and the WBXML Tree field 'orig_charset' is set to WBXML_CHARSET_UNKNOWN (ie. charset *    information not found in original document) */#define WBXML_ENCODER_WBXML_DEFAULT_CHARSET WBXML_CHARSET_UTF_8/** * Default charset of the outputed XML document. Used only in this case : *  - No charset was indicated thanks to the function 'wbxml_encoder_set_output_charset()' *  - and the WBXML Tree field 'orig_charset' is set to WBXML_CHARSET_UNKNOWN (ie. charset *    information not found in original document) */#define WBXML_ENCODER_XML_DEFAULT_CHARSET WBXML_CHARSET_UTF_8/** * If defined, generate empty XML elements (eg: <foo />), else generate * full "end element" (eg: <foo></foo>) * * @todo This must be a 'WBXMLGenXMLParams' parameter */#define WBXML_ENCODER_XML_GEN_EMPTY_ELT/** * If defined, do not indent elements that have no element child (eg: <foo>bar</foo>), * else indent anyway (eg: <foo> *                           bar *                         </foo>) * * @todo This must be a 'WBXMLGenXMLParams' parameter */#define WBXML_ENCODER_XML_NO_EMPTY_ELT_INDENT/** * @warning For now 'current_tag' field is only used for WV Content Encoding. And for this use, it works. *          But this field is reset after End Tag, and as there is no Linked List mecanism, this is bad for *          cascading elements: we don't fill this field with parent Tag when parsing End Tag. * * @warning For now 'current_text_parent' field is only used for DRM REL Content Encoding. It should not be *          used for another purpose. */struct WBXMLEncoder_s {    WBXMLTree *tree;                        /**< WBXML Tree to Encode */    const WBXMLLangEntry *lang;             /**< Language table to use */    WBXMLBuffer *output;                    /**< The output (wbxml or xml) we are producing */    WBXMLBuffer *output_header;             /**< The output header (used if Flow Mode encoding is activated) */    const WBXMLTagEntry *current_tag;       /**< Current Tag (See The Warning For This Field !) */    const WBXMLTreeNode *current_text_parent; /**< Text parent of current Node (See The Warning For This Field !) */    const WBXMLAttrEntry *current_attr;     /**< Current Attribute */    WB_UTINY tagCodePage;                   /**< Current Tag Code Page */    WB_UTINY attrCodePage;                  /**< Current Attribute Code Page */    WB_BOOL ignore_empty_text;              /**< Do we ignore empty text nodes (ie: ignorable whitespaces)? */    WB_BOOL remove_text_blanks;             /**< Do we remove leading and trailing blanks in text nodes ? */    WBXMLEncoderOutputType output_type;     /**< Output Type */    WBXMLGenXMLType xml_gen_type;           /**< XML Generation Type */    WB_UTINY indent_delta;                  /**< Indent Delta (number of spaces) */    WB_UTINY indent;                        /**< Current Indent */    WB_BOOL in_content;                     /**< We are in Content Text (used for indentation when generating XML output) */    WB_BOOL in_cdata;                       /**< We are in a CDATA section (and so, content must be generaed "as is") */    WBXMLBuffer *cdata;                     /**< Current CDATA Buffer */#if defined( WBXML_ENCODER_USE_STRTBL )    WBXMLList *strstbl;                     /**< String Table we are creating */    WB_ULONG strstbl_len;                   /**< String Table Length */    WB_BOOL use_strtbl;                     /**< Do we use String Table when generating WBXML output ? (default: YES) */#endif /* WBXML_ENCODER_USE_STRTBL */    WB_BOOL xml_encode_header;              /**< Do we generate XML Header ? */    WBXMLVersion wbxml_version;             /**< WBXML Version to use (when generating WBXML output) */    WBXMLCharsetMIBEnum output_charset;     /**< Output charset encoding */    WB_BOOL flow_mode;                      /**< Is Flow Mode encoding activated ? */    WB_ULONG pre_last_node_len;             /**< Output buffer length before last node encoding */    WB_BOOL textual_publicid;               /**< Generate textual Public ID instead of token (when generating WBXML output) */};#if defined( WBXML_ENCODER_USE_STRTBL )/** * @brief The WBXML String Table Element */typedef struct WBXMLStringTableElement_t {    WBXMLBuffer *string; /**< String */    WB_ULONG offset;     /**< Offset of String in String Table */    WB_ULONG count;      /**< Number of times this String is referenced in the XML Document */    WB_BOOL stat;        /**< If set to TRUE, this is a static String that we must not destroy in wbxml_strtbl_element_destroy() function */} WBXMLStringTableElement;#endif /* WBXML_ENCODER_USE_STRTBL *//** * @brief WBXML Value Element Context: In Content or in Attribute Value */typedef enum WBXMLValueElementCtx_e {    WBXML_VALUE_ELEMENT_CTX_CONTENT = 0,    /**< Text Content */    WBXML_VALUE_ELEMENT_CTX_ATTR            /**< Attribute Value */} WBXMLValueElementCtx;/** * @brief WBXML Value Element Type: string / tableref / extension / opaque */typedef enum WBXMLValueElementType_e {    WBXML_VALUE_ELEMENT_STRING = 0, /**< Inline String */    WBXML_VALUE_ELEMENT_EXTENSION,  /**< Extension Token */    WBXML_VALUE_ELEMENT_OPAQUE,     /**< Opaque Buffer */    WBXML_VALUE_ELEMENT_ATTR_TOKEN /**< Attribute Value Token */#if defined( WBXML_ENCODER_USE_STRTBL )    , WBXML_VALUE_ELEMENT_TABLEREF   /**< String Table Reference */#endif /* WBXML_ENCODER_USE_STRTBL */} WBXMLValueElementType;/** * @brief WBXML Value Element Structure */typedef struct WBXMLValueElement_t {    WBXMLValueElementType type;     /**< Cf WBXMLValueElementType enum */    union {        WBXMLBuffer *str;                   /**< WBXML_VALUE_ELEMENT_STRING */        const WBXMLExtValueEntry *ext;      /**< WBXML_VALUE_ELEMENT_EXTENSION */        WBXMLBuffer *buff;                  /**< WBXML_VALUE_ELEMENT_OPAQUE */        const WBXMLAttrValueEntry *attr;    /**< WBXML_VALUE_ELEMENT_ATTR_TOKEN */#if defined( WBXML_ENCODER_USE_STRTBL )        WB_ULONG    index;                  /**< WBXML_VALUE_ELEMENT_TABLEREF */#endif /* WBXML_ENCODER_USE_STRTBL */    } u;} WBXMLValueElement;/*************************************************** *    Private Functions prototypes *//******************************* * Common Functions */#if 0static WB_BOOL convert_char_to_ucs4(WB_UTINY ch, WB_ULONG *result);#endif /* 0 */static WBXMLEncoder *encoder_duplicate(WBXMLEncoder *encoder);static WBXMLError encoder_encode_tree(WBXMLEncoder *encoder);static WB_BOOL encoder_init_output(WBXMLEncoder *encoder);/******************************* * WBXML Tree Parsing Functions */static WBXMLError parse_node(WBXMLEncoder *encoder, WBXMLTreeNode *node, WB_BOOL enc_end);static WBXMLError parse_element(WBXMLEncoder *encoder, WBXMLTreeNode *node, WB_BOOL has_content);static WBXMLError parse_element_end(WBXMLEncoder *encoder, WBXMLTreeNode *node, WB_BOOL has_content);static WBXMLError parse_attribute(WBXMLEncoder *encoder, WBXMLAttribute *attribute);static WBXMLError parse_text(WBXMLEncoder *encoder, WBXMLTreeNode *node);static WBXMLError parse_cdata(WBXMLEncoder *encoder, WBXMLTreeNode *node);static WBXMLError parse_pi(WBXMLEncoder *encoder, WBXMLTreeNode *node);static WBXMLError parse_tree(WBXMLEncoder *encoder, WBXMLTreeNode *node);/******************************* * WBXML Output Functions *//* Build WBXML Result */static WBXMLError wbxml_build_result(WBXMLEncoder *encoder, WB_UTINY **wbxml, WB_ULONG *wbxml_len);static WBXMLError wbxml_fill_header(WBXMLEncoder *encoder, WBXMLBuffer *header);/* WBXML Encoding Functions */static WBXMLError wbxml_encode_end(WBXMLEncoder *encoder);static WBXMLError wbxml_encode_tag(WBXMLEncoder *encoder, WBXMLTreeNode *node, WB_BOOL has_content);static WBXMLError wbxml_encode_tag_literal(WBXMLEncoder *encoder, WB_UTINY *tag, WB_UTINY mask);static WBXMLError wbxml_encode_tag_token(WBXMLEncoder *encoder, WB_UTINY token, WB_UTINY page);static WBXMLError wbxml_encode_attr(WBXMLEncoder *encoder, WBXMLAttribute *attribute);static WBXMLError wbxml_encode_attr_start(WBXMLEncoder *encoder, WBXMLAttribute *attribute, WB_UTINY **value);static WBXMLError wbxml_encode_value_element_buffer(WBXMLEncoder *encoder, WB_UTINY *value, WBXMLValueElementCtx ctx);static WBXMLError wbxml_encode_value_element_list(WBXMLEncoder *encoder, WBXMLList *list);static WBXMLError wbxml_encode_attr_start_literal(WBXMLEncoder *encoder, const WB_UTINY *attr);static WBXMLError wbxml_encode_attr_token(WBXMLEncoder *encoder, WB_UTINY token, WB_UTINY page);static WBXMLError wbxml_encode_inline_string(WBXMLEncoder *encoder, WBXMLBuffer *str);static WBXMLError wbxml_encode_inline_integer_extension_token(WBXMLEncoder *encoder, WB_UTINY ext, WB_UTINY value);#if 0static WBXMLError wbxml_encode_entity(WBXMLEncoder *encoder, WB_ULONG value);#endif /* 0 */static WBXMLError wbxml_encode_opaque(WBXMLEncoder *encoder, WBXMLBuffer *buff);#if defined( WBXML_ENCODER_USE_STRTBL )static WBXMLError wbxml_encode_tableref(WBXMLEncoder *encoder, WB_ULONG offset);#endif /* WBXML_ENCODER_USE_STRTBL */static WBXMLValueElement *wbxml_value_element_create(void);static void wbxml_value_element_destroy(WBXMLValueElement *elt);static void wbxml_value_element_destroy_item(void *elt);static WBXMLError wbxml_encode_tree(WBXMLEncoder *encoder, WBXMLTree *tree);#if ( defined( WBXML_SUPPORT_SI ) || defined( WBXML_SUPPORT_EMN ) )static WBXMLError wbxml_encode_datetime(WBXMLEncoder *encoder, WB_UTINY *buffer);#endif /* WBXML_SUPPORT_SI || WBXML_SUPPORT_EMN */#if defined( WBXML_SUPPORT_WV )static WBXMLError wbxml_encode_wv_content(WBXMLEncoder *encoder, WB_UTINY *buffer);static WBXMLError wbxml_encode_wv_integer(WBXMLEncoder *encoder, WB_UTINY *buffer);static WBXMLError wbxml_encode_wv_datetime(WBXMLEncoder *encoder, WB_UTINY *buffer);#endif /* WBXML_SUPPORT_WV */#if defined( WBXML_SUPPORT_DRMREL )static WBXMLError wbxml_encode_drmrel_content(WBXMLEncoder *encoder, WB_UTINY *buffer);#endif /* WBXML_SUPPORT_DRMREL */#if defined( WBXML_ENCODER_USE_STRTBL )/* WBXML String Table Functions */static WBXMLStringTableElement *wbxml_strtbl_element_create(WBXMLBuffer *string, WB_BOOL is_stat);static void wbxml_strtbl_element_destroy(WBXMLStringTableElement *element);static void wbxml_strtbl_element_destroy_item(void *element);static WBXMLError wbxml_strtbl_initialize(WBXMLEncoder *encoder, WBXMLTreeNode *root);static void wbxml_strtbl_collect_strings(WBXMLEncoder *encoder, WBXMLTreeNode *node, WBXMLList *strings);static WBXMLError wbxml_strtbl_collect_words(WBXMLList *elements, WBXMLList **result);static WBXMLError wbxml_strtbl_construct(WBXMLBuffer *buff, WBXMLList *strstbl);static WBXMLError wbxml_strtbl_check_references(WBXMLEncoder *encoder, WBXMLList **strings, WBXMLList **one_ref, WB_BOOL stat_buff);static WB_BOOL wbxml_strtbl_add_element(WBXMLEncoder *encoder, WBXMLStringTableElement *elt, WB_ULONG *index, WB_BOOL *added);#endif /* WBXML_ENCODER_USE_STRTBL *//******************************* * XML Output Functions *//** New Line */#define WBXML_ENCODER_XML_NEW_LINE ((WB_UTINY *)"\n")/* XML Header Macros */#define WBXML_ENCODER_XML_HEADER "<?xml version=\"1.0\"?>"#define WBXML_ENCODER_XML_DOCTYPE "<!DOCTYPE "#define WBXML_ENCODER_XML_PUBLIC " PUBLIC \""#define WBXML_ENCODER_XML_DTD "\" \""#define WBXML_ENCODER_XML_END_DTD "\">"/* Global vars for XML Normalization */const WB_UTINY xml_lt[5]     = "&lt;";   /**< &lt; */const WB_UTINY xml_gt[5]     = "&gt;";   /**< &gt; */const WB_UTINY xml_amp[6]    = "&amp;";  /**< &amp; */const WB_UTINY xml_quot[7]   = "&quot;"; /**< &quot; */const WB_UTINY xml_apos[7]   = "&apos;"; /**< &apos; */const WB_UTINY xml_slashr[6] = "&#13;";  /**< &#13; */const WB_UTINY xml_slashn[6] = "&#10;";  /**< &#10; */const WB_UTINY xml_tab[5]    = "&#9;";   /**< &#9; *//* Build XML Result */static WBXMLError xml_build_result(WBXMLEncoder *encoder, WB_UTINY **xml, WB_ULONG *xml_len);static WBXMLError xml_fill_header(WBXMLEncoder *encoder, WBXMLBuffer *header);/* XML Encoding Functions */static WBXMLError xml_encode_tag(WBXMLEncoder *encoer, WBXMLTreeNode *node);static WBXMLError xml_encode_end_tag(WBXMLEncoder *encoder, WBXMLTreeNode *node);static WBXMLError xml_encode_attr(WBXMLEncoder *encoder, WBXMLAttribute *attribute);static WBXMLError xml_encode_end_attrs(WBXMLEncoder *encoder, WBXMLTreeNode *node);static WBXMLError xml_encode_text(WBXMLEncoder *encoder, WBXMLTreeNode *node);static WB_BOOL xml_encode_new_line(WBXMLBuffer *buff);static WB_BOOL xml_fix_text(WBXMLBuffer *buff, WB_BOOL normalize);static WBXMLError xml_encode_cdata(WBXMLEncoder *encoder);static WBXMLError xml_encode_end_cdata(WBXMLEncoder *encoder);static WBXMLError xml_encode_tree(WBXMLEncoder *encoder, WBXMLTree *tree);/*************************************************** *    Public Functions */WBXML_DECLARE(WBXMLEncoder *) wbxml_encoder_create_real(void){    WBXMLEncoder *encoder = NULL;    encoder = (WBXMLEncoder *) wbxml_malloc(sizeof(WBXMLEncoder));    if (encoder == NULL) {        return NULL;    }#if defined( WBXML_ENCODER_USE_STRTBL )    if ((encoder->strstbl = wbxml_list_create()) == NULL) {        wbxml_free(encoder);        return NULL;    }    encoder->use_strtbl = TRUE;    encoder->strstbl_len = 0;#endif /* WBXML_ENCODER_USE_STRTBL */    encoder->tree = NULL;    encoder->lang = NULL;    encoder->output = NULL;    encoder->output_header = NULL;    encoder->current_tag = NULL;    encoder->current_text_parent = NULL;    encoder->current_attr = NULL;    encoder->tagCodePage = 0;    encoder->attrCodePage = 0;    encoder->ignore_empty_text = FALSE;    encoder->remove_text_blanks = FALSE;    encoder->output_type = WBXML_ENCODER_OUTPUT_WBXML;    encoder->xml_gen_type = WBXML_GEN_XML_COMPACT;    encoder->indent_delta = 1;    encoder->indent = 0;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -