📄 wml_compiler.c
字号:
/* ==================================================================== * The Kannel Software License, Version 1.0 * * Copyright (c) 2001-2004 Kannel Group * Copyright (c) 1998-2001 WapIT Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Kannel Group (http://www.kannel.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Kannel" and "Kannel Group" must not be used to * endorse or promote products derived from this software without * prior written permission. For written permission, please * contact org@kannel.org. * * 5. Products derived from this software may not be called "Kannel", * nor may "Kannel" appear in their name, without prior written * permission of the Kannel Group. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Kannel Group. For more information on * the Kannel Group, please see <http://www.kannel.org/>. * * Portions of this software are based upon software originally written at * WapIT Ltd., Helsinki, Finland for the Kannel project. */ /* * wml_compiler.c - compiling WML to WML binary * * This is an implemention for WML compiler for compiling the WML text * format to WML binary format, which is used for transmitting the * decks to the mobile terminal to decrease the use of the bandwidth. * * * Tuomas Luttinen for Wapit Ltd. */#include <time.h>#include <unistd.h>#include <sys/types.h>#include <sys/stat.h>#include <fcntl.h>#include <string.h>#include <math.h>#include <ctype.h>#include <libxml/xmlmemory.h>#include <libxml/tree.h>#include <libxml/debugXML.h>#include <libxml/encoding.h>#include "gwlib/gwlib.h"#include "wml_compiler.h"#include "xml_definitions.h"/*********************************************************************** * Declarations of data types. */struct wml_externalid_t { char *string; char value;};typedef struct wml_externalid_t wml_externalid_t;static wml_externalid_t wml_externalid[] = { { "-//WAPFORUM//DTD WML 1.3//EN", 0x0A }, { "-//WAPFORUM//DTD WML 1.2//EN", 0x09 }, { "-//WAPFORUM//DTD WML 1.1//EN", 0x04 }, { "-//WAPFORUM//DTD WML 1.0//EN", 0x02 }};#define NUMBER_OF_WML_EXTERNALID sizeof(wml_externalid)/sizeof(wml_externalid[0])struct wbxml_version_t { char *string; char value;};typedef struct wbxml_version_t wbxml_version_t;static wbxml_version_t wbxml_version[] = { { "1.3", 0x03 }, { "1.2", 0x02 }, { "1.1", 0x01 },};#define NUMBER_OF_WBXML_VERSION sizeof(wbxml_version)/sizeof(wbxml_version[0])typedef enum { NOESC, ESC, UNESC, FAILED } var_esc_t;/* * The wml token table node with two fields. */typedef struct { char *text; unsigned char token;} wml_table_t;/* * The wml token table node with three fields. */typedef struct { char *text1; char *text2; unsigned char token;} wml_table3_t;/* * The binary WML structure, that has been passed around between the * internal functions. It contains the header fields for wbxml version, * the WML public ID and the character set, the length of the string table, * the list structure implementing the string table and the octet string * containing the encoded WML binary. */typedef struct { unsigned char wbxml_version; unsigned char wml_public_id; unsigned long character_set; unsigned long string_table_length; List *string_table; Octstr *wbxml_string;} wml_binary_t;/* * The string table list node. */typedef struct { unsigned long offset; Octstr *string;} string_table_t;/* * The string table proposal list node. */typedef struct { int count; Octstr *string;} string_table_proposal_t;/* * The wml hash table node. */typedef struct { Octstr *item; unsigned char binary;} wml_hash_t;/* * The hash table node for attribute and values. */typedef struct { Octstr *attribute; unsigned char binary; List *value_list;} wml_attribute_t;#include "xml_shared.h"#include "wml_definitions.h"/*********************************************************************** * Declarations of global variables. */Dict *wml_elements_dict;Dict *wml_attributes_dict;List *wml_attr_values_list;List *wml_URL_values_list;/*********************************************************************** * Declarations of internal functions. These are defined at the end of * the file. *//* * Parsing functions. These funtions operate on a single node or a * smaller datatype. Look for more details on the functions at the * definitions. */static int parse_document(xmlDocPtr document, Octstr *charset, wml_binary_t **wbxml, Octstr *version);static int parse_node(xmlNodePtr node, wml_binary_t **wbxml);static int parse_element(xmlNodePtr node, wml_binary_t **wbxml);static int parse_attribute(xmlAttrPtr attr, wml_binary_t **wbxml);static int parse_attr_value(Octstr *attr_value, List *tokens, wml_binary_t **wbxml, int charset, var_esc_t default_esc);static int parse_text(xmlNodePtr node, wml_binary_t **wbxml);static int parse_cdata(xmlNodePtr node, wml_binary_t **wbxml);static int parse_st_octet_string(Octstr *ostr, int cdata, var_esc_t default_esc, wml_binary_t **wbxml);static void parse_st_end(wml_binary_t **wbxml);static void parse_entities(Octstr *wml_source);/* * Variable functions. These functions are used to find and parse variables. */static int parse_variable(Octstr *text, int start, var_esc_t default_esc, Octstr **output, wml_binary_t **wbxml);static Octstr *get_variable(Octstr *text, int start);static var_esc_t check_variable_syntax(Octstr *variable, var_esc_t default_esc);/* * wml_binary-functions. These are used to create, destroy and modify * wml_binary_t. */static wml_binary_t *wml_binary_create(void);static void wml_binary_destroy(wml_binary_t *wbxml);static void wml_binary_output(Octstr *ostr, wml_binary_t *wbxml);/* Output into the wml_binary. */static void output_st_char(int byte, wml_binary_t **wbxml);static void output_st_octet_string(Octstr *ostr, wml_binary_t **wbxml);static void output_variable(Octstr *variable, Octstr **output, var_esc_t escaped, wml_binary_t **wbxml);/* * Memory allocation and deallocations. */static wml_hash_t *hash_create(char *text, unsigned char token);static wml_attribute_t *attribute_create(void);static void attr_dict_construct(wml_table3_t *attributes, Dict *attr_dict);static void hash_destroy(void *p);static void attribute_destroy(void *p);/* * Comparison functions for the hash tables. */static int hash_cmp(void *hash1, void *hash2);/* * Miscellaneous help functions. */static int check_do_elements(xmlNodePtr node);static var_esc_t check_variable_name(xmlNodePtr node);static Octstr *get_do_element_name(xmlNodePtr node);static int check_if_url(int hex);static int check_if_emphasis(xmlNodePtr node);static int wml_table_len(wml_table_t *table);static int wml_table3_len(wml_table3_t *table);/* * String table functions, used to add and remove strings into and from the * string table. */static string_table_t *string_table_create(int offset, Octstr *ostr);static void string_table_destroy(string_table_t *node);static string_table_proposal_t *string_table_proposal_create(Octstr *ostr);static void string_table_proposal_destroy(string_table_proposal_t *node);static void string_table_build(xmlNodePtr node, wml_binary_t **wbxml);static void string_table_collect_strings(xmlNodePtr node, List *strings);static List *string_table_collect_words(List *strings);static List *string_table_sort_list(List *start);static List *string_table_add_many(List *sorted, wml_binary_t **wbxml);static unsigned long string_table_add(Octstr *ostr, wml_binary_t **wbxml);static void string_table_apply(Octstr *ostr, wml_binary_t **wbxml);static void string_table_output(Octstr *ostr, wml_binary_t **wbxml);/*********************************************************************** * Implementations of the functions declared in wml_compiler.h. *//* * The actual compiler function. This operates as interface to the compiler. * For more information, look wml_compiler.h. */int wml_compile(Octstr *wml_text, Octstr *charset, Octstr **wml_binary, Octstr *version){ int ret = 0; size_t size; xmlDocPtr pDoc = NULL; char *wml_c_text; wml_binary_t *wbxml = NULL; Octstr *encoding = NULL; *wml_binary = octstr_create(""); wbxml = wml_binary_create(); /* Remove the extra space from start and the end of the WML Document. */ octstr_strip_blanks(wml_text); /* Check the WML-code for \0-characters and for WML entities. Fast patch. -- tuo */ parse_entities(wml_text); /* transcode from charset to UTF-8 */ if (charset && octstr_len(charset) && octstr_case_compare(charset, octstr_imm("UTF-8")) == -1) { debug("wml_compile", 0, "WML compiler: Transcoding from <%s> to UTF-8", octstr_get_cstr(charset)); set_charset(wml_text, charset); } /* * If we did not set the character set encoding yet, then obviously * there was no charset argument in the Content-Type HTTP reply header. * We have to scan the xml preamble line for an explicite encoding * definition to allow transcoding from UTF-8 to that charset after * libxml2 did all it's parsing magic. (Keep in mind libxml2 uses UTF-8 * as internal encoding.) -- Stipe */ /* * We will trust the xml preamble encoding more then the HTTP header * charset definition. */ if ((encoding = find_charset_encoding(wml_text)) != NULL) { /* ok, we rely on the xml preamble encoding */ } else if (charset && octstr_len(charset) > 0) { /* we had a HTTP response charset, use this */ encoding = octstr_duplicate(charset); } else { /* we had none, so use UTF-8 as default */ encoding = octstr_create("UTF-8"); } size = octstr_len(wml_text); wml_c_text = octstr_get_cstr(wml_text); if (octstr_search_char(wml_text, '\0', 0) != -1) { error(0, "WML compiler: Compiling error: " "\\0 character found in the middle of the WML source."); ret = -1; } else { /* * An empty octet string for the binary output is created, the wml * source is parsed into a parsing tree and the tree is then compiled * into binary. */ pDoc = xmlParseMemory(wml_c_text, size); if (pDoc != NULL) { /* * If we have a set internal encoding, then apply this information * to the XML parsing tree document for later transcoding ability. */ if (encoding) pDoc->charset = xmlParseCharEncoding(octstr_get_cstr(encoding)); ret = parse_document(pDoc, encoding, &wbxml, version); wml_binary_output(*wml_binary, wbxml); } else { error(0, "WML compiler: Compiling error: " "libxml returned a NULL pointer"); ret = -1; } } wml_binary_destroy(wbxml); octstr_destroy(encoding); if (pDoc) xmlFreeDoc(pDoc); return ret;}/* * Initialization: makes up the hash tables for the compiler. */void wml_init(){ int i = 0, len = 0; wml_hash_t *temp = NULL; /* The wml elements into a hash table. */ len = wml_table_len(wml_elements); wml_elements_dict = dict_create(len, hash_destroy); for (i = 0; i < len; i++) { temp = hash_create(wml_elements[i].text, wml_elements[i].token); dict_put(wml_elements_dict, temp->item, temp); } /* Attributes. */ len = wml_table3_len(wml_attributes); wml_attributes_dict = dict_create(len, attribute_destroy); attr_dict_construct(wml_attributes, wml_attributes_dict); /* Attribute values. */ len = wml_table_len(wml_attribute_values); wml_attr_values_list = list_create(); for (i = 0; i < len; i++) { temp = hash_create(wml_attribute_values[i].text, wml_attribute_values[i].token); list_append(wml_attr_values_list, temp); } /* URL values. */ len = wml_table_len(wml_URL_values); wml_URL_values_list = list_create(); for (i = 0; i < len; i++) { temp = hash_create(wml_URL_values[i].text, wml_URL_values[i].token); list_append(wml_URL_values_list, temp); }}/* * Shutdown: Frees the memory allocated by initialization. */void wml_shutdown(){ dict_destroy(wml_elements_dict); dict_destroy(wml_attributes_dict); list_destroy(wml_attr_values_list, hash_destroy); list_destroy(wml_URL_values_list, hash_destroy);}/*********************************************************************** * Internal functions. *//* * parse_node - the recursive parsing function for the parsing tree. * Function checks the type of the node, calls for the right parse * function for the type, then calls itself for the first child of * the current node if there's one and after that calls itself for the * next child on the list. */static int parse_node(xmlNodePtr node, wml_binary_t **wbxml){ int status = 0; /* Call for the parser function of the node type. */ switch (node->type) { case XML_ELEMENT_NODE: status = parse_element(node, wbxml); break; case XML_TEXT_NODE: status = parse_text(node, wbxml); break; case XML_CDATA_SECTION_NODE: status = parse_cdata(node, wbxml); break; case XML_COMMENT_NODE: case XML_PI_NODE: /* Comments and PIs are ignored. */ break; /* * XML has also many other node types, these are not needed with * WML. Therefore they are assumed to be an error. */ default: error(0, "WML compiler: Unknown XML node in the WML source."); return -1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -