📄 wbxml_parser.c
字号:
parser->public_id = WBXML_PUBLIC_ID_UNKNOWN; parser->public_id_index = -1; parser->charset = WBXML_CHARSET_UNKNOWN; parser->meta_charset = WBXML_CHARSET_UNKNOWN; parser->version = WBXML_VERSION_UNKNOWN; parser->pos = 0; parser->tagCodePage = 0; parser->attrCodePage = 0; }/****************** * Check functions *//** * @brief Check if current byte a specified WBXML token * @param parser The WBXML Parser * @param token The WBXML token * @return TRUE is current byte is the specified token, FALSE otherwise */static WB_BOOL is_token(WBXMLParser *parser, WB_UTINY token){ WB_UTINY result; if (!wbxml_buffer_get_char(parser->wbxml, parser->pos, &result)) return FALSE; return (WB_BOOL) (result == token);}/** * @brief Check if current byte is a WBXML literalTag token * @param parser The WBXML Parser * @return TRUE is current byte is a literalTag token, FALSE otherwise */static WB_BOOL is_literal(WBXMLParser *parser){ WB_UTINY result; if (!wbxml_buffer_get_char(parser->wbxml, parser->pos, &result)) return FALSE; return (WB_BOOL) ((result == WBXML_LITERAL) || (result == WBXML_LITERAL_A) || (result == WBXML_LITERAL_C) || (result == WBXML_LITERAL_AC));}/** * @brief Check if next token to parse is an Attribute Value * @param parser The WBXML Parser * @return TRUE if next token to parse is an Attribute Value, FALSE otherwise * @note attrValue = ([switchPage] ATTRVALUE | string | extension | entity | opaque) */static WB_BOOL is_attr_value(WBXMLParser *parser){ WB_UTINY cur_byte, next_byte; /* Get current byte */ if (!wbxml_buffer_get_char(parser->wbxml, parser->pos, &cur_byte)) return FALSE; /* If current byte is a switch page, check that following token is an Attribute Value token */ if (is_token(parser, WBXML_SWITCH_PAGE)) { if (!wbxml_buffer_get_char(parser->wbxml, parser->pos + 2, &next_byte)) return FALSE; /* Attribute Value is greater than or equal to 128 */ if ((next_byte & 0x80) == 0x80) return TRUE; } /* Else, check current byte is an Attribute Value, a string, an extension, an entity or an opaque */ if (((cur_byte & 0x80) == 0x80) || (is_string(parser)) || (is_extension(parser)) || (is_token(parser, WBXML_ENTITY)) || (is_token(parser, WBXML_OPAQUE))) return TRUE; return FALSE;}/** * @brief Check if current byte is a string * @param parser The WBXML Parser * @return TRUE if current byte is a string, FALSE otherwise */static WB_BOOL is_string(WBXMLParser *parser){ return (WB_BOOL) (is_token(parser, WBXML_STR_I) || is_token(parser, WBXML_STR_T));}/** * @brief Check if current byte is an extension * @param parser The WBXML Parser * @return TRUE if current byte is an extension, FALSE otherwise */static WB_BOOL is_extension(WBXMLParser *parser){ WB_UTINY cur_byte; /* If current byte is a switch page, check the following token */ if (is_token(parser, WBXML_SWITCH_PAGE)) { if (!wbxml_buffer_get_char(parser->wbxml, parser->pos + 2, &cur_byte)) return FALSE; } else { if (!wbxml_buffer_get_char(parser->wbxml, parser->pos, &cur_byte)) return FALSE; } return (WB_BOOL) ((cur_byte == WBXML_EXT_I_0) || (cur_byte == WBXML_EXT_I_1) || (cur_byte == WBXML_EXT_I_2) || (cur_byte == WBXML_EXT_T_0) || (cur_byte == WBXML_EXT_T_1) || (cur_byte == WBXML_EXT_T_2) || (cur_byte == WBXML_EXT_0) || (cur_byte == WBXML_EXT_1) || (cur_byte == WBXML_EXT_2));}/** * @brief Check the Public ID * @param parser The WBXML Parser * @return TRUE if Public ID is found, FALSE otherwise */static WB_BOOL check_public_id(WBXMLParser *parser){ WBXMLBuffer *public_id = NULL; WB_LONG index = 0; WBXML_DEBUG((WBXML_PARSER, "\t Checking PublicID")); /* First check if we can figure out the Public ID */ if ((parser->lang_forced == WBXML_LANG_UNKNOWN) && (parser->public_id == WBXML_PUBLIC_ID_UNKNOWN) && (parser->public_id_index == -1)) { return FALSE; } /******************************************************** * Case 1: Language is forced by user */ if (parser->lang_forced != WBXML_LANG_UNKNOWN) { /* Search Language Entry */ while (parser->mainTable[index].langID != -1) { if (parser->mainTable[index].langID == parser->lang_forced) { parser->langTable = &(parser->mainTable[index]); WBXML_DEBUG((WBXML_PARSER, "\t Language Forced - PublicID : '%s'", parser->mainTable[index].publicID->xmlPublicID)); return TRUE; } index++; } } /******************************************************** * Case 2: Public ID is a normal token * (found in WBXML Document, or forced by user) */ if (parser->public_id != WBXML_PUBLIC_ID_UNKNOWN) { WBXML_DEBUG((WBXML_PARSER, "\t PublicID token: 0x%X", parser->public_id)); /* Search Public ID Table */ while (parser->mainTable[index].publicID != NULL) { if (parser->mainTable[index].publicID->wbxmlPublicID == parser->public_id) { parser->langTable = &(parser->mainTable[index]); WBXML_DEBUG((WBXML_PARSER, "\t PublicID : '%s'", parser->mainTable[index].publicID->xmlPublicID)); return TRUE; } index++; } } /******************************************************** * Case 3: Public ID referenced in String Table */ if (parser->public_id_index != -1) { WBXML_DEBUG((WBXML_PARSER, "\t PublicID is in String Table (index: 0x%X)", parser->public_id_index)); if (get_strtbl_reference(parser, (WB_ULONG) parser->public_id_index, &public_id) != WBXML_OK) { WBXML_ERROR((WBXML_PARSER, "Bad publicID reference in string table")); return FALSE; } WBXML_DEBUG((WBXML_PARSER, "\t PublicID : '%s'", wbxml_buffer_get_cstr(public_id))); /* Search Public ID Table */ while (parser->mainTable[index].publicID != NULL) { if ((parser->mainTable[index].publicID->xmlPublicID != NULL) && (WBXML_STRCASECMP(parser->mainTable[index].publicID->xmlPublicID, wbxml_buffer_get_cstr(public_id)) == 0)) { parser->langTable = &(parser->mainTable[index]); /* parser->public_id = parser->mainTable[index].publicID->wbxmlPublicID; */ wbxml_buffer_destroy(public_id); return TRUE; } index++; } /* Clean up */ wbxml_buffer_destroy(public_id); } /* Public ID not found in Tables */ return FALSE;}/*************************** * WBXML Parse functions *//** * @brief Parse WBXML version * @param parser The WBXML Parser * @return WBXML_OK if parsing is OK, an error code otherwise * @note version = u_int8 */static WBXMLError parse_version(WBXMLParser *parser){ WBXMLError ret = WBXML_OK; /* Initialize version: 1.0 * * Do NOT keep 'WBXML_VERSION_UNKNOWN' (0xffffffff) because only one byte will change. * (for example, if the version is 0x02, then parser->version will be 0xffffff02) */ parser->version = WBXML_VERSION_10; if ((ret = parse_uint8(parser, (WB_UTINY*) &parser->version)) != WBXML_OK) return ret; WBXML_DEBUG((WBXML_PARSER, "(%d) Parsed version: '0x%X'", parser->pos - 1, (WB_TINY) parser->version)); return WBXML_OK;}/** * @brief Parse WBXML public id * @param parser The WBXML Parser * @return WBXML_OK if parsing is OK, an error code otherwise * @note publicid = mb_u_int32 | ( zero index ) * @note index = mb_u_int32 */static WBXMLError parse_publicid(WBXMLParser *parser){ WB_UTINY public_id; if (!wbxml_buffer_get_char(parser->wbxml, parser->pos, &public_id)) return WBXML_ERROR_END_OF_BUFFER; WBXML_DEBUG((WBXML_PARSER, "(%d) Parsed publicid: '0x%X'", parser->pos, public_id)); if (public_id == 0x00) { parser->pos++; /* Get index (we will retreive the Public ID latter) */ return parse_mb_uint32(parser, (WB_ULONG *)&parser->public_id_index); } else { /* Get Public ID */ return parse_mb_uint32(parser, &parser->public_id); }}/** * @brief Parse WBXML charset * @param parser The WBXML Parser * @return WBXML_OK if parsing is OK, an error code otherwise * @note charset = mb_u_int32 * @note "The binary XML format contains a representation of the XML document character encoding. * This is the WBXML equivalent of the XML document format encoding attribute, * which is specified in the ?xml processing instruction. The character set is encoded as * a multi-byte positive integer value, representing the IANA-assigned MIB number for * a character set. A value of zero indicates an unknown document encoding. In the case of * an unknown encoding, transport meta-information should be used to determine the character * encoding. If transport meta-information is unavailable, the default encoding of UTF-8 * should be assumed." */static WBXMLError parse_charset(WBXMLParser *parser){#if defined( WBXML_LIB_VERBOSE ) WB_ULONG startpos = parser->pos;#endif /* WBXML_LIB_VERBOSE */ WBXMLError ret = parse_mb_uint32(parser, (WB_ULONG *)&parser->charset); WBXML_DEBUG((WBXML_PARSER, "(%d) Parsed charset: '0x%X'", startpos, parser->charset)); return ret;}/** * @brief Parse WBXML string table * @param parser The WBXML Parser * @return WBXML_OK if parsing is OK, an error code otherwise * @note strtbl = length *byte * @note length = mb_u_int32 */static WBXMLError parse_strtbl(WBXMLParser *parser){ WB_UTINY *data = NULL; WB_ULONG strtbl_len = 0; WB_UTINY end_char = 0; WBXMLError ret = WBXML_OK; WBXML_DEBUG((WBXML_PARSER, "(%d) Parsing strtbl", parser->pos)); /* Get String Table Length */ ret = parse_mb_uint32(parser, &strtbl_len); if (ret != WBXML_OK) return WBXML_ERROR_END_OF_BUFFER; if (strtbl_len > 0) { /* Check this string table length */ if (parser->pos + strtbl_len > wbxml_buffer_len(parser->wbxml)) return WBXML_ERROR_STRTBL_LENGTH; /* Get String Table */ data = wbxml_buffer_get_cstr(parser->wbxml); parser->strstbl = wbxml_buffer_create(data + parser->pos, strtbl_len, WBXML_PARSER_STRING_TABLE_MALLOC_BLOCK); if (parser->strstbl == NULL) return WBXML_ERROR_NOT_ENOUGH_MEMORY; /** @todo Damned ! Check the charset ! This may not be a simple NULL terminated string ! */ /* Some phones doesn't terminate the String Table with a null char... let's correct this */ if (!wbxml_buffer_get_char(parser->strstbl, wbxml_buffer_len(parser->strstbl) - 1, &end_char)) { return WBXML_ERROR_INTERNAL; } if (end_char != '\0') { /* Append NULL char to end of String Table */ if (!wbxml_buffer_append_char(parser->strstbl, '\0')) return WBXML_ERROR_NOT_ENOUGH_MEMORY; } parser->pos = parser->pos + strtbl_len; } return WBXML_OK;}/** * @brief Parse WBXML body * @param parser The WBXML Parser * @return WBXML_OK if parsing is OK, an error code otherwise * @note body = *pi element *pi */static WBXMLError parse_body(WBXMLParser *parser){ WBXMLError ret = WBXML_OK; WBXML_DEBUG((WBXML_PARSER, "(%d) Parsing body", parser->pos)); while (is_token(parser, WBXML_PI)) { if ((ret = parse_pi(parser)) != WBXML_OK) return ret; } if ((ret = parse_element(parser)) != WBXML_OK) return ret; while (is_token(parser, WBXML_PI)) { if ((ret = parse_pi(parser)) != WBXML_OK) return ret; } return WBXML_OK;}/** * @brief Parse WBXML pi * @param parser The WBXML Parser * @return WBXML_OK if parsing is OK, an error code otherwise * @note pi = PI attrStart *attrValue END */static WBXMLError parse_pi(WBXMLParser *parser){ WBXMLAttributeName *attr_name = NULL; const WB_UTINY *start_value = NULL; WBXMLBuffer *attr_value = NULL; WBXMLBuffer *tmp_value = NULL; WBXMLError ret = WBXML_OK; WBXML_DEBUG((WBXML_PARSER, "(%d) Parsing PI", parser->pos)); /* Skip PI */ parser->pos++; /* Parse attrStart */ if ((ret = parse_attr_start(parser,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -