📄 xml_parser.c
字号:
/*
 *			GPAC - Multimedia Framework C SDK
 *
 *			Authors: Jean le Feuvre
 *			Copyright (c) 2005-200X ENST
 *			All rights reserved
 *
 *  This file is part of GPAC / common tools sub-project
 *
 *  GPAC is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *
 *  GPAC is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 */

#include <gpac/xml.h>
#include <gpac/utf.h>
/*since 0.2.2, we use zlib for xmt/x3d reading to handle gz files*/
#include <zlib.h>

static GF_Err gf_xml_sax_parse_intern(GF_SAXParser *parser, char *current);

/*
 *	Expands the XML references found in str into a newly allocated string.
 *
 *	Handled references: the five predefined entities (&amp; &lt; &gt; &apos;
 *	&quot;, matched case-insensitively) and numeric character references
 *	(&#NNN; and &#xHHH;), which are stored in a 16-bit character and written
 *	out through gf_utf8_wcstombs. Any other '&' is copied unchanged.
 *
 *	str: NUL-terminated input string.
 *	Returns a malloc'ed NUL-terminated string owned by the caller (release it
 *	with free()), or NULL if str is NULL or empty or if memory runs out.
 */
static char *xml_translate_xml_string(char *str)
{
	/*predefined XML entities and the character each one stands for*/
	static const struct {
		const char *name;
		u32 len;
		char c;
	} entities[] = {
		{ "&amp;", 5, '&' },
		{ "&lt;", 4, '<' },
		{ "&gt;", 4, '>' },
		{ "&apos;", 6, '\'' },
		{ "&quot;", 6, '\"' },
	};
	/*allocation step, and the free space guaranteed before each iteration:
	enough for the UTF-8 form of one 16-bit character plus the final NUL*/
	const u32 chunk = 500;
	const u32 slack = 8;
	char *value;
	u32 size, i, j, k;

	if (!str || !strlen(str)) return NULL;

	size = chunk;
	value = (char *)malloc(sizeof(char) * size);
	if (!value) return NULL;

	i = j = 0;
	while (str[i]) {
		Bool done = 0;

		if (j + slack >= size) {
			/*grow through a temporary so the buffer is not leaked on failure*/
			char *grown;
			size += chunk;
			grown = (char *)realloc(value, sizeof(char)*size);
			if (!grown) {
				free(value);
				return NULL;
			}
			value = grown;
		}

		if (str[i] != '&') {
			value[j++] = str[i++];
			continue;
		}

		if (str[i+1]=='#') {
			char szChar[20], *end;
			u16 wchar[2];
			const unsigned short *srcp;
			int parsed = 0;

			/*work on a bounded, always terminated copy of the reference*/
			strncpy(szChar, str+i, 10);
			szChar[10] = 0;
			end = strchr(szChar, ';');
			if (end) {
				end[1] = 0;
				wchar[0] = wchar[1] = 0;
				if ((szChar[2]=='x') || (szChar[2]=='X')) {
					parsed = sscanf(szChar+3, "%hx", &wchar[0]);
				} else {
					parsed = sscanf(szChar+2, "%hu", &wchar[0]);
				}
			}
			if (parsed == 1) {
				size_t len;
				srcp = wchar;
				len = gf_utf8_wcstombs(&value[j], size - j - 1, &srcp);
				/*NOTE(review): assumes (size_t)-1 signals a conversion failure,
				as with the C library wcstombs - confirm against gpac/utf.h*/
				if (len != (size_t) -1) j += (u32) len;
				i += (u32) strlen(szChar);
				done = 1;
			}
			/*an unterminated or unparsable reference is copied as plain text below*/
		} else {
			for (k=0; k<sizeof(entities)/sizeof(entities[0]); k++) {
				if (!strnicmp(&str[i], entities[k].name, sizeof(char)*entities[k].len)) {
					value[j++] = entities[k].c;
					i += entities[k].len;
					done = 1;
					break;
				}
			}
		}

		if (!done) value[j++] = str[i++];
	}
	value[j] = 0;
	return value;
}

/*parser states, stored in the sax_state field of the parser*/
enum
{
	SAX_STATE_ATT_NAME,
	SAX_STATE_ATT_VALUE,
	SAX_STATE_ELEMENT,
	SAX_STATE_COMMENT,
	SAX_STATE_TEXT_CONTENT,
	SAX_STATE_ENTITY,
	SAX_STATE_SKIP_DOCTYPE,
	SAX_STATE_CDATA,
	SAX_STATE_DONE,
	SAX_STATE_XML_PROC,
	SAX_STATE_SYNTAX_ERROR,
};

/*
 *	Location of one attribute inside the parser buffer. Positions are stored
 *	as buffer offset + 1 (readers subtract 1 before indexing the buffer).
 */
typedef struct
{
	u32 name_start, name_end;
	u32 val_start, val_end;
	/*set while the value handed to the callback is a translated, malloc'ed copy*/
	Bool has_entities;
} GF_XMLSaxAttribute;


struct _tag_sax_parser
{
	/*0: UTF-8, 1: UTF-16 BE, 2: UTF-16 LE. String input is always converted back to utf8*/
	s32 unicode_type;
	char *buffer;
	/*alloc size, line size and current position*/
	u32 alloc_size, line_size, current_pos;
	/*current node depth*/
	u32 node_depth;

	/*gz input file*/
	gzFile gz_in;
	/*current line , file size and pos for user notif*/
	u32 line, file_size, file_pos;

	/*SAX callbacks*/
	gf_xml_sax_node_start sax_node_start;
	gf_xml_sax_node_end sax_node_end;
	gf_xml_sax_text_content sax_text_content;
	void *sax_cbck;
	gf_xml_sax_progress on_progress;

	u32 sax_state;
	/*non-zero while attribute parsing must not emit nodes.
	NOTE(review): the values 1 and 2 are tested separately below - presumably
	XML declaration and DOCTYPE; confirm where init_state is set*/
	u32 init_state;
	GF_List *entities;
	char att_sep;
	Bool in_entity, suspended;
	u32 in_quote;

	u32 elt_start_pos, elt_end_pos;

	/*last error found*/
	char err_msg[1000];

	/*buffer positions stored as offset + 1, 0 meaning "not set"*/
	u32 att_name_start, elt_name_start, elt_name_end, text_start, text_end;

	/*attrs is the array handed to the node-start callback, sax_attrs the
	matching buffer positions; both hold nb_alloc_attrs entries*/
	GF_XMLAttribute *attrs;
	GF_XMLSaxAttribute *sax_attrs;
	u32 nb_attrs, nb_alloc_attrs;
};

/*
 *	Reserves the next attribute slot, growing both attribute arrays by one
 *	entry when they are full.
 *
 *	Returns the new slot in parser->sax_attrs (nb_attrs is incremented), or
 *	NULL if memory runs out; in that case the parser is left unchanged apart
 *	from a possibly larger sax_attrs array.
 */
static GF_XMLSaxAttribute *xml_get_sax_attribute(GF_SAXParser *parser)
{
	if (parser->nb_attrs==parser->nb_alloc_attrs) {
		u32 new_count = parser->nb_alloc_attrs + 1;
		GF_XMLSaxAttribute *new_sax_attrs;
		GF_XMLAttribute *new_attrs;

		/*go through temporaries: on failure realloc leaves the old array valid*/
		new_sax_attrs = (GF_XMLSaxAttribute *)realloc(parser->sax_attrs, sizeof(GF_XMLSaxAttribute)*new_count);
		if (!new_sax_attrs) return NULL;
		parser->sax_attrs = new_sax_attrs;

		new_attrs = (GF_XMLAttribute *)realloc(parser->attrs, sizeof(GF_XMLAttribute)*new_count);
		if (!new_attrs) return NULL;
		parser->attrs = new_attrs;

		parser->nb_alloc_attrs = new_count;
	}
	return &parser->sax_attrs[parser->nb_attrs++];
}

/*
 *	Discards the consumed part of the buffer: when in text-content or comment
 *	state, moves the unread bytes to the start of the buffer, adds the
 *	discarded length to file_pos and resets current_pos to 0.
 */
static void xml_sax_swap(GF_SAXParser *parser)
{
	if (parser->current_pos
		&& ((parser->sax_state==SAX_STATE_TEXT_CONTENT) || (parser->sax_state==SAX_STATE_COMMENT) )
	) {
		assert(parser->line_size >= parser->current_pos);
		parser->line_size -= parser->current_pos;
		parser->file_pos += parser->current_pos;
		if (parser->line_size) memmove(parser->buffer, parser->buffer + parser->current_pos, sizeof(char)*parser->line_size);
		parser->buffer[parser->line_size] = 0;
		parser->current_pos = 0;
	}
}

/*
 *	Reports the end of the current element to the sax_node_end callback,
 *	splitting "prefix:name" into local name and prefix, then decrements the
 *	node depth. Moves to SAX_STATE_DONE when depth reaches 0 outside of
 *	init_state. had_children is not used.
 */
static void xml_sax_node_end(GF_SAXParser *parser, Bool had_children)
{
	char *name, *sep, c;

	assert(parser->elt_name_start && parser->elt_name_end && parser->node_depth);
	/*terminate the element name in place; the overwritten char is restored below*/
	c = parser->buffer[parser->elt_name_end - 1];
	parser->buffer[parser->elt_name_end - 1] = 0;
	name = parser->buffer + parser->elt_name_start - 1;

	if (parser->sax_node_end) {
		sep = strchr(name, ':');
		if (sep) {
			sep[0] = 0;
			parser->sax_node_end(parser->sax_cbck, sep+1, name);
			sep[0] = ':';
		} else {
			parser->sax_node_end(parser->sax_cbck, name, NULL);
		}
	}
	parser->buffer[parser->elt_name_end - 1] = c;
	parser->node_depth--;
	if (!parser->init_state && !parser->node_depth) parser->sax_state = SAX_STATE_DONE;
	xml_sax_swap(parser);
	parser->text_start = parser->text_end = 0;
}

/*
 *	Reports the start of the current element to the sax_node_start callback.
 *
 *	Attribute names and values are NUL-terminated in place in the buffer.
 *	Values containing '&' are replaced by a translated copy for the duration
 *	of the callback and freed afterwards. The element name is split into
 *	local name and prefix on ':'. Increments the node depth and resets the
 *	attribute count.
 */
static void xml_sax_node_start(GF_SAXParser *parser)
{
	Bool has_entities = 0;
	u32 i;
	char *sep, c, *name;

	assert(parser->elt_name_start && parser->elt_name_end);
	/*terminate the element name in place; the overwritten char is restored below*/
	c = parser->buffer[parser->elt_name_end - 1];
	parser->buffer[parser->elt_name_end - 1] = 0;
	name = parser->buffer + parser->elt_name_start - 1;

	for (i=0;i<parser->nb_attrs; i++) {
		parser->attrs[i].name = parser->buffer + parser->sax_attrs[i].name_start - 1;
		parser->buffer[parser->sax_attrs[i].name_end-1] = 0;
		parser->attrs[i].value = parser->buffer + parser->sax_attrs[i].val_start - 1;
		parser->buffer[parser->sax_attrs[i].val_end-1] = 0;

		if (strchr(parser->attrs[i].value, '&')) {
			char *translated = xml_translate_xml_string(parser->attrs[i].value);
			/*if translation fails (out of memory) the raw value is passed on,
			and it is not flagged so that it is not freed below*/
			if (translated) {
				parser->sax_attrs[i].has_entities = 1;
				has_entities = 1;
				parser->attrs[i].value = translated;
			}
		}
		/*store first char pos after current attrib for node peeking*/
		parser->att_name_start = parser->sax_attrs[i].val_end;
	}

	if (parser->sax_node_start) {
		sep = strchr(name, ':');
		if (sep) {
			sep[0] = 0;
			parser->sax_node_start(parser->sax_cbck, sep+1, name, parser->attrs, parser->nb_attrs);
			sep[0] = ':';
		} else {
			parser->sax_node_start(parser->sax_cbck, name, NULL, parser->attrs, parser->nb_attrs);
		}
	}
	parser->att_name_start = 0;
	parser->buffer[parser->elt_name_end - 1] = c;
	parser->node_depth++;
	if (has_entities) {
		/*release the translated copies made above*/
		for (i=0;i<parser->nb_attrs; i++) {
			if (parser->sax_attrs[i].has_entities) {
				parser->sax_attrs[i].has_entities = 0;
				free(parser->attrs[i].value);
			}
		}
	}
	parser->nb_attrs = 0;
	xml_sax_swap(parser);
	parser->text_start = parser->text_end = 0;
}

/*
 *	Parses one step of an element's attribute list starting at current_pos:
 *	either the end of the start tag, or one attribute name followed by its
 *	quoted value.
 *
 *	Returns 0 when a step completed and parsing can go on, 1 when more data
 *	is needed or an error was raised (sax_state is then
 *	SAX_STATE_SYNTAX_ERROR and err_msg is filled). Note that the '<' case
 *	raises the error but returns 0.
 */
static Bool xml_sax_parse_attribute(GF_SAXParser *parser)
{
	char *sep;
	GF_XMLSaxAttribute *att = NULL;

	/*looking for attribute name*/
	if (parser->sax_state==SAX_STATE_ATT_NAME) {
		/*looking for start*/
		if (!parser->att_name_start) {
			while (parser->current_pos < parser->line_size) {
				u8 c = parser->buffer[parser->current_pos];
				switch (c) {
				case '\n':
					parser->line++;
					/*fallthrough*/
				case ' ':
				case '\r':
				case '\t':
					parser->current_pos++;
					continue;
				/*end of element*/
				case '?':
					if (parser->init_state!=1) break;
					/*fallthrough*/
				case '/':
					/*not enough data*/
					if (parser->current_pos+1 == parser->line_size) return 1;
					if (parser->buffer[parser->current_pos+1]=='>') {
						parser->current_pos+=2;
						parser->elt_end_pos = parser->file_pos + parser->current_pos - 1;
						/*done parsing attr AND elements*/
						if (!parser->init_state) {
							xml_sax_node_start(parser);
							xml_sax_node_end(parser, 0);
						} else {
							parser->nb_attrs = 0;
						}
						parser->sax_state = (parser->init_state) ? SAX_STATE_ELEMENT : SAX_STATE_TEXT_CONTENT;
						parser->text_start = parser->text_end = 0;
						return 0;
					}
					if (!parser->in_quote && (c=='/')) {
						if (!parser->init_state) {
							parser->sax_state = SAX_STATE_SYNTAX_ERROR;
							sprintf(parser->err_msg, "Markup error");
							return 1;
						}
					}
					break;
				case '"':
					if (parser->sax_state==SAX_STATE_ATT_VALUE) break;
					if (parser->in_quote && (parser->in_quote!=c) ) {
						parser->sax_state = SAX_STATE_SYNTAX_ERROR;
						sprintf(parser->err_msg, "Markup error");
						return 1;
					}
					if (parser->in_quote) parser->in_quote = 0;
					else parser->in_quote = c;
					break;
				case '>':
					parser->current_pos+=1;
					/*end of <!DOCTYPE>*/
					if (parser->init_state) {
						if (parser->init_state==1) {
							parser->sax_state = SAX_STATE_SYNTAX_ERROR;
							sprintf(parser->err_msg, "Invalid DOCTYPE");
							return 1;
						}
						parser->sax_state = SAX_STATE_ELEMENT;
						return 0;
					}
					/*done parsing attr*/
					parser->sax_state = SAX_STATE_TEXT_CONTENT;
					xml_sax_node_start(parser);
					return 0;
				case '[':
					if (parser->init_state) {
						parser->current_pos+=1;
						if (parser->init_state==1) {
							parser->sax_state = SAX_STATE_SYNTAX_ERROR;
							sprintf(parser->err_msg, "Invalid DOCTYPE");
							return 1;
						}
						parser->sax_state = SAX_STATE_ELEMENT;
						return 0;
					}
					break;
				case '<':
					parser->sax_state = SAX_STATE_SYNTAX_ERROR;
					sprintf(parser->err_msg, "Invalid character");
					return 0;
				/*first char of attr name*/
				default:
					parser->att_name_start = parser->current_pos + 1;
					break;
				}
				parser->current_pos++;
				if (parser->att_name_start) break;
			}
			if (parser->current_pos == parser->line_size) return 1;
		}

		if (parser->init_state==2) {
			/*no attribute is recorded in this mode: skip to the closing quote,
			or to the next space when not inside quotes*/
			sep = strchr(parser->buffer + parser->att_name_start - 1, parser->in_quote ? parser->in_quote : ' ');
			/*not enough data*/
			if (!sep) return 1;
			parser->current_pos = sep - parser->buffer;
			parser->att_name_start = 0;
			if (parser->in_quote) {
				parser->current_pos++;
				parser->in_quote = 0;
			}
			return 0;
		}

		/*looking for '"'*/
		if (parser->att_name_start) {
			sep = strchr(parser->buffer + parser->att_name_start - 1, '=');
			/*not enough data*/
			if (!sep) return 1;

			parser->current_pos = sep - parser->buffer;
			att = xml_get_sax_attribute(parser);
			if (!att) {
				/*the parser has no dedicated state for allocation failures*/
				parser->sax_state = SAX_STATE_SYNTAX_ERROR;
				sprintf(parser->err_msg, "Out of memory");
				return 1;
			}
			att->name_start = parser->att_name_start;
			att->name_end = parser->current_pos + 1;
			/*trim blanks between the name and '='; the bound keeps an empty
			name (input starting with '=') from reading before the name start*/
			while ((att->name_end > att->name_start) && strchr(" \n\t", parser->buffer[att->name_end - 2])) {
				att->name_end --;
			}
			att->has_entities = 0;
			parser->att_name_start = 0;
			parser->current_pos++;
			parser->sax_state = SAX_STATE_ATT_VALUE;
		}
	}

	if (parser->sax_state == SAX_STATE_ATT_VALUE) {
		att = &parser->sax_attrs[parser->nb_attrs-1];
		/*looking for first delimiter*/
		if (!parser->att_sep) {
			while (parser->current_pos < parser->line_size) {
				u8 c = parser->buffer[parser->current_pos];
				switch (c) {
				case '\n':
					parser->line++;
					/*fallthrough*/
				case ' ':
				case '\r':
				case '\t':
					parser->current_pos++;
					continue;
				case '\'':
				case '"':
					parser->att_sep = c;
					/*value begins right after the quote, stored as offset + 1*/
					att->val_start = parser->current_pos + 2;
					break;
				default:
					break;
				}
				parser->current_pos++;
				if (parser->att_sep) break;
			}
			if (parser->current_pos == parser->line_size) return 1;
		}

		assert(parser->att_sep);
		sep = strchr(parser->buffer + parser->current_pos, parser->att_sep);
		/*closing delimiter not in the buffer yet: wait for more data*/
		if (!sep) return 1;

		parser->current_pos = sep - parser->buffer;
		att->val_end = parser->current_pos + 1;
		parser->current_pos++;

		/*"style" always at the beginning of the attributes for ease of parsing*/
		if (!strncmp(parser->buffer + att->name_start-1, "style", 5)) {
			GF_XMLSaxAttribute prev = parser->sax_attrs[0];
			parser->sax_attrs[0] = *att;
			*att = prev;
		}
		parser->att_sep = 0;
		parser->sax_state = SAX_STATE_ATT_NAME;
		parser->att_name_start = 0;
		return 0;
	}
	return 1;
}

/*a named entity: its name, replacement value and a separator character*/
typedef struct
{
	char *name;
	char *value;
	u8 sep;
} XML_Entity;

/*
 *	Flushes pending text content. Does nothing when no text is pending, while
 *	in init_state, or when no sax_text_content callback is set. The visible
 *	part skips leading '\r', ' ' and '\n' characters (counting lines) by
 *	advancing text_start.
 *
 *	NOTE(review): this copy of the file is cut off inside this function; the
 *	remainder of its body is missing and must be restored from the original
 *	source before the file can compile.
 */
static void xml_sax_flush_text(GF_SAXParser *parser)
{
	u32 offset;
	char *text, c;
	if (!parser->text_start || parser->init_state || !parser->sax_text_content) return;

	offset = 0;
	while (parser->text_start+offset<parser->text_end) {
		c = parser->buffer[parser->text_start-1+offset];
		if (c=='\r') offset++;
		else if (c==' ') offset++;
		else if (c=='\n') {
			parser->line++;
			offset++;
		} else {
			break;
		}
	}
	parser->text_start+=offset;
	if (parser->text_start == parser->text_end) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -