📄 xml_parser.c
字号:
parser->text_start = parser->text_end = 0; return; } offset = 0; while (offset<parser->text_end) { c = parser->buffer[parser->text_end-2-offset]; if (c=='\r') offset++; else if (c==' ') offset++; else if (c=='\n') { parser->line++; offset++; } else { break; } } parser->text_end-=offset; assert(parser->text_start < parser->text_end); c = parser->buffer[parser->text_end-1]; parser->buffer[parser->text_end-1] = 0; text = parser->buffer + parser->text_start-1; /*solve XML built-in entities*/ if (strchr(text, '&') && strchr(text, ';')) { char *xml_text = xml_translate_xml_string(text); if (xml_text) { parser->sax_text_content(parser->sax_cbck, xml_text, (parser->sax_state==SAX_STATE_CDATA) ? 1 : 0); free(xml_text); } } else { parser->sax_text_content(parser->sax_cbck, text, (parser->sax_state==SAX_STATE_CDATA) ? 1 : 0); } parser->buffer[parser->text_end-1] = c; parser->text_start = parser->text_end = 0;}static void xml_sax_store_text(GF_SAXParser *parser, u32 txt_len){ if (!txt_len) return; if (!parser->text_start) { parser->text_start = parser->current_pos + 1; parser->text_end = parser->text_start + txt_len; parser->current_pos += txt_len; assert(parser->current_pos <= parser->line_size); return; } /*contiguous text*/ if (parser->text_end && (parser->text_end-1 == parser->current_pos)) { parser->text_end += txt_len; parser->current_pos += txt_len; assert(parser->current_pos <= parser->line_size); return; } /*need to flush*/ xml_sax_flush_text(parser); parser->text_start = parser->current_pos + 1; parser->text_end = parser->text_start + txt_len; parser->current_pos += txt_len; assert(parser->current_pos <= parser->line_size);}static char *xml_get_current_text(GF_SAXParser *parser){ char *text, c; if (!parser->text_start) return NULL; c = parser->buffer[parser->text_end-1]; parser->buffer[parser->text_end-1] = 0; text = strdup(parser->buffer + parser->text_start-1); parser->buffer[parser->text_end-1] = c; parser->text_start = parser->text_end = 0; return text;}static void xml_sax_skip_doctype(GF_SAXParser *parser){ while (parser->current_pos < parser->line_size) { if (parser->buffer[parser->current_pos]=='>') { parser->sax_state = SAX_STATE_ELEMENT; parser->current_pos++; xml_sax_swap(parser); return; } parser->current_pos++; }}static void xml_sax_skip_xml_proc(GF_SAXParser *parser){ while (parser->current_pos + 1 < parser->line_size) { if ((parser->buffer[parser->current_pos]=='?') && (parser->buffer[parser->current_pos+1]=='>')) { parser->sax_state = SAX_STATE_ELEMENT; parser->current_pos++; xml_sax_swap(parser); return; } parser->current_pos++; }}static void xml_sax_parse_entity(GF_SAXParser *parser){ char szName[1024]; u32 i = 0; XML_Entity *ent = (XML_Entity *)gf_list_last(parser->entities); char *skip_chars = " \t\n\r"; i=0; if (ent && ent->value) ent = NULL; if (ent) skip_chars = NULL; while (parser->current_pos+i < parser->line_size) { u8 c = parser->buffer[parser->current_pos+i]; if (skip_chars && strchr(skip_chars, c)) { if (c=='\n') parser->line++; parser->current_pos++; continue; } if (!ent && (c=='%')) { parser->current_pos+=i+1; parser->sax_state = SAX_STATE_SKIP_DOCTYPE; return; } else if (!ent && ((c=='\"') || (c=='\'')) ) { szName[i] = 0; GF_SAFEALLOC(ent, XML_Entity); ent->name = strdup(szName); ent->sep = c; parser->current_pos += 1+i; assert(parser->current_pos < parser->line_size); xml_sax_swap(parser); i=0; gf_list_add(parser->entities, ent); skip_chars = NULL; } else if (ent && c==ent->sep) { xml_sax_store_text(parser, i); ent->value = xml_get_current_text(parser); if (!ent->value) ent->value = strdup(""); parser->current_pos += 1; assert(parser->current_pos < parser->line_size); xml_sax_swap(parser); parser->sax_state = SAX_STATE_SKIP_DOCTYPE; return; } else if (!ent) { szName[i] = c; i++; } else { i++; } } xml_sax_store_text(parser, i);}static void xml_sax_cdata(GF_SAXParser *parser){ char *cd_end = strstr(parser->buffer + parser->current_pos, "]]>"); if (!cd_end) { xml_sax_store_text(parser, parser->line_size - parser->current_pos); } else { u32 size = cd_end - (parser->buffer + parser->current_pos); xml_sax_store_text(parser, size); xml_sax_flush_text(parser); parser->current_pos += 3; assert(parser->current_pos < parser->line_size); parser->sax_state = SAX_STATE_TEXT_CONTENT; }}static Bool xml_sax_parse_comments(GF_SAXParser *parser){ char *end = strstr(parser->buffer + parser->current_pos, "-->"); if (!end) { if (parser->line_size>3) parser->current_pos = parser->line_size-3; xml_sax_swap(parser); return 0; } parser->current_pos += 3 + (u32) (end - (parser->buffer + parser->current_pos) ); assert(parser->current_pos <= parser->line_size); parser->sax_state = SAX_STATE_TEXT_CONTENT; parser->text_start = parser->text_end = 0; xml_sax_swap(parser); return 1;}static GF_Err xml_sax_parse(GF_SAXParser *parser, Bool force_parse){ u32 i = 0; Bool is_text, is_end; u8 c; char *elt, sep; is_text = 0; while (parser->current_pos<parser->line_size) { if (!force_parse && parser->suspended) goto exit;restart: is_text = 0; switch (parser->sax_state) { /*load an XML element*/ case SAX_STATE_TEXT_CONTENT: is_text = 1; case SAX_STATE_ELEMENT: elt = NULL; i=0; while ((c = parser->buffer[parser->current_pos+i]) !='<') { if ((parser->init_state==2) && (c ==']')) { parser->sax_state = SAX_STATE_ATT_NAME; parser->current_pos+=i+1; goto restart; } i++; if (!is_text && (c=='\n')) parser->line++; if (parser->current_pos+i==parser->line_size) goto exit; } if (is_text && i) { xml_sax_store_text(parser, i); is_text = 0; parser->sax_state = SAX_STATE_ELEMENT; } else if (i) { parser->current_pos += i; assert(parser->current_pos < parser->line_size); } is_end = 0; i = 0; while (1) { char c = parser->buffer[parser->current_pos+1+i]; if (!c) { i = 0; goto exit; } if ((c=='\t') || (c=='\r') || (c==' ') ) { if (i) break; else parser->current_pos++; } else if (c=='\n') { parser->line++; if (i) break; else parser->current_pos++; } else if (c=='>') break; else if (c=='=') break; else if (c=='/') { is_end = !i ? 1 : 2; i++; } else { i++; }// if ((c=='[') && (parser->buffer[parser->elt_name_start-1 + i-2]=='A') ) break; if (parser->current_pos+1+i==parser->line_size) { i=0; goto exit; } } if (i) { parser->elt_name_start = parser->current_pos+1 + 1; if (is_end==1) parser->elt_name_start ++; if (is_end==2) parser->elt_name_end = parser->current_pos+1+i; else parser->elt_name_end = parser->current_pos+1+i + 1; } if (is_end) { xml_sax_flush_text(parser); parser->elt_end_pos = parser->file_pos + parser->current_pos + i; if (is_end==2) { parser->sax_state = SAX_STATE_ELEMENT; xml_sax_node_start(parser); xml_sax_node_end(parser, 0); } else { parser->elt_end_pos += parser->elt_name_end - parser->elt_name_start; xml_sax_node_end(parser, 1); } if (parser->sax_state == SAX_STATE_SYNTAX_ERROR) break; parser->current_pos+=2+i; parser->sax_state = SAX_STATE_TEXT_CONTENT; break; } sep = parser->buffer[parser->elt_name_end-1]; parser->buffer[parser->elt_name_end-1] = 0; elt = parser->buffer + parser->elt_name_start-1; parser->sax_state = SAX_STATE_ATT_NAME; assert(parser->elt_start_pos <= parser->file_pos + parser->current_pos); parser->elt_start_pos = parser->file_pos + parser->current_pos; if (!strncmp(elt, "!--", 3)) { parser->sax_state = SAX_STATE_COMMENT; if (i>3) parser->current_pos -= (i-3); } else if (!strcmp(elt, "?xml")) parser->init_state = 1; else if (!strcmp(elt, "!DOCTYPE")) parser->init_state = 2; else if (!strcmp(elt, "!ENTITY")) parser->sax_state = SAX_STATE_ENTITY; else if (!strcmp(elt, "!ATTLIST") || !strcmp(elt, "!ELEMENT")) parser->sax_state = SAX_STATE_SKIP_DOCTYPE; else if (!strcmp(elt, "![CDATA[")) parser->sax_state = SAX_STATE_CDATA; else if (elt[0]=='?') parser->sax_state = SAX_STATE_XML_PROC; /*node found*/ else { xml_sax_flush_text(parser); if (parser->init_state) { parser->init_state = 0; /*that's a bit ugly: since we solve entities when appending text, we need to reparse the current buffer*/ if (gf_list_count(parser->entities)) { char *orig_buf; GF_Err e; parser->buffer[parser->elt_name_end-1] = sep; orig_buf = strdup(parser->buffer + parser->current_pos); parser->current_pos = 0; parser->line_size = 0; parser->elt_start_pos = 0; parser->sax_state = SAX_STATE_TEXT_CONTENT; e = gf_xml_sax_parse_intern(parser, orig_buf); free(orig_buf); return e; } } } parser->current_pos+=1+i; parser->buffer[parser->elt_name_end-1] = sep; break; case SAX_STATE_COMMENT: if (!xml_sax_parse_comments(parser)) { xml_sax_swap(parser); return GF_OK; } break; case SAX_STATE_ATT_NAME: case SAX_STATE_ATT_VALUE: if (xml_sax_parse_attribute(parser)) goto exit; break; case SAX_STATE_ENTITY: xml_sax_parse_entity(parser); break; case SAX_STATE_SKIP_DOCTYPE: xml_sax_skip_doctype(parser); break; case SAX_STATE_XML_PROC: xml_sax_skip_xml_proc(parser); break; case SAX_STATE_CDATA: xml_sax_cdata(parser); break; case SAX_STATE_SYNTAX_ERROR: return GF_CORRUPTED_DATA; case SAX_STATE_DONE: return GF_EOS; } }exit:#if 0 if (is_text) { if (i) xml_sax_store_text(parser, i); /*DON'T FLUSH TEXT YET, wait for next '<' to do so otherwise we may corrupt xml base entities (', ...)*/ }#endif xml_sax_swap(parser); return GF_OK;}static GF_Err xml_sax_append_string(GF_SAXParser *parser, char *string){ u32 size = parser->line_size; u32 nl_size = strlen(string); if (parser->alloc_size < size+nl_size+1) { parser->buffer = realloc(parser->buffer, sizeof(char) * (size+nl_size+1) ); if (!parser->buffer ) return GF_OUT_OF_MEM; parser->alloc_size = size+nl_size+1; } memcpy(parser->buffer+size, string, sizeof(char)*nl_size); parser->buffer[size+nl_size] = 0; parser->line_size = size+nl_size; return GF_OK;}static GF_Err gf_xml_sax_parse_intern(GF_SAXParser *parser, char *current){ u32 i, count; /*solve entities*/ count = gf_list_count(parser->entities); while (count) { char *entityEnd, szName[200]; XML_Entity *ent; char *entityStart = strstr(current, "&"); if (parser->in_entity) { entityEnd = strstr(current, ";"); if (!entityEnd) return xml_sax_append_string(parser, current); entityStart = strrchr(parser->buffer, '&'); strcpy(szName, entityStart+1); entityStart[0] = 0; entityEnd[0] = 0; strcat(szName, current); entityEnd[0] = ';'; parser->in_entity = 0; current = entityEnd+1; } else { if (!entityStart) break; entityEnd = strstr(entityStart, ";"); entityStart[0] = 0; xml_sax_append_string(parser, current); xml_sax_parse(parser, 1); entityStart[0] = '&'; if (!entityEnd) { parser->in_entity = 1; /*store entity start*/ return xml_sax_append_string(parser, entityStart); } strncpy(szName, entityStart+1, entityEnd - entityStart - 1); szName[entityEnd - entityStart - 1] = 0; current = entityEnd + 1; } for (i=0; i<count; i++) { ent = (XML_Entity *)gf_list_get(parser->entities, i); if (!strcmp(ent->name, szName)) { u32 line_num = parser->line; xml_sax_append_string(parser, ent->value); xml_sax_parse(parser, 1); parser->line = line_num; break; } } } xml_sax_append_string(parser, current); return xml_sax_parse(parser, 0);}GF_EXPORTGF_Err gf_xml_sax_parse(GF_SAXParser *parser, void *string){ GF_Err e; char *current; char *utf_conv = NULL; if (parser->unicode_type < 0) return GF_BAD_PARAM; if (parser->unicode_type>1) { const u16 *sptr = (const u16 *)string; u32 len = 2*gf_utf8_wcslen(sptr); utf_conv = (char *)malloc(sizeof(char)*(len+1)); len = gf_utf8_wcstombs(utf_conv, len, &sptr); if (len==(u32) -1) { parser->sax_state = SAX_STATE_SYNTAX_ERROR; free(utf_conv); return GF_CORRUPTED_DATA; } utf_conv[len] = 0; current = utf_conv; } else { current = (char *)string; } e = gf_xml_sax_parse_intern(parser, current); if (utf_conv) free(utf_conv); return e;}GF_EXPORTGF_Err gf_xml_sax_init(GF_SAXParser *parser, unsigned char *BOM){ u32 offset; if (!BOM) parser->unicode_type = 0; if (parser->unicode_type >= 0) return gf_xml_sax_parse(parser, BOM);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -