📄 xml_parser.c
字号:
offset = 0; if ((BOM[0]==0xFF) && (BOM[1]==0xFE)) { if (!BOM[2] && !BOM[3]) return GF_NOT_SUPPORTED; parser->unicode_type = 2; offset = 2; } else if ((BOM[0]==0xFE) && (BOM[1]==0xFF)) { if (!BOM[2] && !BOM[3]) return GF_NOT_SUPPORTED; parser->unicode_type = 1; offset = 2; } else if ((BOM[0]==0xEF) && (BOM[1]==0xBB) && (BOM[2]==0xBF)) { /*we handle UTF8 as asci*/ parser->unicode_type = 0; offset = 3; } else { parser->unicode_type = 0; offset = 0; } parser->sax_state = SAX_STATE_ELEMENT; return gf_xml_sax_parse(parser, BOM + offset);}static void xml_sax_reset(GF_SAXParser *parser){ while (1) { XML_Entity *ent = (XML_Entity *)gf_list_last(parser->entities); if (!ent) break; gf_list_rem_last(parser->entities); if (ent->name) free(ent->name); if (ent->value) free(ent->value); free(ent); } if (parser->buffer) free(parser->buffer); parser->buffer = NULL; parser->current_pos = 0; free(parser->attrs); parser->attrs = NULL; free(parser->sax_attrs); parser->sax_attrs = NULL; parser->nb_alloc_attrs = parser->nb_attrs = 0;}#define XML_INPUT_SIZE 4096static GF_Err xml_sax_read_file(GF_SAXParser *parser){ GF_Err e = GF_EOS; unsigned char szLine[XML_INPUT_SIZE+2]; if (!parser->gz_in) return GF_BAD_PARAM; parser->file_pos = 0; while (!gzeof(parser->gz_in) && !parser->suspended) { u32 read = gzread(parser->gz_in, szLine, XML_INPUT_SIZE); if (!read) break; szLine[read] = 0; szLine[read+1] = 0; e = gf_xml_sax_parse(parser, szLine); if (e) break; if (parser->file_pos > parser->file_size) parser->file_size = parser->file_pos + 1; if (parser->on_progress) parser->on_progress(parser->sax_cbck, parser->file_pos, parser->file_size); } if (gzeof(parser->gz_in)) { if (!e) e = GF_EOS; if (parser->on_progress) parser->on_progress(parser->sax_cbck, parser->file_size, parser->file_size); gzclose(parser->gz_in); parser->gz_in = 0; } return e;}GF_EXPORTGF_Err gf_xml_sax_parse_file(GF_SAXParser *parser, const char *fileName, gf_xml_sax_progress OnProgress){ FILE *test; GF_Err e; gzFile gzInput; unsigned char szLine[6]; /*check file exists and gets its size (zlib doesn't support SEEK_END)*/ test = fopen(fileName, "rb"); if (!test) return GF_URL_ERROR; fseek(test, 0, SEEK_END); parser->file_size = ftell(test); fclose(test); parser->on_progress = OnProgress; gzInput = gzopen(fileName, "rb"); if (!gzInput) return GF_IO_ERR; parser->gz_in = gzInput; /*init SAX parser (unicode setup)*/ gzread(gzInput, szLine, 4); szLine[4] = szLine[5] = 0; e = gf_xml_sax_init(parser, szLine); if (e) return e; return xml_sax_read_file(parser);}GF_EXPORTBool gf_xml_sax_binary_file(GF_SAXParser *parser){ if (!parser || !parser->gz_in) return 0; return (((z_stream*)parser->gz_in)->data_type==Z_BINARY) ? 1 : 0;}GF_EXPORTGF_SAXParser *gf_xml_sax_new(gf_xml_sax_node_start on_node_start, gf_xml_sax_node_end on_node_end, gf_xml_sax_text_content on_text_content, void *cbck){ GF_SAXParser *parser; GF_SAFEALLOC(parser, GF_SAXParser); parser->entities = gf_list_new(); parser->unicode_type = -1; parser->sax_node_start = on_node_start; parser->sax_node_end = on_node_end; parser->sax_text_content = on_text_content; parser->sax_cbck = cbck; return parser;}GF_EXPORTvoid gf_xml_sax_del(GF_SAXParser *parser){ xml_sax_reset(parser); gf_list_del(parser->entities); if (parser->gz_in) gzclose(parser->gz_in); free(parser);}GF_EXPORTGF_Err gf_xml_sax_suspend(GF_SAXParser *parser, Bool do_suspend){ parser->suspended = do_suspend; if (!do_suspend) { if (parser->gz_in) return xml_sax_read_file(parser); return xml_sax_parse(parser, 0); } return GF_OK;}GF_EXPORTu32 gf_xml_sax_get_line(GF_SAXParser *parser) { return parser->line + 1 ; }GF_EXPORTu32 gf_xml_sax_get_file_size(GF_SAXParser *parser) { return parser->gz_in ? parser->file_size : 0; }GF_EXPORTu32 gf_xml_sax_get_file_pos(GF_SAXParser *parser) { return parser->gz_in ? parser->file_pos : 0; }GF_EXPORTchar *gf_xml_sax_peek_node(GF_SAXParser *parser, char *att_name, char *att_value, char *substitute, char *get_attr, char *end_pattern, Bool *is_substitute){ u32 state, att_len; z_off_t pos; char szLine1[XML_INPUT_SIZE+2], szLine2[XML_INPUT_SIZE+2], *szLine, *cur_line, *sep, *start, first_c, *result; if (!parser->gz_in) return NULL; result = NULL; szLine1[0] = szLine2[0] = 0; pos = gztell(parser->gz_in); att_len = strlen(parser->buffer + parser->att_name_start); if (att_len<2*XML_INPUT_SIZE) att_len = 2*XML_INPUT_SIZE; szLine = (char *) malloc(sizeof(char)*att_len); strcpy(szLine, parser->buffer + parser->att_name_start); cur_line = szLine; att_len = strlen(att_value); state = 0; goto retry; while (!gzeof(parser->gz_in)) { u32 read; if (cur_line == szLine2) { cur_line = szLine1; } else { cur_line = szLine2; } read = gzread(parser->gz_in, cur_line, XML_INPUT_SIZE); cur_line[read] = cur_line[read+1] = 0; strcat(szLine, cur_line);retry: if (state == 2) goto fetch_attr; sep = strstr(szLine, att_name); if (!sep && !state) { state = 0; strcpy(szLine, cur_line); if (end_pattern && strstr(szLine, end_pattern)) goto exit; continue; } if (!state) { state = 1; /*load next line*/ first_c = sep[0]; sep[0] = 0; start = strrchr(szLine, '<'); if (!start) goto exit; sep[0] = first_c; strcpy(szLine, start); sep = strstr(szLine, att_name); } sep = strchr(sep, '='); if (!sep) { state = 0; strcpy(szLine, cur_line); continue; } while ( (sep[0] != '\"') && (sep[0] != '\"') ) sep++; sep++; /*found*/ if (!strncmp(sep, att_value, att_len)) { u32 pos; sep = szLine + 1; while (strchr(" \t\r\n", sep[0])) sep++; pos = 0; while (!strchr(" \t\r\n", sep[pos])) pos++; first_c = sep[pos]; sep[pos] = 0; state = 2; if (!substitute || !get_attr || strcmp(sep, substitute) ) { if (is_substitute) *is_substitute = 0; result = strdup(sep); goto exit; } sep[pos] = first_c;fetch_attr: sep = strstr(szLine + 1, get_attr); if (!sep) { strcpy(szLine, cur_line); continue; } sep += strlen(get_attr); while (strchr("= \t\r\n", sep[0])) sep++; sep++; pos = 0; while (!strchr(" \t\r\n/>", sep[pos])) pos++; sep[pos-1] = 0; result = strdup(sep); if (is_substitute) *is_substitute = 1; goto exit; } state = 0; strcpy(szLine, sep); goto retry; }exit: free(szLine); gzrewind(parser->gz_in); gzseek(parser->gz_in, pos, SEEK_SET); return result;}GF_EXPORTconst char *gf_xml_sax_get_error(GF_SAXParser *parser){ return parser->err_msg;}struct _peek_type{ GF_SAXParser *parser; char *res;};static void on_peek_node_start(void *cbk, const char *name, const char *ns, const GF_XMLAttribute *attributes, u32 nb_attributes){ struct _peek_type *pt = (struct _peek_type*)cbk; pt->res = strdup(name); pt->parser->suspended = 1;}GF_EXPORTchar *gf_xml_get_root_type(const char *file, GF_Err *ret){ GF_Err e; struct _peek_type pt; pt.res = NULL; pt.parser = gf_xml_sax_new(on_peek_node_start, NULL, NULL, &pt); e = gf_xml_sax_parse_file(pt.parser, file, NULL); if (ret) *ret = e; gf_xml_sax_del(pt.parser); return pt.res;}GF_EXPORTu32 gf_xml_sax_get_node_start_pos(GF_SAXParser *parser){ return parser->elt_start_pos;}GF_EXPORTu32 gf_xml_sax_get_node_end_pos(GF_SAXParser *parser){ return parser->elt_end_pos;}struct _tag_dom_parser{ GF_SAXParser *parser; GF_List *stack; GF_XMLNode *root; u32 depth; void (*OnProgress)(void *cbck, u32 done, u32 tot); void *cbk;};static void gf_xml_dom_node_del(GF_XMLNode *node){ if (node->attributes) { while (gf_list_count(node->attributes)) { GF_XMLAttribute *att = (GF_XMLAttribute *)gf_list_last(node->attributes); gf_list_rem_last(node->attributes); if (att->name) free(att->name); if (att->value) free(att->value); free(att); } gf_list_del(node->attributes); } if (node->content) { while (gf_list_count(node->content)) { GF_XMLNode *child = (GF_XMLNode *)gf_list_last(node->content); gf_list_rem_last(node->content); gf_xml_dom_node_del(child); } gf_list_del(node->content); } if (node->ns) free(node->ns); if (node->name) free(node->name); free(node);}static void on_dom_node_start(void *cbk, const char *name, const char *ns, const GF_XMLAttribute *attributes, u32 nb_attributes){ u32 i; GF_DOMParser *par = (GF_DOMParser *) cbk; GF_XMLNode *node; if (par->root && !gf_list_count(par->stack)) { par->parser->suspended = 1; return; } GF_SAFEALLOC(node, GF_XMLNode); node->attributes = gf_list_new(); for (i=0; i<nb_attributes; i++) { GF_XMLAttribute *att; GF_SAFEALLOC(att, GF_XMLAttribute); att->name = strdup(attributes[i].name); att->value = strdup(attributes[i].value); gf_list_add(node->attributes, att); } node->content = gf_list_new(); node->name = strdup(name); if (ns) node->ns = strdup(ns); gf_list_add(par->stack, node); if (!par->root) par->root = node;}static void on_dom_node_end(void *cbk, const char *name, const char *ns){ GF_DOMParser *par = (GF_DOMParser *)cbk; GF_XMLNode *last = (GF_XMLNode *)gf_list_last(par->stack); gf_list_rem_last(par->stack); if (!last || strcmp(last->name, name) || (!ns && last->ns) || (ns && !last->ns) || (ns && strcmp(last->ns, ns) ) ) { par->parser->suspended = 1; gf_xml_dom_node_del(last); return; } if (last != par->root) { GF_XMLNode *node = (GF_XMLNode *)gf_list_last(par->stack); assert(node->content); assert(gf_list_find(node->content, last) == -1); gf_list_add(node->content, last); }}static void on_dom_text_content(void *cbk, const char *content, Bool is_cdata){ GF_DOMParser *par = (GF_DOMParser *)cbk; GF_XMLNode *node; GF_XMLNode *last = (GF_XMLNode *)gf_list_last(par->stack); if (!last) return; assert(last->content); GF_SAFEALLOC(node, GF_XMLNode); node->type = is_cdata ? GF_XML_CDATA_TYPE : GF_XML_TEXT_TYPE; node->name = strdup(content); gf_list_add(last->content, node);}GF_EXPORTGF_DOMParser *gf_xml_dom_new(){ GF_DOMParser *dom; GF_SAFEALLOC(dom, GF_DOMParser); return dom;}static void gf_xml_dom_reset(GF_DOMParser *dom, Bool full_reset){ if (full_reset && dom->parser) { gf_xml_sax_del(dom->parser); dom->parser = NULL; } if (dom->stack) { while (gf_list_count(dom->stack)) { GF_XMLNode *n = (GF_XMLNode *)gf_list_last(dom->stack); gf_list_rem_last(dom->stack); if (dom->root==n) dom->root = NULL; gf_xml_dom_node_del(n); } gf_list_del(dom->stack); dom->stack = NULL; } if (full_reset && dom->root) { gf_xml_dom_node_del(dom->root); dom->root = NULL; }}GF_EXPORTvoid gf_xml_dom_del(GF_DOMParser *parser){ gf_xml_dom_reset(parser, 1); free(parser);}static void dom_on_progress(void *cbck, u32 done, u32 tot){ GF_DOMParser *dom = (GF_DOMParser *)cbck; dom->OnProgress(dom->cbk, done, tot);}GF_EXPORTGF_Err gf_xml_dom_parse(GF_DOMParser *dom, const char *file, gf_xml_sax_progress OnProgress, void *cbk){ GF_Err e; gf_xml_dom_reset(dom, 1); dom->stack = gf_list_new(); dom->parser = gf_xml_sax_new(on_dom_node_start, on_dom_node_end, on_dom_text_content, dom); dom->OnProgress = OnProgress; dom->cbk = cbk; e = gf_xml_sax_parse_file(dom->parser, file, OnProgress ? dom_on_progress : NULL); gf_xml_dom_reset(dom, 0); return e<0 ? e : GF_OK;}GF_EXPORTGF_XMLNode *gf_xml_dom_get_root(GF_DOMParser *parser){ return parser->root;}GF_EXPORTconst char *gf_xml_dom_get_error(GF_DOMParser *parser){ return gf_xml_sax_get_error(parser->parser);}GF_EXPORTu32 gf_xml_dom_get_line(GF_DOMParser *parser){ return gf_xml_sax_get_line(parser->parser);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -