⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xml_parser.c

📁 一个用于智能手机的多媒体库适合S60 WinCE的跨平台开发库
💻 C
📖 第 1 页 / 共 3 页
字号:
		parser->text_start = parser->text_end = 0;		return;	}	offset = 0;	while (offset<parser->text_end) {		c = parser->buffer[parser->text_end-2-offset];		if (c=='\r') offset++;		else if (c==' ') offset++;		else if (c=='\n') {			parser->line++;			offset++;		} else {			break;		}	}	parser->text_end-=offset;	assert(parser->text_start < parser->text_end);	c = parser->buffer[parser->text_end-1];	parser->buffer[parser->text_end-1] = 0;	text = parser->buffer + parser->text_start-1;	/*solve XML built-in entities*/	if (strchr(text, '&') && strchr(text, ';')) {		char *xml_text = xml_translate_xml_string(text);		if (xml_text) {			parser->sax_text_content(parser->sax_cbck, xml_text, (parser->sax_state==SAX_STATE_CDATA) ? 1 : 0);			free(xml_text);		}	} else {		parser->sax_text_content(parser->sax_cbck, text, (parser->sax_state==SAX_STATE_CDATA) ? 1 : 0);	}	parser->buffer[parser->text_end-1] = c;	parser->text_start = parser->text_end = 0;}static void xml_sax_store_text(GF_SAXParser *parser, u32 txt_len){	if (!txt_len) return;	if (!parser->text_start) {		parser->text_start = parser->current_pos + 1;		parser->text_end = parser->text_start + txt_len;		parser->current_pos += txt_len;		assert(parser->current_pos <= parser->line_size);		return;	}	/*contiguous text*/	if (parser->text_end && (parser->text_end-1 == parser->current_pos)) {		parser->text_end += txt_len;		parser->current_pos += txt_len;		assert(parser->current_pos <= parser->line_size);		return;	}	/*need to flush*/	xml_sax_flush_text(parser);	parser->text_start = parser->current_pos + 1;	parser->text_end = parser->text_start + txt_len;	parser->current_pos += txt_len;	assert(parser->current_pos <= parser->line_size);}static char *xml_get_current_text(GF_SAXParser *parser){	char *text, c;	if (!parser->text_start) return NULL;	c = parser->buffer[parser->text_end-1];	parser->buffer[parser->text_end-1] = 0;	text = strdup(parser->buffer + parser->text_start-1);	parser->buffer[parser->text_end-1] = c;	parser->text_start = parser->text_end = 0;	return text;}static void xml_sax_skip_doctype(GF_SAXParser *parser){	while (parser->current_pos < parser->line_size) {		if (parser->buffer[parser->current_pos]=='>') {			parser->sax_state = SAX_STATE_ELEMENT;			parser->current_pos++;			xml_sax_swap(parser);			return;		}		parser->current_pos++;	}}static void xml_sax_skip_xml_proc(GF_SAXParser *parser){	while (parser->current_pos + 1 < parser->line_size) {		if ((parser->buffer[parser->current_pos]=='?') && (parser->buffer[parser->current_pos+1]=='>')) {			parser->sax_state = SAX_STATE_ELEMENT;			parser->current_pos++;			xml_sax_swap(parser);			return;		}		parser->current_pos++;	}}static void xml_sax_parse_entity(GF_SAXParser *parser){	char szName[1024];	u32 i = 0;	XML_Entity *ent = (XML_Entity *)gf_list_last(parser->entities);	char *skip_chars = " \t\n\r";	i=0;	if (ent && ent->value) ent = NULL;	if (ent) skip_chars = NULL;	while (parser->current_pos+i < parser->line_size) {		u8 c = parser->buffer[parser->current_pos+i];		if (skip_chars && strchr(skip_chars, c)) {			if (c=='\n') parser->line++;			parser->current_pos++;			continue;		}		if (!ent && (c=='%')) {			parser->current_pos+=i+1;			parser->sax_state = SAX_STATE_SKIP_DOCTYPE;			return;		}		else if (!ent && ((c=='\"') || (c=='\'')) ) {			szName[i] = 0;			GF_SAFEALLOC(ent, XML_Entity);			ent->name = strdup(szName);			ent->sep = c;			parser->current_pos += 1+i;			assert(parser->current_pos < parser->line_size);			xml_sax_swap(parser);			i=0;			gf_list_add(parser->entities, ent);			skip_chars = NULL;		} else if (ent && c==ent->sep) {			xml_sax_store_text(parser, i);			ent->value = xml_get_current_text(parser);			if (!ent->value) ent->value = strdup("");			parser->current_pos += 1;			assert(parser->current_pos < parser->line_size);			xml_sax_swap(parser);			parser->sax_state = SAX_STATE_SKIP_DOCTYPE;			return;		} else if (!ent) {			szName[i] = c;			i++;		} else {			i++;		}	}	xml_sax_store_text(parser, i);}static void xml_sax_cdata(GF_SAXParser *parser){	char *cd_end = strstr(parser->buffer + parser->current_pos, "]]>");	if (!cd_end) {		xml_sax_store_text(parser, parser->line_size - parser->current_pos);	} else {		u32 size = cd_end - (parser->buffer + parser->current_pos);		xml_sax_store_text(parser, size);		xml_sax_flush_text(parser);		parser->current_pos += 3;		assert(parser->current_pos < parser->line_size);		parser->sax_state = SAX_STATE_TEXT_CONTENT;	}}static Bool xml_sax_parse_comments(GF_SAXParser *parser){	char *end = strstr(parser->buffer + parser->current_pos, "-->");	if (!end) {		if (parser->line_size>3)			parser->current_pos = parser->line_size-3;		xml_sax_swap(parser);		return 0;	}	parser->current_pos += 3 + (u32) (end - (parser->buffer + parser->current_pos) );	assert(parser->current_pos <= parser->line_size);	parser->sax_state = SAX_STATE_TEXT_CONTENT;	parser->text_start = parser->text_end = 0;	xml_sax_swap(parser);	return 1;}static GF_Err xml_sax_parse(GF_SAXParser *parser, Bool force_parse){	u32 i = 0;	Bool is_text, is_end;	u8 c;	char *elt, sep;	is_text = 0;	while (parser->current_pos<parser->line_size) {		if (!force_parse && parser->suspended) goto exit;restart:		is_text = 0;		switch (parser->sax_state) {		/*load an XML element*/		case SAX_STATE_TEXT_CONTENT:			is_text = 1;		case SAX_STATE_ELEMENT:			elt = NULL;			i=0;			while ((c = parser->buffer[parser->current_pos+i]) !='<') {				if ((parser->init_state==2) && (c ==']')) {					parser->sax_state = SAX_STATE_ATT_NAME;					parser->current_pos+=i+1;					goto restart;				}				i++;				if (!is_text && (c=='\n')) parser->line++;				if (parser->current_pos+i==parser->line_size) goto exit;			}			if (is_text && i) {				xml_sax_store_text(parser, i);				is_text = 0;				parser->sax_state = SAX_STATE_ELEMENT;			} else if (i) {				parser->current_pos += i;				assert(parser->current_pos < parser->line_size);			}			is_end = 0;			i = 0;			while (1) {				char c = parser->buffer[parser->current_pos+1+i];				if (!c) {					i = 0;					goto exit;				}				if ((c=='\t') || (c=='\r') || (c==' ') ) {					if (i) break;					else parser->current_pos++;				}				else if (c=='\n') {					parser->line++;					if (i) break;					else parser->current_pos++;				}				else if (c=='>') break;				else if (c=='=') break;				else if (c=='/') {					is_end = !i ? 1 : 2;					i++;				} else {					i++;				}//				if ((c=='[') && (parser->buffer[parser->elt_name_start-1 + i-2]=='A') ) break;				if (parser->current_pos+1+i==parser->line_size) {					i=0;					goto exit;				}			}			if (i) {				parser->elt_name_start = parser->current_pos+1 + 1;				if (is_end==1) parser->elt_name_start ++;				if (is_end==2) parser->elt_name_end = parser->current_pos+1+i;				else parser->elt_name_end = parser->current_pos+1+i + 1;			} 			if (is_end) {				xml_sax_flush_text(parser);				parser->elt_end_pos = parser->file_pos + parser->current_pos + i;				if (is_end==2) {					parser->sax_state = SAX_STATE_ELEMENT;					xml_sax_node_start(parser);					xml_sax_node_end(parser, 0);				} else {					parser->elt_end_pos += parser->elt_name_end - parser->elt_name_start;					xml_sax_node_end(parser, 1);				}				if (parser->sax_state == SAX_STATE_SYNTAX_ERROR) break;				parser->current_pos+=2+i;				parser->sax_state = SAX_STATE_TEXT_CONTENT;				break;			}			sep = parser->buffer[parser->elt_name_end-1];			parser->buffer[parser->elt_name_end-1] = 0;			elt = parser->buffer + parser->elt_name_start-1;			parser->sax_state = SAX_STATE_ATT_NAME;			assert(parser->elt_start_pos <= parser->file_pos + parser->current_pos);			parser->elt_start_pos = parser->file_pos + parser->current_pos;			if (!strncmp(elt, "!--", 3)) { 				parser->sax_state = SAX_STATE_COMMENT;				if (i>3) parser->current_pos -= (i-3);			}			else if (!strcmp(elt, "?xml")) parser->init_state = 1;			else if (!strcmp(elt, "!DOCTYPE")) parser->init_state = 2;			else if (!strcmp(elt, "!ENTITY")) parser->sax_state = SAX_STATE_ENTITY;			else if (!strcmp(elt, "!ATTLIST") || !strcmp(elt, "!ELEMENT")) parser->sax_state = SAX_STATE_SKIP_DOCTYPE;			else if (!strcmp(elt, "![CDATA[")) 				parser->sax_state = SAX_STATE_CDATA;			else if (elt[0]=='?') parser->sax_state = SAX_STATE_XML_PROC;			/*node found*/			else {				xml_sax_flush_text(parser);				if (parser->init_state) {					parser->init_state = 0;					/*that's a bit ugly: since we solve entities when appending text, we need to 					reparse the current buffer*/					if (gf_list_count(parser->entities)) {						char *orig_buf;						GF_Err e;						parser->buffer[parser->elt_name_end-1] = sep;						orig_buf = strdup(parser->buffer + parser->current_pos);						parser->current_pos = 0;						parser->line_size = 0;						parser->elt_start_pos = 0;						parser->sax_state = SAX_STATE_TEXT_CONTENT;						e = gf_xml_sax_parse_intern(parser, orig_buf);						free(orig_buf);						return e;					}				}			}			parser->current_pos+=1+i;			parser->buffer[parser->elt_name_end-1] = sep;			break;		case SAX_STATE_COMMENT:			if (!xml_sax_parse_comments(parser)) {				xml_sax_swap(parser);				return GF_OK;			}			break;		case SAX_STATE_ATT_NAME:		case SAX_STATE_ATT_VALUE:			if (xml_sax_parse_attribute(parser)) 				goto exit;			break;		case SAX_STATE_ENTITY:			xml_sax_parse_entity(parser);			break;		case SAX_STATE_SKIP_DOCTYPE:			xml_sax_skip_doctype(parser);			break;		case SAX_STATE_XML_PROC:			xml_sax_skip_xml_proc(parser);			break;		case SAX_STATE_CDATA:			xml_sax_cdata(parser);			break;		case SAX_STATE_SYNTAX_ERROR:			return GF_CORRUPTED_DATA;		case SAX_STATE_DONE:			return GF_EOS;		}	}exit:#if 0	if (is_text) {		if (i) xml_sax_store_text(parser, i);		/*DON'T FLUSH TEXT YET, wait for next '<' to do so otherwise we may corrupt xml base entities (&apos;, ...)*/	}#endif	xml_sax_swap(parser);	return GF_OK;}static GF_Err xml_sax_append_string(GF_SAXParser *parser, char *string){	u32 size = parser->line_size;	u32 nl_size = strlen(string);		if (parser->alloc_size < size+nl_size+1) {		parser->buffer = realloc(parser->buffer, sizeof(char) * (size+nl_size+1) );		if (!parser->buffer ) return GF_OUT_OF_MEM;		parser->alloc_size = size+nl_size+1;	}	memcpy(parser->buffer+size, string, sizeof(char)*nl_size);	parser->buffer[size+nl_size] = 0;	parser->line_size = size+nl_size;	return GF_OK;}static GF_Err gf_xml_sax_parse_intern(GF_SAXParser *parser, char *current){	u32 i, count;	/*solve entities*/	count = gf_list_count(parser->entities);	while (count) {		char *entityEnd, szName[200];		XML_Entity *ent;		char *entityStart = strstr(current, "&");		if (parser->in_entity) {			entityEnd = strstr(current, ";");			if (!entityEnd) return xml_sax_append_string(parser, current);			entityStart = strrchr(parser->buffer, '&');			strcpy(szName, entityStart+1);			entityStart[0] = 0;			entityEnd[0] = 0;			strcat(szName, current);			entityEnd[0] = ';';			parser->in_entity = 0;			current = entityEnd+1;		} else {			if (!entityStart) break;			entityEnd = strstr(entityStart, ";");			entityStart[0] = 0;			xml_sax_append_string(parser, current);			xml_sax_parse(parser, 1);			entityStart[0] = '&';			if (!entityEnd) {				parser->in_entity = 1;				/*store entity start*/				return xml_sax_append_string(parser, entityStart);			}			strncpy(szName, entityStart+1, entityEnd - entityStart - 1);			szName[entityEnd - entityStart - 1] = 0;			current = entityEnd + 1;		}		for (i=0; i<count; i++) {			ent = (XML_Entity *)gf_list_get(parser->entities, i);			if (!strcmp(ent->name, szName)) {				u32 line_num = parser->line;				xml_sax_append_string(parser, ent->value);				xml_sax_parse(parser, 1);				parser->line = line_num;				break;			}		}	}	xml_sax_append_string(parser, current);	return xml_sax_parse(parser, 0);}GF_EXPORTGF_Err gf_xml_sax_parse(GF_SAXParser *parser, void *string){	GF_Err e;	char *current;	char *utf_conv = NULL;		if (parser->unicode_type < 0) return GF_BAD_PARAM;	if (parser->unicode_type>1) {		const u16 *sptr = (const u16 *)string;		u32 len = 2*gf_utf8_wcslen(sptr);		utf_conv = (char *)malloc(sizeof(char)*(len+1));		len = gf_utf8_wcstombs(utf_conv, len, &sptr);		if (len==(u32) -1) {			parser->sax_state = SAX_STATE_SYNTAX_ERROR;			free(utf_conv);			return GF_CORRUPTED_DATA;		}		utf_conv[len] = 0;		current = utf_conv;	} else {		current = (char *)string;	}	e = gf_xml_sax_parse_intern(parser, current);	if (utf_conv) free(utf_conv);	return e;}GF_EXPORTGF_Err gf_xml_sax_init(GF_SAXParser *parser, unsigned char *BOM){	u32 offset;	if (!BOM) parser->unicode_type = 0;	if (parser->unicode_type >= 0) return gf_xml_sax_parse(parser, BOM);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -