⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sax.c

📁 Linux平台或者Windows平台通用XML解析器
💻 C
字号:
/* iksemel (XML parser for Jabber)
** Copyright (C) 2000-2004 Gurer Ozen <madcat@e-kolay.net>
** This code is free software; you can redistribute it and/or
** modify it under the terms of GNU Lesser General Public License.
*/

#include "common.h"
#include "iksemel.h"

/* States of the byte-driven state machine implemented by sax_core().
** C_CDATA must stay 0: a freshly zeroed or reset parser starts there. */
enum cons_e {
	C_CDATA = 0,
	C_TAG_START,
	C_TAG,
	C_TAG_END,
	C_ATTRIBUTE,
	C_ATTRIBUTE_1,
	C_ATTRIBUTE_2,
	C_VALUE,
	C_VALUE_APOS,
	C_VALUE_QUOT,
	C_WHITESPACE,
	C_ENTITY,
	C_COMMENT,
	C_COMMENT_1,
	C_COMMENT_2,
	C_COMMENT_3,
	C_MARKUP,
	C_MARKUP_1,
	C_SECT,
	C_SECT_CDATA,
	C_SECT_CDATA_1,
	C_SECT_CDATA_2,
	C_SECT_CDATA_3,
	C_SECT_CDATA_4,
	C_SECT_CDATA_C,
	C_SECT_CDATA_E,
	C_SECT_CDATA_E2,
	C_PI
};

/* if you add a variable here, dont forget changing iks_parser_reset */
struct iksparser_struct {
	ikstack *s;                /* set by iks_sax_extend(); the parser was allocated from it */
	void *user_data;           /* passed back as first argument of every hook */
	iksTagHook *tagHook;       /* called once per completed tag */
	iksCDataHook *cdataHook;   /* called for character data runs */
	iksDeleteHook *deleteHook; /* called from reset and delete */
	/* parser context */
	char *stack;               /* buffer holding the current tag name and attribute strings */
	size_t stack_pos;          /* bytes of stack in use */
	size_t stack_max;          /* allocated size of stack */
	enum cons_e context;       /* current state */
	enum cons_e oldcontext;    /* state entered once C_WHITESPACE sees a non-blank byte */
	char *tag_name;            /* points into stack */
	enum ikstagtype tagtype;
	unsigned int attmax;       /* name/value pairs that atts can hold */
	unsigned int attcur;       /* index of the next free slot in atts (grows by 2) */
	int attflag;               /* nonzero once the current tag has stored an attribute */
	char **atts;               /* name, value, name, value, ..., NULL; entries point into stack */
	int valflag;               /* only cleared by iks_parser_reset() in this file */
	unsigned int entpos;       /* bytes collected in entity */
	char entity[8];            /* entity name between '&' and ';' */
	unsigned long nr_bytes;    /* bytes consumed so far */
	unsigned long nr_lines;    /* '\n' bytes consumed so far */
	int uni_max;               /* length of the multi-byte sequence in progress, 0 if none */
	int uni_len;               /* bytes of that sequence seen so far */
};

/* Attributes are allocated in steps of this many name/value pairs. */
#define ATTS_STEP 12

/* Byte size of an attribute table for n pairs, including one extra slot
** for the NULL terminator written after the last pair. */
#define ATTS_SIZE(n) (sizeof (char *) * (2 * (size_t) (n) + 1))

/* Creates a parser allocated with iks_malloc().
** Returns NULL when the allocation fails. All state starts zeroed, so the
** parser begins in C_CDATA with no delete hook. */
iksparser *
iks_sax_new (void *user_data, iksTagHook *tagHook, iksCDataHook *cdataHook)
{
	iksparser *prs;

	prs = iks_malloc (sizeof (iksparser));
	if (NULL == prs) return NULL;
	memset (prs, 0, sizeof (iksparser));
	prs->user_data = user_data;
	prs->tagHook = tagHook;
	prs->cdataHook = cdataHook;
	return prs;
}

/* Creates a parser allocated from the ikstack s and remembers s, so that
** iks_parser_delete() releases the stack instead of freeing the parser.
** Returns NULL when the allocation fails. */
iksparser *
iks_sax_extend (ikstack *s, void *user_data, iksTagHook *tagHook, iksCDataHook *cdataHook, iksDeleteHook *deleteHook)
{
	iksparser *prs;

	prs = iks_stack_alloc (s, sizeof (iksparser));
	if (NULL == prs) return NULL;
	memset (prs, 0, sizeof (iksparser));
	prs->s = s;
	prs->user_data = user_data;
	prs->tagHook = tagHook;
	prs->cdataHook = cdataHook;
	prs->deleteHook = deleteHook;
	return prs;
}

/* Returns the ikstack given to iks_sax_extend(), or NULL for parsers
** created with iks_sax_new(). */
ikstack *
iks_parser_stack (iksparser *prs)
{
	return prs->s;
}

/* Returns the user_data pointer given at creation. */
void *
iks_user_data (iksparser *prs)
{
	return prs->user_data;
}

/* Returns the number of bytes consumed since creation or the last reset. */
unsigned long
iks_nr_bytes (iksparser *prs)
{
	return prs->nr_bytes;
}

/* Returns the number of '\n' bytes consumed since creation or the last reset. */
unsigned long
iks_nr_lines (iksparser *prs)
{
	return prs->nr_lines;
}

/* Fully parenthesized so they stay correct inside larger expressions. */
#define IS_WHITESPACE(x) (' ' == (x) || '\t' == (x) || '\r' == (x) || '\n' == (x))
#define NOT_WHITESPACE(x) (' ' != (x) && '\t' != (x) && '\r' != (x) && '\n' != (x))

/* Allocates the initial 128 byte string stack. Returns 1 on success, 0 on
** allocation failure. */
static int
stack_init (iksparser *prs)
{
	prs->stack = iks_malloc (128);
	if (!prs->stack) return 0;
	prs->stack_max = 128;
	prs->stack_pos = 0;
	return 1;
}

/* Grows the string stack so that len more bytes fit, and rebases tag_name
** and the attribute pointers onto the new buffer.
** Returns 1 on success, 0 on allocation failure (state is left untouched). */
static int
stack_expand (iksparser *prs, int len)
{
	size_t need;
	char *old;
	char *tmp;

	need = len - (prs->stack_max - prs->stack_pos);
	if (need < prs->stack_max) {
		need = prs->stack_max * 2;
	} else {
		/* need x 1.2 for integer only archs like ARM */
		need = prs->stack_max + ((need * 6) / 5);
	}
	tmp = iks_malloc (need);
	if (!tmp) return 0;
	old = prs->stack;
	memcpy (tmp, old, prs->stack_max);
	/* Rebase through the offset inside the old buffer, while it is still
	** allocated, instead of subtracting pointers of two different objects. */
	if (prs->tag_name) prs->tag_name = tmp + (prs->tag_name - old);
	if (prs->attflag != 0) {
		unsigned int i;

		for (i = 0; i < prs->attmax * 2; i++) {
			if (prs->atts[i]) prs->atts[i] = tmp + (prs->atts[i] - old);
		}
	}
	iks_free (old);
	prs->stack = tmp;
	prs->stack_max = need;
	return 1;
}

/* The macros below operate on the local variable prs and return IKS_NOMEM
** from the enclosing function on allocation failure. */

/* Allocates the string stack on first use. */
#define STACK_INIT \
	do { \
		if (NULL == prs->stack && 0 == stack_init (prs)) return IKS_NOMEM; \
	} while (0)

/* Address at which the next pushed byte will be stored. */
#define STACK_PUSH_START (prs->stack + prs->stack_pos)

/* Appends len bytes from buf, growing the stack when they do not fit
** with at least one byte to spare. */
#define STACK_PUSH(buf,len) \
	do { \
		char *sbuf = (buf); \
		size_t slen = (len); \
		if (prs->stack_max - prs->stack_pos <= slen) { \
			if (0 == stack_expand (prs, slen)) return IKS_NOMEM; \
		} \
		memcpy (prs->stack + prs->stack_pos, sbuf, slen); \
		prs->stack_pos += slen; \
	} while (0)

/* Terminates the string being built with a '\0'. */
#define STACK_PUSH_END \
	do { \
		if (prs->stack_pos >= prs->stack_max) { \
			if (0 == stack_expand (prs, 1)) return IKS_NOMEM; \
		} \
		prs->stack[prs->stack_pos] = '\0'; \
		prs->stack_pos++; \
	} while (0)

/* Feeds len bytes of buf through the state machine.
**
** Character data is reported through cdataHook as slices of buf; tag names
** and attributes are accumulated on the parser's string stack and reported
** through tagHook when the closing '>' is seen. State is kept in prs, so a
** document may be fed in arbitrary chunks.
**
** Returns IKS_OK, IKS_BADXML on malformed input, IKS_NOMEM on allocation
** failure, or whatever non-IKS_OK value a hook returned.
**
** Locals: old is the start of the not yet reported character data in buf,
** stack_old the start of the not yet pushed name/value bytes (-1 if none),
** re is set to make the next iteration reprocess the same byte in the new
** state. */
static enum ikserror
sax_core (iksparser *prs, char *buf, int len)
{
	enum ikserror err;
	int pos = 0, old = 0, re, stack_old = -1;
	unsigned char c;

	while (pos < len) {
		re = 0;
		c = buf[pos];
		if (0 == c || 0xFE == c || 0xFF == c) return IKS_BADXML;
		if (prs->uni_max) {
			/* inside a multi-byte sequence: only continuation bytes allowed */
			if ((c & 0xC0) != 0x80) return IKS_BADXML;
			prs->uni_len++;
			if (prs->uni_len == prs->uni_max) prs->uni_max = 0;
			goto cont;
		} else {
			if (c & 0x80) {
				unsigned char mask;

				if ((c & 0x60) == 0x40) {
					prs->uni_max = 2;
					mask = 0x1F;
				} else if ((c & 0x70) == 0x60) {
					prs->uni_max = 3;
					mask = 0x0F;
				} else if ((c & 0x78) == 0x70) {
					prs->uni_max = 4;
					mask = 0x07;
				} else if ((c & 0x7C) == 0x78) {
					prs->uni_max = 5;
					mask = 0x03;
				} else if ((c & 0x7E) == 0x7C) {
					prs->uni_max = 6;
					mask = 0x01;
				} else {
					return IKS_BADXML;
				}
				/* NOTE(review): this also rejects lead bytes such as 0xE0
				** and 0xF0, which start valid sequences (e.g. U+0800);
				** confirm whether that is intended. */
				if ((c & mask) == 0) return IKS_BADXML;
				prs->uni_len = 1;
				if (stack_old == -1
					&& (prs->context == C_TAG
						|| prs->context == C_ATTRIBUTE_1
						|| prs->context == C_VALUE_APOS
						|| prs->context == C_VALUE_QUOT)) stack_old = pos;
				goto cont;
			}
		}

		switch (prs->context) {
			case C_CDATA:
				if ('&' == c) {
					if (old < pos && prs->cdataHook) {
						err = prs->cdataHook (prs->user_data, &buf[old], pos - old);
						if (IKS_OK != err) return err;
					}
					prs->context = C_ENTITY;
					prs->entpos = 0;
					break;
				}
				if ('<' == c) {
					if (old < pos && prs->cdataHook) {
						err = prs->cdataHook (prs->user_data, &buf[old], pos - old);
						if (IKS_OK != err) return err;
					}
					STACK_INIT;
					prs->tag_name = STACK_PUSH_START;
					if (!prs->tag_name) return IKS_NOMEM;
					prs->context = C_TAG_START;
				}
				break;

			case C_TAG_START:
				prs->context = C_TAG;
				if ('/' == c) {
					prs->tagtype = IKS_CLOSE;
					break;
				}
				if ('?' == c) {
					prs->context = C_PI;
					break;
				}
				if ('!' == c) {
					prs->context = C_MARKUP;
					break;
				}
				prs->tagtype = IKS_OPEN;
				stack_old = pos;
				break;

			case C_TAG:
				if (IS_WHITESPACE(c)) {
					if (IKS_CLOSE == prs->tagtype)
						prs->oldcontext = C_TAG_END;
					else
						prs->oldcontext = C_ATTRIBUTE;
					prs->context = C_WHITESPACE;
					if (stack_old != -1) STACK_PUSH (buf + stack_old, pos - stack_old);
					stack_old = -1;
					STACK_PUSH_END;
					break;
				}
				if ('/' == c) {
					if (IKS_CLOSE == prs->tagtype) return IKS_BADXML;
					prs->tagtype = IKS_SINGLE;
					prs->context = C_TAG_END;
					if (stack_old != -1) STACK_PUSH (buf + stack_old, pos - stack_old);
					stack_old = -1;
					STACK_PUSH_END;
					break;
				}
				if ('>' == c) {
					prs->context = C_TAG_END;
					if (stack_old != -1) STACK_PUSH (buf + stack_old, pos - stack_old);
					stack_old = -1;
					STACK_PUSH_END;
					re = 1;
					break;
				}
				if (stack_old == -1) stack_old = pos;
				break;

			case C_TAG_END:
				if (c != '>') return IKS_BADXML;
				if (prs->tagHook) {
					char **tmp;

					if (prs->attcur == 0) tmp = NULL; else tmp = prs->atts;
					err = prs->tagHook (prs->user_data, prs->tag_name, tmp, prs->tagtype);
					if (IKS_OK != err) return err;
				}
				/* the tag has been reported; reuse the string stack */
				prs->stack_pos = 0;
				stack_old = -1;
				prs->attcur = 0;
				prs->attflag = 0;
				prs->context = C_CDATA;
				old = pos + 1;
				break;

			case C_ATTRIBUTE:
				if ('/' == c) {
					prs->tagtype = IKS_SINGLE;
					prs->context = C_TAG_END;
					break;
				}
				if ('>' == c) {
					prs->context = C_TAG_END;
					re = 1;
					break;
				}
				if (!prs->atts) {
					prs->atts = iks_malloc (ATTS_SIZE (ATTS_STEP));
					if (!prs->atts) return IKS_NOMEM;
					memset (prs->atts, 0, ATTS_SIZE (ATTS_STEP));
					prs->attmax = ATTS_STEP;
					prs->attcur = 0;
				} else {
					if (prs->attcur >= (prs->attmax * 2)) {
						unsigned int newmax = prs->attmax + ATTS_STEP;
						void *tmp;

						tmp = iks_malloc (ATTS_SIZE (newmax));
						if (!tmp) return IKS_NOMEM;
						memset (tmp, 0, ATTS_SIZE (newmax));
						memcpy (tmp, prs->atts, sizeof(char *) * prs->attcur);
						/* allocated with iks_malloc, so release with iks_free */
						iks_free (prs->atts);
						prs->atts = tmp;
						/* commit the new capacity only after the table exists */
						prs->attmax = newmax;
					}
				}
				prs->attflag = 1;
				prs->atts[prs->attcur] = STACK_PUSH_START;
				stack_old = pos;
				prs->context = C_ATTRIBUTE_1;
				break;

			case C_ATTRIBUTE_1:
				if ('=' == c) {
					if (stack_old != -1) STACK_PUSH (buf + stack_old, pos - stack_old);
					stack_old = -1;
					STACK_PUSH_END;
					prs->context = C_VALUE;
					break;
				}
				if (stack_old == -1) stack_old = pos;
				break;

			case C_ATTRIBUTE_2:
				/* attcur may equal attmax * 2 here; ATTS_SIZE reserves
				** the extra slot that the terminator needs */
				if ('/' == c) {
					prs->tagtype = IKS_SINGLE;
					prs->atts[prs->attcur] = NULL;
					prs->context = C_TAG_END;
					break;
				}
				if ('>' == c) {
					prs->atts[prs->attcur] = NULL;
					prs->context = C_TAG_END;
					re = 1;
					break;
				}
				prs->context = C_ATTRIBUTE;
				re = 1;
				break;

			case C_VALUE:
				prs->atts[prs->attcur + 1] = STACK_PUSH_START;
				if ('\'' == c) {
					prs->context = C_VALUE_APOS;
					break;
				}
				if ('"' == c) {
					prs->context = C_VALUE_QUOT;
					break;
				}
				return IKS_BADXML;

			case C_VALUE_APOS:
				if ('\'' == c) {
					if (stack_old != -1) STACK_PUSH (buf + stack_old, pos - stack_old);
					stack_old = -1;
					STACK_PUSH_END;
					prs->oldcontext = C_ATTRIBUTE_2;
					prs->context = C_WHITESPACE;
					prs->attcur += 2;
					/* value is complete: do not restart a pending span
					** at the closing quote */
					break;
				}
				if (stack_old == -1) stack_old = pos;
				break;

			case C_VALUE_QUOT:
				if ('"' == c) {
					if (stack_old != -1) STACK_PUSH (buf + stack_old, pos - stack_old);
					stack_old = -1;
					STACK_PUSH_END;
					prs->oldcontext = C_ATTRIBUTE_2;
					prs->context = C_WHITESPACE;
					prs->attcur += 2;
					/* value is complete: do not restart a pending span
					** at the closing quote */
					break;
				}
				if (stack_old == -1) stack_old = pos;
				break;

			case C_WHITESPACE:
				if (NOT_WHITESPACE(c)) {
					prs->context = prs->oldcontext;
					re = 1;
				}
				break;

			case C_ENTITY:
				if (';' == c) {
					/* entities other than the five below are reported as '?' */
					char t = '?';

					prs->entity[prs->entpos] = '\0';
					if (strcmp(prs->entity, "amp") == 0)
						t = '&';
					else if (strcmp(prs->entity, "quot") == 0)
						t = '"';
					else if (strcmp(prs->entity, "apos") == 0)
						t = '\'';
					else if (strcmp(prs->entity, "lt") == 0)
						t = '<';
					else if (strcmp(prs->entity, "gt") == 0)
						t = '>';
					old = pos + 1;
					if (prs->cdataHook) {
						err = prs->cdataHook (prs->user_data, &t, 1);
						if (IKS_OK != err) return err;
					}
					prs->context = C_CDATA;
				} else {
					/* check before storing so that entpos can never index
					** past the buffer, even if parsing is resumed after
					** the error; one byte stays reserved for the '\0' */
					if (prs->entpos >= sizeof (prs->entity) - 1) return IKS_BADXML;
					prs->entity[prs->entpos++] = buf[pos];
				}
				break;

			case C_COMMENT:
				if ('-' != c) return IKS_BADXML;
				prs->context = C_COMMENT_1;
				break;

			case C_COMMENT_1:
				if ('-' == c) prs->context = C_COMMENT_2;
				break;

			case C_COMMENT_2:
				if ('-' == c)
					prs->context = C_COMMENT_3;
				else
					prs->context = C_COMMENT_1;
				break;

			case C_COMMENT_3:
				if ('>' != c) return IKS_BADXML;
				prs->context = C_CDATA;
				old = pos + 1;
				break;

			case C_MARKUP:
				if ('[' == c) {
					prs->context = C_SECT;
					break;
				}
				if ('-' == c) {
					prs->context = C_COMMENT;
					break;
				}
				prs->context = C_MARKUP_1;
				/* fallthrough: this byte may already be the closing '>' */

			case C_MARKUP_1:
				if ('>' == c) {
					old = pos + 1;
					prs->context = C_CDATA;
				}
				break;

			case C_SECT:
				if ('C' == c) {
					prs->context = C_SECT_CDATA;
					break;
				}
				return IKS_BADXML;

			case C_SECT_CDATA:
				if ('D' != c) return IKS_BADXML;
				prs->context = C_SECT_CDATA_1;
				break;

			case C_SECT_CDATA_1:
				if ('A' != c) return IKS_BADXML;
				prs->context = C_SECT_CDATA_2;
				break;

			case C_SECT_CDATA_2:
				if ('T' != c) return IKS_BADXML;
				prs->context = C_SECT_CDATA_3;
				break;

			case C_SECT_CDATA_3:
				if ('A' != c) return IKS_BADXML;
				prs->context = C_SECT_CDATA_4;
				break;

			case C_SECT_CDATA_4:
				if ('[' != c) return IKS_BADXML;
				old = pos + 1;
				prs->context = C_SECT_CDATA_C;
				break;

			case C_SECT_CDATA_C:
				if (']' == c) {
					prs->context = C_SECT_CDATA_E;
					if (prs->cdataHook && old < pos) {
						err = prs->cdataHook (prs->user_data, &buf[old], pos - old);
						if (IKS_OK != err) return err;
					}
				}
				break;

			case C_SECT_CDATA_E:
				if (']' == c) {
					prs->context = C_SECT_CDATA_E2;
				} else {
					/* the single ']' was data after all */
					if (prs->cdataHook) {
						err = prs->cdataHook (prs->user_data, "]", 1);
						if (IKS_OK != err) return err;
					}
					old = pos;
					prs->context = C_SECT_CDATA_C;
				}
				break;

			case C_SECT_CDATA_E2:
				if ('>' == c) {
					old = pos + 1;
					prs->context = C_CDATA;
				} else if (']' == c) {
					/* "]]]": the first ']' is data, the last two may
					** still begin the "]]>" terminator, so stay here */
					if (prs->cdataHook) {
						err = prs->cdataHook (prs->user_data, "]", 1);
						if (IKS_OK != err) return err;
					}
				} else {
					if (prs->cdataHook) {
						err = prs->cdataHook (prs->user_data, "]]", 2);
						if (IKS_OK != err) return err;
					}
					old = pos;
					prs->context = C_SECT_CDATA_C;
				}
				break;

			case C_PI:
				old = pos + 1;
				if ('>' == c) prs->context = C_CDATA;
				break;
		}
cont:
		if (0 == re) {
			pos++;
			prs->nr_bytes++;
			if ('\n' == c) prs->nr_lines++;
		}
	}

	/* buf goes away after this call: save the unfinished name or value */
	if (stack_old != -1)
		STACK_PUSH (buf + stack_old, pos - stack_old);

	/* report the character data that is still pending */
	err = IKS_OK;
	if (prs->cdataHook && (prs->context == C_CDATA || prs->context == C_SECT_CDATA_C) && old < pos)
		err = prs->cdataHook (prs->user_data, &buf[old], pos - old);
	return err;
}

/* Parses len bytes of data; when len is 0, data is taken to be a
** '\0' terminated string and its strlen() is used.
** A NULL data pointer is accepted and yields IKS_OK.
** finish is not used by this implementation.
** Returns the enum ikserror value produced by sax_core(). */
int
iks_parse (iksparser *prs, const char *data, size_t len, int finish)
{
	(void) finish;
	if (!data) return IKS_OK;
	if (len == 0) len = strlen (data);
	return sax_core (prs, (char *) data, len);
}

/* Calls the delete hook, if any, and puts the parser back into its initial
** state. The string stack and the attribute table stay allocated for
** reuse. */
void
iks_parser_reset (iksparser *prs)
{
	if (prs->deleteHook) prs->deleteHook (prs->user_data);
	prs->stack_pos = 0;
	prs->context = 0;
	prs->oldcontext = 0;
	prs->tagtype = 0;
	prs->attcur = 0;
	prs->attflag = 0;
	prs->valflag = 0;
	prs->entpos = 0;
	prs->nr_bytes = 0;
	prs->nr_lines = 0;
	prs->uni_max = 0;
	prs->uni_len = 0;
}

/* Calls the delete hook, if any, and releases the parser's buffers.
** A parser created with iks_sax_extend() lives inside its ikstack, so the
** stack is deleted instead of freeing the parser itself. */
void
iks_parser_delete (iksparser *prs)
{
	if (prs->deleteHook) prs->deleteHook (prs->user_data);
	if (prs->stack) iks_free (prs->stack);
	if (prs->atts) iks_free (prs->atts);
	if (prs->s) iks_stack_delete (prs->s); else iks_free (prs);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -