📄 exmlparser.c
字号:
/*
 *  exml.c -- A simple SAX style XML parser
 */

/********************************* Description ********************************/
/*
 *  This is a recursive descent parser for XML text files. It is a one-pass
 *  simple parser that invokes a user supplied callback for key tokens in the
 *  XML file. The user supplies a read function so that XML files can be parsed
 *  from disk or in-memory.
 */

/********************************** Includes **********************************/

#include "exml.h"

/****************************** Forward Declarations **************************/
/* MOB -- FIX */
#if BLD_FEATURE_EXML || 1

static int       parseNext(Exml *xp, int state);
static ExmlToken getToken(Exml *xp, int state);
static int       getNextChar(Exml *xp);
static int       scanFor(Exml *xp, char *str);
static int       putLastChar(Exml *xp, int c);
static void      error(Exml *xp, char *fmt, ...);
static void      trimToken(Exml *xp);

/************************************ Code ************************************/
/*
 *  Create a parser instance.
 *
 *  ctx          Memory context the parser object is allocated from.
 *  initialSize  Initial size passed to mprCreateBuf for the token buffer.
 *  maxSize      Maximum size passed to mprCreateBuf for the token buffer.
 *
 *  Returns the new parser, or 0 if the parser object or either of its
 *  buffers could not be allocated. Release the parser with exmlClose().
 */
Exml *exmlOpen(MprCtx ctx, int initialSize, int maxSize)
{
    Exml    *xp;

    xp = mprAllocTypeZeroed(ctx, Exml);
    if (xp == 0) {
        /* Do not dereference a failed allocation */
        return 0;
    }

    /* Both buffers are created with xp as their memory context */
    xp->inBuf = mprCreateBuf(xp, EXML_BUFSIZE, EXML_BUFSIZE);
    xp->tokBuf = mprCreateBuf(xp, initialSize, maxSize);

    if (xp->inBuf == 0 || xp->tokBuf == 0) {
        /*
         *  NOTE(review): assumes mprCreateBuf reports failure by returning 0.
         *  The parser is released the same way exmlClose() releases it.
         */
        mprFree(xp);
        return 0;
    }
    return xp;
}

/******************************************************************************/
/*
 *  Destroy a parser created by exmlOpen.
 */
void exmlClose(Exml *xp)
{
    mprAssert(xp);

    mprFree(xp);
}

/******************************************************************************/
/*
 *  Set the callback that parseNext invokes for each parse event (new element,
 *  attribute, element data, comment, CDATA, end of element).
 */
void exmlSetParserHandler(Exml *xp, ExmlHandler h)
{
    mprAssert(xp);

    xp->handler = h;
}

/******************************************************************************/
/*
 *  Set the read function and the argument stored alongside it. The code that
 *  consumes readFn/inputArg is outside this part of the file.
 */
void exmlSetInputStream(Exml *xp, ExmlInputStream s, void *arg)
{
    mprAssert(xp);

    xp->readFn = s;
    xp->inputArg = arg;
}

/******************************************************************************/
/*
 *  Set the parse arg
 */
void exmlSetParseArg(Exml *xp, void *parseArg)
{
    mprAssert(xp);

    xp->parseArg = parseArg;
}

/******************************************************************************/
/*
 *  Get the parse arg previously stored with exmlSetParseArg
 */
void *exmlGetParseArg(Exml *xp)
{
    mprAssert(xp);

    return xp->parseArg;
}

/******************************************************************************/
/*
 *  Parse an XML file. Return 0 for success, -1 for error.
 */
int exmlParse(Exml *xp)
{
    mprAssert(xp);

    return parseNext(xp, EXML_BEGIN);
}

/******************************************************************************/
/*
 *  XML parser. This is a recursive descent parser. Return -1 for errors, 0 for
 *  EOF and 1 if there is still more data to parse.
 *
 *  Each invocation handles one nesting level: the top level call (EXML_BEGIN)
 *  recurses with EXML_AFTER_LS for every "<" it sees, and that nested call
 *  consumes one comment, CDATA section, processing instruction or complete
 *  element (recursing again for child elements) before returning 1.
 *
 *  A failed name allocation returns MPR_ERR_MEMORY rather than -1.
 *
 *  The element name (tname) and the current attribute name (aname) are private
 *  copies owned by this invocation. Every way out of the loop goes through the
 *  exit label so that they are always released.
 */
static int parseNext(Exml *xp, int state)
{
    ExmlHandler     handler;
    ExmlToken       token;
    MprBuf          *tokBuf;
    char            *tname, *aname;
    int             rc;

    mprAssert(state >= 0);

    tokBuf = xp->tokBuf;
    handler = xp->handler;
    tname = aname = 0;
    rc = 0;

    /*
     *  In this parse loop, the state is never assigned EOF or ERR. In
     *  such cases we always return EOF or ERR.
     */
    while (1) {

        token = getToken(xp, state);

        if (token == TOKEN_TOO_BIG) {
            error(xp, "XML token is too big");
            goto err;
        }

        switch (state) {
        case EXML_BEGIN:        /* ------------------------------------------ */
            /*
             *  Expect to get an element, comment or processing instruction
             */
            switch (token) {
            case TOKEN_EOF:
                /*
                 *  End of input at the top level is success. Reset rc because
                 *  it still holds the 1 returned by the last nested call.
                 */
                rc = 0;
                goto exit;

            case TOKEN_LS:
                /*
                 *  Recurse to handle the new element, comment etc.
                 */
                rc = parseNext(xp, EXML_AFTER_LS);
                if (rc < 0) {
                    goto exit;
                }
                break;

            default:
                error(xp, "Syntax error");
                goto err;
            }
            break;

        case EXML_AFTER_LS:     /* ------------------------------------------ */
            switch (token) {
            case TOKEN_COMMENT:
                state = EXML_COMMENT;
                rc = (*handler)(xp, state, "!--", 0, mprGetBufStart(tokBuf));
                if (rc < 0) {
                    goto err;
                }
                rc = 1;
                goto exit;

            case TOKEN_CDATA:
                state = EXML_CDATA;
                /*
                 *  NOTE(review): CDATA is reported with the same "!--" name as
                 *  a comment. Handlers must use the state to tell them apart.
                 *  Left unchanged as existing handlers may depend on it.
                 */
                rc = (*handler)(xp, state, "!--", 0, mprGetBufStart(tokBuf));
                if (rc < 0) {
                    goto err;
                }
                rc = 1;
                goto exit;

            case TOKEN_INSTRUCTIONS:
                /* Just ignore processing instructions */
                rc = 1;
                goto exit;

            case TOKEN_TEXT:
                state = EXML_NEW_ELT;
                tname = mprStrdup(xp, mprGetBufStart(tokBuf));
                if (tname == 0) {
                    rc = MPR_ERR_MEMORY;
                    goto exit;
                }
                rc = (*handler)(xp, state, tname, 0, 0);
                if (rc < 0) {
                    goto err;
                }
                break;

            default:
                error(xp, "Syntax error");
                goto err;
            }
            break;

        case EXML_NEW_ELT:      /* ------------------------------------------ */
            /*
             *  We have seen the opening "<element" for a new element and have
             *  not yet seen the terminating ">" of the opening element.
             */
            switch (token) {
            case TOKEN_TEXT:
                /*
                 *  Must be an attribute name. Release the name of the previous
                 *  attribute (if any) before taking a copy of this one.
                 */
                mprFree(aname);
                aname = mprStrdup(xp, mprGetBufStart(tokBuf));
                if (aname == 0) {
                    rc = MPR_ERR_MEMORY;
                    goto exit;
                }
                token = getToken(xp, state);
                if (token != TOKEN_EQ) {
                    error(xp, "Missing assignment for attribute \"%s\"", aname);
                    goto err;
                }

                token = getToken(xp, state);
                if (token != TOKEN_TEXT) {
                    error(xp, "Missing value for attribute \"%s\"", aname);
                    goto err;
                }
                state = EXML_NEW_ATT;
                rc = (*handler)(xp, state, tname, aname,
                    mprGetBufStart(tokBuf));
                if (rc < 0) {
                    goto err;
                }
                state = EXML_NEW_ELT;
                break;

            case TOKEN_GR:
                /*
                 *  This is ">" the termination of the opening element
                 */
                if (*tname == '\0') {
                    error(xp, "Missing element name");
                    goto err;
                }

                /*
                 *  Tell the user that the opening element is now complete
                 */
                state = EXML_ELT_DEFINED;
                rc = (*handler)(xp, state, tname, 0, 0);
                if (rc < 0) {
                    goto err;
                }
                state = EXML_ELT_DATA;
                break;

            case TOKEN_SLASH_GR:
                /*
                 *  If we see a "/>" then this is a solo element
                 */
                if (*tname == '\0') {
                    error(xp, "Missing element name");
                    goto err;
                }
                state = EXML_SOLO_ELT_DEFINED;
                rc = (*handler)(xp, state, tname, 0, 0);
                if (rc < 0) {
                    goto err;
                }
                rc = 1;
                goto exit;

            default:
                error(xp, "Syntax error");
                goto err;
            }
            break;

        case EXML_ELT_DATA:     /* -------------------------------------- */
            /*
             *  We have seen the full opening element "<name ...>" and now
             *  await data or another element.
             */
            if (token == TOKEN_LS) {
                /*
                 *  Recurse to handle the new element, comment etc.
                 */
                rc = parseNext(xp, EXML_AFTER_LS);
                if (rc < 0) {
                    goto exit;
                }
                break;

            } else if (token == TOKEN_LS_SLASH) {
                state = EXML_END_ELT;
                break;

            } else if (token != TOKEN_TEXT) {
                goto err;
            }
            if (mprGetBufLength(tokBuf) > 0) {
                /*
                 *  Pass the data between the element to the user
                 */
                rc = (*handler)(xp, state, tname, 0, mprGetBufStart(tokBuf));
                if (rc < 0) {
                    goto err;
                }
            }
            break;

        case EXML_END_ELT:      /* -------------------------------------- */
            if (token != TOKEN_TEXT) {
                error(xp, "Missing closing element name for \"%s\"", tname);
                goto err;
            }
            /*
             *  The closing element name must match the opening element name
             */
            if (strcmp(tname, mprGetBufStart(tokBuf)) != 0) {
                error(xp,
                    "Closing element name \"%s\" does not match on line %d "
                    "opening name \"%s\"",
                    mprGetBufStart(tokBuf), xp->lineNumber, tname);
                goto err;
            }
            rc = (*handler)(xp, state, tname, 0, 0);
            if (rc < 0) {
                goto err;
            }
            if (getToken(xp, state) != TOKEN_GR) {
                error(xp, "Syntax error");
                goto err;
            }
            /*
             *  Element complete. Leave via exit (not a direct return) so the
             *  name copies are released.
             */
            rc = 1;
            goto exit;

        case EXML_EOF:          /* ---------------------------------------------- */
            goto exit;

        case EXML_ERR:          /* ---------------------------------------------- */
        default:
            goto err;
        }
    }
    mprAssert(0);

err:
    rc = -1;

exit:
    mprFree(tname);
    mprFree(aname);

    return rc;
}

/******************************************************************************/
/*
 *  Lexical analyser for XML. Return the next token reading input as required.
 *  It uses a one token look ahead and push back mechanism (LAR1 parser).
 *  Text token identifiers are left in the tokBuf parser buffer on exit.
 *  This Lex has special cases for the states EXML_ELT_DATA where we
 *  have an optimized read of element data, and EXML_AFTER_LS where we
 *  distinguish between element names, processing instructions and comments.
 */
static ExmlToken getToken(Exml *xp, int state)
{
    MprBuf      *tokBuf, *inBuf;
    uchar       *cp;
    int         c, rc;

    tokBuf = xp->tokBuf;
    inBuf = xp->inBuf;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -