regxparser.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,465 行 · 第 1/3 页
CPP
1,465 行
// NOTE(review): the two statements and the closing brace below finish a
// definition whose beginning lies outside this excerpt.
processNext();
return fTokenFactory->getStringBegin();
}

// Dispatched from parseFactor() for a backslash token whose character is 'Z'.
// Advances to the next token and returns the factory's "string end 2" token.
Token* RegxParser::processBacksolidus_Z() {

    processNext();
    return fTokenFactory->getStringEnd2();
}

// Dispatched from parseFactor() for a backslash token whose character is 'z'.
// Advances to the next token and returns the factory's "string end" token.
Token* RegxParser::processBacksolidus_z() {

    processNext();
    return fTokenFactory->getStringEnd();
}

// Advances to the next token and returns the factory's word-edge token.
Token* RegxParser::processBacksolidus_b() {

    processNext();
    return fTokenFactory->getWordEdge();
}

// Advances to the next token and returns the factory's not-word-edge token.
Token* RegxParser::processBacksolidus_B() {

    processNext();
    return fTokenFactory->getNotWordEdge();
}

// Dispatched from parseFactor() for a backslash token whose character is '<'.
// Advances to the next token and returns the factory's word-begin token.
Token* RegxParser::processBacksolidus_lt() {

    processNext();
    return fTokenFactory->getWordBegin();
}

// Dispatched from parseFactor() for a backslash token whose character is '>'.
// Advances to the next token and returns the factory's word-end token.
Token* RegxParser::processBacksolidus_gt() {

    processNext();
    return fTokenFactory->getWordEnd();
}

// Builds a closure token around `tok` for the '*' quantifier.
// If the token following '*' is REGX_T_QUESTION it is consumed as well and
// createClosure() is called with `true` as its second argument
// (presumably selecting the non-greedy form -- confirm against TokenFactory).
Token* RegxParser::processStar(Token* const tok) {

    processNext();

    if (fState == REGX_T_QUESTION) {

        processNext();
        return fTokenFactory->createClosure(tok, true);
    }

    return fTokenFactory->createClosure(tok);
}

// Builds the token for the '+' quantifier as the concatenation of `tok`
// with a closure over `tok`. A following REGX_T_QUESTION token is consumed
// and passes `true` as the second argument of createClosure().
Token* RegxParser::processPlus(Token* const tok) {

    processNext();

    if (fState == REGX_T_QUESTION) {

        processNext();
        return fTokenFactory->createConcat(tok,
                               fTokenFactory->createClosure(tok,true));
    }

    return fTokenFactory->createConcat(tok,
                                       fTokenFactory->createClosure(tok));
}

// Builds the token for the '?' quantifier as a union of `tok` and an empty
// token. The order of the two children depends on whether another
// REGX_T_QUESTION token follows: empty-first when it does, `tok`-first
// otherwise.
Token* RegxParser::processQuestion(Token* const tok) {

    processNext();

    Token* parentTok = fTokenFactory->createUnion();

    if (fState == REGX_T_QUESTION) {

        processNext();
        parentTok->addChild(fTokenFactory->createToken(Token::T_EMPTY), fTokenFactory);
        parentTok->addChild(tok, fTokenFactory);
    }
    else {

        parentTok->addChild(tok, fTokenFactory);
        parentTok->addChild(fTokenFactory->createToken(Token::T_EMPTY), fTokenFactory);
    }

    return parentTok;
}

// Parses a numbered group: takes the next group number from fNoGroups,
// parses the enclosed expression with parseRegx(true) and wraps it in a
// parenthesis token.
// Throws ParseException (Parser_Factor1) when the group is not followed by
// REGX_T_RPAREN.
Token* RegxParser::processParen() {

    processNext();
    int num = fNoGroups++;
    Token* tok = fTokenFactory->createParenthesis(parseRegx(true),num);

    if (fState != REGX_T_RPAREN)
        ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Factor1, fMemoryManager);

    processNext();
    return tok;
}

// Parses a group that does not consume a group number: the enclosed
// expression is wrapped in a parenthesis token created with number 0.
// Throws ParseException (Parser_Factor1) when the group is not followed by
// REGX_T_RPAREN.
Token* RegxParser::processParen2() {

    processNext();
    Token* tok = fTokenFactory->createParenthesis(parseRegx(), 0);

    if (fState != REGX_T_RPAREN)
        ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Factor1, fMemoryManager);

    processNext();
    return tok;
}

// Parses a conditional group and returns a condition token built from a
// reference number or a condition token, a "yes" pattern and an optional
// "no" pattern.
//
// The condition is either a single digit 1-9 followed by ')' (recorded as a
// back reference) or a factor that must be a look-ahead/look-behind token or
// an anchor.
//
// Throws ParseException with:
//   Parser_Factor4 - fewer than two characters remain in the pattern
//   Parser_Factor1 - an expected ')' is missing
//   Parser_Factor5 - the condition factor has an unsupported token type
//   Parser_Factor6 - the body is a union that does not have exactly 2 children
Token* RegxParser::processCondition() {

    if (fOffset + 1 >= fStringLen)
        ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Factor4, fMemoryManager);

    int refNo = -1;
    Token* conditionTok = 0;
    XMLInt32 ch = fString[fOffset];

    if (chDigit_1 <= ch && ch <= chDigit_9) {

        refNo = ch - chDigit_0;
        fHasBackReferences = true;

        // The reference list is created lazily on first use.
        if (fReferences == 0) {
            this->fReferences = new (fMemoryManager) RefVectorOf<ReferencePosition>(8, true, fMemoryManager);
        }

        fReferences->addElement(new (fMemoryManager) ReferencePosition(refNo, fOffset));
        fOffset++;

        if (fString[fOffset] != chCloseParen)
            ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Factor1, fMemoryManager);

        fOffset++;
    }
    else {

        // Step back one character before tokenizing when the condition
        // starts with '?'.
        if (ch == chQuestion) {
            fOffset--;
        }

        processNext();
        conditionTok = parseFactor();
        switch(conditionTok->getTokenType()) {
        case Token::T_LOOKAHEAD:
        case Token::T_NEGATIVELOOKAHEAD:
        case Token::T_LOOKBEHIND:
        case Token::T_NEGATIVELOOKBEHIND:
            break;
        case Token::T_ANCHOR:
            if (fState != REGX_T_RPAREN)
                ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Factor1, fMemoryManager);
            break;
        default:
            ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Factor5, fMemoryManager);
        }
    }

    processNext();
    Token* yesPattern = parseRegx();
    Token* noPattern = 0;

    // A union body is split into its two alternatives: child 0 becomes the
    // "yes" pattern and child 1 the "no" pattern.
    if (yesPattern->getTokenType() == Token::T_UNION) {

        if (yesPattern->size() != 2)
            ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Factor6, fMemoryManager);

        noPattern = yesPattern->getChild(1);
        yesPattern = yesPattern->getChild(0);
    }

    if (fState != REGX_T_RPAREN)
        ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Factor1, fMemoryManager);

    processNext();
    return fTokenFactory->createCondition(refNo,conditionTok,
                                          yesPattern,noPattern);
}

// Parses a modifier group and returns a modifier-group token carrying the
// option bits to add (`add`) and the option bits collected after '-'
// (`mask`).
//
// Throws ParseException with:
//   Parser_Factor2 - the pattern ends inside the modifier list
//   Parser_Factor1 - the ':' form is not closed by ')'
//   Parser_Factor3 - the modifier list is followed by neither ':' nor ')'
Token* RegxParser::processModifiers() {

    // fOffset points to the next '?'.
    // modifiers ::= [imsw]* ('-' [imsw]*)? ':'
    int add = 0;
    int mask = 0;
    XMLInt32 ch = -1;

    while (fOffset < fStringLen) {

        int v = RegularExpression::getOptionValue(fString[fOffset]);
        ch = fString[fOffset];
        if (v == 0)
            break;
        add |= v;
        fOffset++;
    } // end while

    if (fOffset >= fStringLen)
        ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Factor2, fMemoryManager);

    if (ch == chDash) {

        fOffset++;
        while(fOffset < fStringLen) {

            int v = RegularExpression::getOptionValue(fString[fOffset]);
            ch = fString[fOffset];
            if (v == 0)
                break;
            mask |= v;
            fOffset++;
        }

        if (fOffset >= fStringLen)
            ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Factor2, fMemoryManager);
    }

    Token* tok = 0;

    if (ch == chColon) {

        // ':' form: the modifiers apply to the expression up to the
        // matching ')'.
        fOffset++;
        processNext();
        tok = fTokenFactory->createModifierGroup(parseRegx(),add,mask);

        if (fState != REGX_T_RPAREN)
            ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Factor1, fMemoryManager);

        processNext();
    }
    else if (ch == chCloseParen) {

        // ')' form: the modifiers apply to whatever parseRegx() parses next;
        // no closing parenthesis is required afterwards.
        fOffset++;
        processNext();
        tok = fTokenFactory->createModifierGroup(parseRegx(),add,mask);
    }
    else {
        ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Factor3, fMemoryManager);
    }

    return tok;
}

// Parses an independent group: the enclosed expression is wrapped in a look
// token of type Token::T_INDEPENDENT.
// Throws ParseException (Parser_Factor1) when the group is not followed by
// REGX_T_RPAREN.
Token* RegxParser::processIndependent() {

    processNext();

    Token* tok = fTokenFactory->createLook(Token::T_INDEPENDENT, parseRegx());

    if (fState != REGX_T_RPAREN)
        ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Factor1, fMemoryManager);

    processNext();
    return tok;
}

// Reads the character following "\c" and returns a character token whose
// value is that character minus 0x40.
// Throws ParseException (Parser_Atom1) when the pattern ends or the
// character is outside 0x0040-0x005F.
Token* RegxParser::processBacksolidus_c() {

    XMLCh ch; //Must be in 0x0040-0x005F

    if (fOffset >= fStringLen
        || ((ch = fString[fOffset++]) & 0xFFE0) != 0x0040)
        ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Atom1, fMemoryManager);

    processNext();
    return fTokenFactory->createChar(ch - 0x40);
}

// Not implemented: returns a null token without consuming any input.
Token* RegxParser::processBacksolidus_C() {

    // REVISIT - Do we throw an exception - we do not want to throw too
    // many exceptions
    return 0;
}

// Advances to the next token and returns a character token for the letter 'i'.
Token* RegxParser::processBacksolidus_i() {

    processNext();
    return fTokenFactory->createChar(chLatin_i);
}

// Not implemented: returns a null token without consuming any input.
Token* RegxParser::processBacksolidus_I() {

    //Ditto
    return 0;
}

// Advances to the next token and returns the factory's grapheme pattern token.
Token* RegxParser::processBacksolidus_g() {

    processNext();
    return fTokenFactory->getGraphemePattern();
}

// Advances to the next token and returns the factory's combining character
// sequence token.
Token* RegxParser::processBacksolidus_X() {

    processNext();
    return fTokenFactory->getCombiningCharacterSequence();
}

// Creates a back-reference token for the digit held in fCharData and records
// the reference, at position fOffset - 2, in fReferences.
Token* RegxParser::processBackReference() {

    int refNo = fCharData - chDigit_0;
    Token* tok = fTokenFactory->createBackReference(refNo);

    fHasBackReferences = true;

    // The reference list is created lazily on first use.
    if (fReferences == 0) {
        fReferences = new (fMemoryManager) RefVectorOf<ReferencePosition>(8, true, fMemoryManager);
    }

    fReferences->addElement(new (fMemoryManager) ReferencePosition(refNo, fOffset - 2));
    processNext();
    return tok;
}

// Parses one factor.
//
// Anchors, look-around groups, comments and the backslash assertions
// A, Z, z, b, B, < and > are handled directly and returned without looking
// for a quantifier. Anything else is parsed as an atom, which may then be
// followed by '*', '+', '?' or a "{min[,[max]]}" quantifier.
//
// Throws ParseException with Parser_Quantifier1..5 for a malformed
// "{...}" quantifier.
Token* RegxParser::parseFactor() {

    switch (fState) {

    case REGX_T_CARET:
        return processCaret();
    case REGX_T_DOLLAR:
        return processDollar();
    case REGX_T_LOOKAHEAD:
        return processLook(Token::T_LOOKAHEAD);
    case REGX_T_NEGATIVELOOKAHEAD:
        return processLook(Token::T_NEGATIVELOOKAHEAD);
    case REGX_T_LOOKBEHIND:
        return processLook(Token::T_LOOKBEHIND);
    case REGX_T_NEGATIVELOOKBEHIND:
        return processLook(Token::T_NEGATIVELOOKBEHIND);
    case REGX_T_COMMENT:
        processNext();
        return fTokenFactory->createToken(Token::T_EMPTY);
    case REGX_T_BACKSOLIDUS:
        // Backslash characters not listed here fall out of both switches
        // and are parsed as an atom below.
        switch(fCharData) {
        case chLatin_A:
            return processBacksolidus_A();
        case chLatin_Z:
            return processBacksolidus_Z();
        case chLatin_z:
            return processBacksolidus_z();
        case chLatin_b:
            // Lower-case 'b' must yield the word-edge token; it previously
            // dispatched to processBacksolidus_B() (not-word-edge).
            return processBacksolidus_b();
        case chLatin_B:
            return processBacksolidus_B();
        case chOpenAngle:
            return processBacksolidus_lt();
        case chCloseAngle:
            return processBacksolidus_gt();
        }
    }

    Token* tok = parseAtom();

    switch(fState) {

    case REGX_T_STAR:
        return processStar(tok);
    case REGX_T_PLUS:
        return processPlus(tok);
    case REGX_T_QUESTION:
        return processQuestion(tok);
    case REGX_T_CHAR:
        if (fCharData == chOpenCurly && fOffset < fStringLen) {

            int min = 0;
            int max = -1;
            XMLInt32 ch = fString[fOffset++];

            // Lower bound: at least one digit is required.
            if (ch >= chDigit_0 && ch <= chDigit_9) {

                min = ch - chDigit_0;
                while (fOffset < fStringLen
                       && (ch = fString[fOffset++]) >= chDigit_0
                       && ch <= chDigit_9) {

                    min = min*10 + ch - chDigit_0;
                }

                // NOTE(review): a negative value is treated as an
                // out-of-range number; this presumably relies on the
                // accumulation wrapping around -- confirm.
                if (min < 0)
                    ThrowXMLwithMemMgr1(ParseException, XMLExcepts::Parser_Quantifier5, fString, fMemoryManager);
            }
            else {
                ThrowXMLwithMemMgr1(ParseException, XMLExcepts::Parser_Quantifier1, fString, fMemoryManager);
            }

            // "{n}" means exactly n; "{n,}" leaves max at -1; "{n,m}" sets
            // both bounds.
            max = min;

            if (ch == chComma) {

                if (fOffset >= fStringLen) {
                    ThrowXMLwithMemMgr1(ParseException, XMLExcepts::Parser_Quantifier3, fString, fMemoryManager);
                }
                else if ((ch = fString[fOffset++]) >= chDigit_0 && ch <= chDigit_9) {

                    max = ch - chDigit_0;
                    while (fOffset < fStringLen
                           && (ch = fString[fOffset++]) >= chDigit_0
                           && ch <= chDigit_9) {

                        max = max*10 + ch - chDigit_0;
                    }

                    if (max < 0)
                        ThrowXMLwithMemMgr1(ParseException, XMLExcepts::Parser_Quantifier5, fString, fMemoryManager);
                    else if (min > max)
                        ThrowXMLwithMemMgr1(ParseException, XMLExcepts::Parser_Quantifier4, fString, fMemoryManager);
                }
                else {
                    max = -1;
                }
            }

            // The character that ended the number(s) must be '}'.
            if (ch != chCloseCurly)  {
                ThrowXMLwithMemMgr1(ParseException, XMLExcepts::Parser_Quantifier2, fString, fMemoryManager);
            }

            if (checkQuestion(fOffset)) {

                tok = fTokenFactory->createClosure(tok, true);
                fOffset++;
            }
            else {
                tok = fTokenFactory->createClosure(tok);
            }

            tok->setMin(min);
            tok->setMax(max);
            processNext();
        }
        break;
    }

    return tok;
}

// Parses one atom by dispatching on the current parser state.
// NOTE(review): this definition continues beyond the end of this excerpt;
// only the cases visible here are reproduced.
Token* RegxParser::parseAtom() {

    Token* tok = 0;

    switch(fState) {

    case REGX_T_LPAREN:
        return processParen();
    case REGX_T_LPAREN2:
        return processParen2();
    case REGX_T_CONDITION:
        return processCondition();
    case REGX_T_MODIFIERS:
        return processModifiers();
    case REGX_T_INDEPENDENT:
        return processIndependent();
    case REGX_T_DOT:
        processNext();
        tok = fTokenFactory->getDot();
        break;
    case REGX_T_LBRACKET:
        return parseCharacterClass(true);
    case REGX_T_SET_OPERATIONS:
        return parseSetOperations();
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?