regxparser.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,465 行 · 第 1/3 页
CPP
1,465 行
case REGX_T_BACKSOLIDUS: switch(fCharData) { case chLatin_d: case chLatin_D: case chLatin_w: case chLatin_W: case chLatin_s: case chLatin_S: tok = getTokenForShorthand(fCharData); processNext(); return tok; case chLatin_c: return processBacksolidus_c(); case chLatin_C: return processBacksolidus_C(); case chLatin_i: return processBacksolidus_i(); case chLatin_I: return processBacksolidus_I(); case chLatin_g: return processBacksolidus_g(); case chLatin_X: return processBacksolidus_X(); case chDigit_0: case chDigit_1: case chDigit_2: case chDigit_3: case chDigit_4: case chDigit_5: case chDigit_6: case chDigit_7: case chDigit_8: case chDigit_9: return processBackReference(); case chLatin_p: case chLatin_P: { tok = processBacksolidus_pP(fCharData); if (tok == 0) { ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Atom5, fMemoryManager); } } break; default: { XMLInt32 ch = decodeEscaped(); if (ch < 0x10000) { tok = fTokenFactory->createChar(ch); } else { XMLCh* surrogateStr = RegxUtil::decomposeToSurrogates(ch, fMemoryManager); ArrayJanitor<XMLCh> janSurrogate(surrogateStr, fMemoryManager); tok = fTokenFactory->createString(surrogateStr); } } break; } // end switch processNext(); break; case REGX_T_CHAR: if (fCharData == chOpenCurly || fCharData == chCloseCurly || fCharData == chCloseSquare) ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Atom4, fMemoryManager); tok = fTokenFactory->createChar(fCharData); processNext(); break; default: ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Atom4, fMemoryManager); } //end switch return tok;}RangeToken* RegxParser::processBacksolidus_pP(const XMLInt32 ch) { processNext(); if (fState != REGX_T_CHAR || fCharData != chOpenCurly) ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Atom2, fMemoryManager); int nameStart = fOffset; int nameEnd = XMLString::indexOf(fString,chCloseCurly,nameStart, fMemoryManager); if (nameEnd < 0) ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Atom3, fMemoryManager); fOffset = nameEnd + 1; XMLCh* rangeName = (XMLCh*) fMemoryManager->allocate ( (nameEnd - nameStart + 1) * sizeof(XMLCh) );//new XMLCh[(nameEnd - nameStart) + 1]; ArrayJanitor<XMLCh> janRangeName(rangeName, fMemoryManager); XMLString::subString(rangeName, fString, nameStart, nameEnd, fMemoryManager); return fTokenFactory->getRange(rangeName, !(ch == chLatin_p));}XMLInt32 RegxParser::processCInCharacterClass(RangeToken* const, const XMLInt32) { return decodeEscaped();}RangeToken* RegxParser::parseCharacterClass(const bool useNRange) { setParseContext(S_INBRACKETS); processNext(); RangeToken* base = 0; RangeToken* tok = 0; bool nRange = false; if (fState == REGX_T_CHAR && fCharData == chCaret) { nRange = true; processNext(); if (useNRange) { tok = fTokenFactory->createRange(true); } else { base = fTokenFactory->createRange(); base->addRange(0, Token::UTF16_MAX); tok = fTokenFactory->createRange(); } } else { tok = fTokenFactory->createRange(); } bool firstLoop = true; while (fState != REGX_T_EOF) { if (fState == REGX_T_CHAR && fCharData == chCloseSquare && !firstLoop) break; bool end = false; XMLInt32 ch = fCharData; firstLoop = false; if (fState == REGX_T_BACKSOLIDUS) { switch(ch) { case chLatin_d: case chLatin_D: case chLatin_w: case chLatin_W: case chLatin_s: case chLatin_S: tok->mergeRanges(getTokenForShorthand(ch)); end = true; break; case chLatin_i: case chLatin_I: case chLatin_c: case chLatin_C: ch = processCInCharacterClass(tok, ch); if (ch < 0){ end = true; } break; case chLatin_p: case chLatin_P: { RangeToken* tok2 = processBacksolidus_pP(ch); if (tok2 == 0) { ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Atom5, fMemoryManager); } tok->mergeRanges(tok2); end = true; } break; default: ch = decodeEscaped(); } } // end if REGX_T_BACKSOLIDUS else if (fState == REGX_T_POSIX_CHARCLASS_START) { int nameEnd = XMLString::indexOf(fString, chColon, fOffset, fMemoryManager); if (nameEnd < 0) { ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_CC1, fMemoryManager); } bool positive = true; if (fString[fOffset] == chCaret) { fOffset++; positive = false; } XMLCh* name = (XMLCh*) fMemoryManager->allocate ( (nameEnd - fOffset + 1) * sizeof(XMLCh) );//new XMLCh[(nameEnd - fOffset) + 1]; ArrayJanitor<XMLCh> janName(name, fMemoryManager); XMLString::subString(name, fString, fOffset, nameEnd, fMemoryManager); RangeToken* rangeTok = fTokenFactory->getRange(name, !positive); if (rangeTok == 0) { ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_CC3, fMemoryManager); } tok->mergeRanges(rangeTok); end = true; if (nameEnd+1 >= fStringLen || fString[nameEnd+1] != chCloseSquare) { ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_CC1, fMemoryManager); } fOffset = nameEnd + 2; } processNext(); if (!end) { if (fState != REGX_T_CHAR || fCharData != chDash) { tok->addRange(ch, ch); } else { processNext(); if (fState == REGX_T_EOF) ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_CC2, fMemoryManager); if (fState == REGX_T_CHAR && fCharData == chCloseSquare) { tok->addRange(ch, ch); tok->addRange(chDash, chDash); } else { XMLInt32 rangeEnd = fCharData; if (fState == REGX_T_BACKSOLIDUS) { rangeEnd = decodeEscaped(); } processNext(); tok->addRange(ch, rangeEnd); } } } if (isSet(RegularExpression::SPECIAL_COMMA) && fState == REGX_T_CHAR && fCharData == chComma) { processNext(); } } // end while fState if (fState == REGX_T_EOF) { ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_CC2, fMemoryManager); } if (!useNRange && nRange) { base->subtractRanges(tok); tok = base; } tok->sortRanges(); tok->compactRanges(); setParseContext(S_NORMAL); processNext(); return tok;}RangeToken* RegxParser::parseSetOperations() { RangeToken* tok = parseCharacterClass(false); while (fState != REGX_T_RPAREN) { if (fState == REGX_T_CHAR && (fCharData == chDash || fCharData == chAmpersand) || fState == REGX_T_PLUS) { processNext(); if (fState != REGX_T_LBRACKET) ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Ope1, fMemoryManager); RangeToken* tok2 = parseCharacterClass(false); if (fState == REGX_T_PLUS) { tok->mergeRanges(tok2); } else if (fCharData == chDash) { tok->subtractRanges(tok2); } else if (fCharData == chAmpersand) { tok->intersectRanges(tok2); } else { throw 0; // ThrowXMLwithMemMgr(RuntimeException, "ASSERT") } } else { ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Ope2, fMemoryManager); } } processNext(); return tok;}Token* RegxParser::getTokenForShorthand(const XMLInt32 ch) { Token* tok = 0; bool useUnicode = isSet(RegularExpression::USE_UNICODE_CATEGORY); switch (ch) { case chLatin_d: tok = useUnicode ? fTokenFactory->getRange(fgUniDecimalDigit) : fTokenFactory->getRange(fgASCIIDigit); break; case chLatin_D: tok = useUnicode ? fTokenFactory->getRange(fgUniDecimalDigit, true) : fTokenFactory->getRange(fgASCIIDigit, true); break; case chLatin_w: tok = useUnicode ? fTokenFactory->getRange(fgUniIsWord) : fTokenFactory->getRange(fgASCIIWord); break; case chLatin_W: tok = useUnicode ? fTokenFactory->getRange(fgUniIsWord, true) : fTokenFactory->getRange(fgASCIIWord, true); break; case chLatin_s: tok = useUnicode ? fTokenFactory->getRange(fgUniIsSpace) : fTokenFactory->getRange(fgASCIISpace); break; case chLatin_S: tok = useUnicode ? fTokenFactory->getRange(fgUniIsSpace, true) : fTokenFactory->getRange(fgASCIISpace, true);// default:// ThrowXMLwithMemMgr(RuntimeException, "Invalid shorthand {0}", chAsString) } return tok;}XMLInt32 RegxParser::decodeEscaped() { if (fState != REGX_T_BACKSOLIDUS) ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Next1, fMemoryManager); XMLInt32 ch = fCharData; switch (ch) { case chLatin_e: ch = 0x1B; // Escape break; case chLatin_f: ch = chFF; break; case chLatin_n: ch = chLF; break; case chLatin_r: ch = chCR; break; case chLatin_t: ch = chHTab; break; case chLatin_x: { processNext(); if (fState != REGX_T_CHAR) { ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Descape1, fMemoryManager); } if (fCharData == chOpenCurly) { int v1 = 0; XMLInt32 uv = 0; do { processNext(); if (fState != REGX_T_CHAR) ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Descape1, fMemoryManager); if ((v1 = hexChar(fCharData)) < 0) break; uv = uv*16 + v1; } while (true); if (fCharData != chCloseCurly) ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Descape3, fMemoryManager); if (uv > Token::UTF16_MAX) ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Descape4, fMemoryManager); ch = uv; } else { int v1 = 0; if (fState != REGX_T_CHAR || (v1 = hexChar(fCharData)) < 0) ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Descape1, fMemoryManager); int uv = v1; processNext(); if (fState != REGX_T_CHAR || (v1 = hexChar(fCharData)) < 0) ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Descape1, fMemoryManager); ch = uv*16 + v1; } } break; case chLatin_u: { int v1 = 0; int uv = 0; for (int i=0; i< 4; i++) { processNext(); if (fState != REGX_T_CHAR || (v1 = hexChar(fCharData)) < 0) ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Descape1, fMemoryManager); uv = (i == 0) ? v1 : uv*16 + v1; } ch = uv; } break; case chLatin_v: { int v1 = 0; int uv = 0; for (int i=0; i< 6; i++) { processNext(); if (fState != REGX_T_CHAR || (v1 = hexChar(fCharData)) < 0) ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Descape1, fMemoryManager); uv = (i == 0) ? v1 : uv*16 + v1; } if (uv > Token::UTF16_MAX) ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Descape1, fMemoryManager); ch = uv; } break; case chLatin_A: case chLatin_Z: case chLatin_z: ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Descape5, fMemoryManager); } // end switch return ch;}// ---------------------------------------------------------------------------// RegxParser: Helper Methods// ---------------------------------------------------------------------------bool RegxParser::checkQuestion(const int off) { return ((off < fStringLen) && fString[off] == chQuestion);}XERCES_CPP_NAMESPACE_END/** * End file RegxParser.cpp */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?