regxparser.cpp

来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,465 行 · 第 1/3 页

CPP
1,465
字号
    processNext();
    return fTokenFactory->getStringBegin();
}

// Handler for "\Z": consumes the current lexer token and returns the
// factory's "string end 2" token.
Token* RegxParser::processBacksolidus_Z() {
    processNext();
    return fTokenFactory->getStringEnd2();
}

// Handler for "\z": consumes the current lexer token and returns the
// factory's "string end" token.
Token* RegxParser::processBacksolidus_z() {
    processNext();
    return fTokenFactory->getStringEnd();
}

// Handler for "\b": consumes the current lexer token and returns the
// factory's word-edge token.
Token* RegxParser::processBacksolidus_b() {
    processNext();
    return fTokenFactory->getWordEdge();
}

// Handler for "\B": consumes the current lexer token and returns the
// factory's not-word-edge token.
Token* RegxParser::processBacksolidus_B() {
    processNext();
    return fTokenFactory->getNotWordEdge();
}

// Handler for "\<": consumes the current lexer token and returns the
// factory's word-begin token.
Token* RegxParser::processBacksolidus_lt() {
    processNext();
    return fTokenFactory->getWordBegin();
}

// Handler for "\>": consumes the current lexer token and returns the
// factory's word-end token.
Token* RegxParser::processBacksolidus_gt() {
    processNext();
    return fTokenFactory->getWordEnd();
}

// Handles a '*' quantifier applied to `tok`.
// Returns a closure over `tok`; if the '*' is immediately followed by '?',
// that '?' is consumed as well and the closure is created with the second
// argument set to true (the "X*?" form).
Token* RegxParser::processStar(Token* const tok) {
    processNext();

    if (fState == REGX_T_QUESTION) {
        processNext();
        return fTokenFactory->createClosure(tok, true);
    }

    return fTokenFactory->createClosure(tok);
}

// Handles a '+' quantifier applied to `tok`.
// "X+" is built as the concatenation of X and a closure over X; a trailing
// '?' is consumed and selects the closure variant created with `true`.
Token* RegxParser::processPlus(Token* const tok) {
    processNext();

    if (fState == REGX_T_QUESTION) {
        processNext();
        return fTokenFactory->createConcat(
            tok, fTokenFactory->createClosure(tok, true));
    }

    return fTokenFactory->createConcat(
        tok, fTokenFactory->createClosure(tok));
}

// Handles a '?' quantifier applied to `tok`.
// Builds a two-child union of `tok` and an empty token. For "X?" the order
// is (X, empty); for "X??" the second '?' is consumed and the order is
// reversed to (empty, X).
Token* RegxParser::processQuestion(Token* const tok) {
    processNext();

    Token* parentTok = fTokenFactory->createUnion();

    if (fState == REGX_T_QUESTION) {
        processNext();
        parentTok->addChild(fTokenFactory->createToken(Token::T_EMPTY), fTokenFactory);
        parentTok->addChild(tok, fTokenFactory);
    }
    else {
        parentTok->addChild(tok, fTokenFactory);
        parentTok->addChild(fTokenFactory->createToken(Token::T_EMPTY), fTokenFactory);
    }

    return parentTok;
}

// Parses a numbered group (lexer state REGX_T_LPAREN).
// The group number is taken from fNoGroups (post-incremented) before the
// group body is parsed. Throws ParseException (Parser_Factor1) if the body
// is not followed by a right parenthesis.
Token* RegxParser::processParen() {
    processNext();

    int num = fNoGroups++;
    Token* tok = fTokenFactory->createParenthesis(parseRegx(true), num);

    if (fState != REGX_T_RPAREN) {
        ThrowXMLwithMemMgr(ParseException, XMLExcepts::Parser_Factor1, fMemoryManager);
    }

    processNext();
    return tok;
}

// Parses a group for lexer state REGX_T_LPAREN2. The resulting parenthesis
// token always carries group number 0 and fNoGroups is not touched.
// Throws ParseException (Parser_Factor1) if the closing parenthesis is missing.
Token* RegxParser::processParen2() {
    processNext();

    Token* tok = fTokenFactory->createParenthesis(parseRegx(), 0);

    if (fState != REGX_T_RPAREN) {
        ThrowXMLwithMemMgr(ParseException, XMLExcepts::Parser_Factor1, fMemoryManager);
    }

    processNext();
    return tok;
}

// Parses a conditional group (lexer state REGX_T_CONDITION).
//
// The condition is either
//   - a single digit 1-9 followed by ')' (a reference number), or
//   - a factor that must be a look-ahead/look-behind token, or an anchor
//     token that is immediately followed by ')'.
// The body is then parsed with parseRegx(); if it is a union it must have
// exactly two children, which become the "yes" and "no" patterns.
//
// Throws ParseException with:
//   Parser_Factor4 - fewer than two characters remain in the pattern,
//   Parser_Factor1 - an expected ')' is missing,
//   Parser_Factor5 - the condition factor has an unsupported token type,
//   Parser_Factor6 - the body union does not have exactly two children.
Token* RegxParser::processCondition() {
    // Both fString[fOffset] and fString[fOffset + 1] may be read below.
    if (fOffset + 1 >= fStringLen) {
        ThrowXMLwithMemMgr(ParseException, XMLExcepts::Parser_Factor4, fMemoryManager);
    }

    int refNo = -1;
    Token* conditionTok = 0;
    XMLInt32 ch = fString[fOffset];

    if (chDigit_1 <= ch && ch <= chDigit_9) {
        // Numeric condition: record the reference and its position.
        refNo = ch - chDigit_0;
        fHasBackReferences = true;

        if (fReferences == 0) {
            this->fReferences = new (fMemoryManager) RefVectorOf<ReferencePosition>(8, true, fMemoryManager);
        }

        fReferences->addElement(new (fMemoryManager) ReferencePosition(refNo, fOffset));
        fOffset++;

        if (fString[fOffset] != chCloseParen) {
            ThrowXMLwithMemMgr(ParseException, XMLExcepts::Parser_Factor1, fMemoryManager);
        }

        fOffset++;
    }
    else {
        if (ch == chQuestion) {
            // Step back one character before re-lexing.
            // NOTE(review): presumably so processNext() sees the preceding
            // '(' and can recognise a "(?..." group - confirm in processNext().
            fOffset--;
        }

        processNext();
        conditionTok = parseFactor();

        switch (conditionTok->getTokenType()) {
        case Token::T_LOOKAHEAD:
        case Token::T_NEGATIVELOOKAHEAD:
        case Token::T_LOOKBEHIND:
        case Token::T_NEGATIVELOOKBEHIND:
            break;
        case Token::T_ANCHOR:
            if (fState != REGX_T_RPAREN) {
                ThrowXMLwithMemMgr(ParseException, XMLExcepts::Parser_Factor1, fMemoryManager);
            }
            break;
        default:
            ThrowXMLwithMemMgr(ParseException, XMLExcepts::Parser_Factor5, fMemoryManager);
        }
    }

    processNext();

    Token* yesPattern = parseRegx();
    Token* noPattern = 0;

    if (yesPattern->getTokenType() == Token::T_UNION) {
        if (yesPattern->size() != 2) {
            ThrowXMLwithMemMgr(ParseException, XMLExcepts::Parser_Factor6, fMemoryManager);
        }

        // Fetch child 1 first: yesPattern is overwritten on the next line.
        noPattern = yesPattern->getChild(1);
        yesPattern = yesPattern->getChild(0);
    }

    if (fState != REGX_T_RPAREN) {
        ThrowXMLwithMemMgr(ParseException, XMLExcepts::Parser_Factor1, fMemoryManager);
    }

    processNext();
    return fTokenFactory->createCondition(refNo, conditionTok,
                                          yesPattern, noPattern);
}

// Parses an inline modifier group (lexer state REGX_T_MODIFIERS).
//
// Option characters recognised by RegularExpression::getOptionValue() are
// OR-ed into `add`; after an optional '-', further option characters are
// OR-ed into `mask`. The list must be terminated by ':' (the group body is
// parsed and a closing ')' is required) or by ')' (the following expression
// is parsed as the body and no ')' is consumed here).
//
// Throws ParseException with:
//   Parser_Factor2 - the pattern ends inside the modifier list,
//   Parser_Factor3 - the list is terminated by something other than ':' or ')',
//   Parser_Factor1 - the ':' form is not closed by ')'.
Token* RegxParser::processModifiers() {
    // fOffset points to the next '?'.
    // modifiers ::= [imsw]* ('-' [imsw]*)? ':'
    int add = 0;
    int mask = 0;
    XMLInt32 ch = -1;

    while (fOffset < fStringLen) {
        int v = RegularExpression::getOptionValue(fString[fOffset]);
        ch = fString[fOffset];
        if (v == 0) {
            break;  // ch now holds the first non-option character
        }

        add |= v;
        fOffset++;
    } // end while

    if (fOffset >= fStringLen) {
        ThrowXMLwithMemMgr(ParseException, XMLExcepts::Parser_Factor2, fMemoryManager);
    }

    if (ch == chDash) {
        fOffset++;

        while (fOffset < fStringLen) {
            int v = RegularExpression::getOptionValue(fString[fOffset]);
            ch = fString[fOffset];
            if (v == 0) {
                break;
            }

            mask |= v;
            fOffset++;
        }

        if (fOffset >= fStringLen) {
            ThrowXMLwithMemMgr(ParseException, XMLExcepts::Parser_Factor2, fMemoryManager);
        }
    }

    Token* tok = 0;

    if (ch == chColon) {
        fOffset++;
        processNext();
        tok = fTokenFactory->createModifierGroup(parseRegx(), add, mask);

        if (fState != REGX_T_RPAREN) {
            ThrowXMLwithMemMgr(ParseException, XMLExcepts::Parser_Factor1, fMemoryManager);
        }

        processNext();
    }
    else if (ch == chCloseParen) {
        fOffset++;
        processNext();
        tok = fTokenFactory->createModifierGroup(parseRegx(), add, mask);
    }
    else {
        ThrowXMLwithMemMgr(ParseException, XMLExcepts::Parser_Factor3, fMemoryManager);
    }

    return tok;
}

// Parses an independent group (lexer state REGX_T_INDEPENDENT) and wraps the
// body in a T_INDEPENDENT look token.
// Throws ParseException (Parser_Factor1) if the closing ')' is missing.
Token* RegxParser::processIndependent() {
    processNext();

    Token* tok = fTokenFactory->createLook(Token::T_INDEPENDENT, parseRegx());

    if (fState != REGX_T_RPAREN) {
        ThrowXMLwithMemMgr(ParseException, XMLExcepts::Parser_Factor1, fMemoryManager);
    }

    processNext();
    return tok;
}

// Handler for "\c": reads the next pattern character, which must lie in
// 0x0040-0x005F, and returns a char token for (that character - 0x40).
// Throws ParseException (Parser_Atom1) if no character follows or it is
// outside that range.
Token* RegxParser::processBacksolidus_c() {
    XMLCh ch; //Must be in 0x0040-0x005F

    // Masking with 0xFFE0 keeps everything but the low five bits, so the
    // comparison accepts exactly 0x0040..0x005F.
    if (fOffset >= fStringLen
        || ((ch = fString[fOffset++]) & 0xFFE0) != 0x0040) {
        ThrowXMLwithMemMgr(ParseException, XMLExcepts::Parser_Atom1, fMemoryManager);
    }

    processNext();
    return fTokenFactory->createChar(ch - 0x40);
}

// Handler for "\C": currently yields a null token without consuming input.
Token* RegxParser::processBacksolidus_C() {
    // REVISIT - Do we throw an exception - we do not want to throw too
    // many exceptions
    return 0;
}

// Handler for "\i": consumes the current lexer token and returns a char
// token for the letter 'i'.
Token* RegxParser::processBacksolidus_i() {
    processNext();
    return fTokenFactory->createChar(chLatin_i);
}

// Handler for "\I": currently yields a null token without consuming input.
Token* RegxParser::processBacksolidus_I() {
    //Ditto
    return 0;
}

// Handler for "\g": consumes the current lexer token and returns the
// factory's grapheme pattern token.
Token* RegxParser::processBacksolidus_g() {
    processNext();
    return fTokenFactory->getGraphemePattern();
}

// Handler for "\X": consumes the current lexer token and returns the
// factory's combining-character-sequence token.
Token* RegxParser::processBacksolidus_X() {
    processNext();
    return fTokenFactory->getCombiningCharacterSequence();
}

// Handles a back reference whose digit is in fCharData.
// Creates the back-reference token, flags the pattern as containing back
// references and records the reference number together with a position.
Token* RegxParser::processBackReference() {
    int refNo = fCharData - chDigit_0;
    Token* tok = fTokenFactory->createBackReference(refNo);

    fHasBackReferences = true;

    if (fReferences == 0) {
        fReferences = new (fMemoryManager) RefVectorOf<ReferencePosition>(8, true, fMemoryManager);
    }

    // NOTE(review): fOffset - 2 is presumably the offset of the backslash
    // that introduced the reference - confirm against the lexer.
    fReferences->addElement(new (fMemoryManager) ReferencePosition(refNo, fOffset - 2));

    processNext();
    return tok;
}

// Parses one factor: either a zero-width construct (anchors, look-around,
// comment, \A \Z \z \b \B \< \>), which is returned directly, or an atom
// optionally followed by a quantifier ('*', '+', '?', or "{min[,[max]]}").
//
// For the curly-brace quantifier, `max` is -1 when the upper bound is
// omitted ("{n,}") and equals `min` for the "{n}" form. A '?' directly after
// the closing brace selects the closure variant created with `true`.
//
// Throws ParseException with:
//   Parser_Quantifier1 - '{' is not followed by a digit,
//   Parser_Quantifier2 - the quantifier is not closed by '}',
//   Parser_Quantifier3 - the pattern ends right after the ',',
//   Parser_Quantifier4 - min is greater than max,
//   Parser_Quantifier5 - min or max became negative while accumulating digits.
Token* RegxParser::parseFactor() {
    switch (fState) {
    case REGX_T_CARET:
        return processCaret();
    case REGX_T_DOLLAR:
        return processDollar();
    case REGX_T_LOOKAHEAD:
        return processLook(Token::T_LOOKAHEAD);
    case REGX_T_NEGATIVELOOKAHEAD:
        return processLook(Token::T_NEGATIVELOOKAHEAD);
    case REGX_T_LOOKBEHIND:
        return processLook(Token::T_LOOKBEHIND);
    case REGX_T_NEGATIVELOOKBEHIND:
        return processLook(Token::T_NEGATIVELOOKBEHIND);
    case REGX_T_COMMENT:
        processNext();
        return fTokenFactory->createToken(Token::T_EMPTY);
    case REGX_T_BACKSOLIDUS:
        switch (fCharData) {
        case chLatin_A:
            return processBacksolidus_A();
        case chLatin_Z:
            return processBacksolidus_Z();
        case chLatin_z:
            return processBacksolidus_z();
        case chLatin_b:
            // Lower-case 'b' is the word-edge assertion. This case used to
            // call processBacksolidus_B(), which made "\b" behave like "\B".
            return processBacksolidus_b();
        case chLatin_B:
            return processBacksolidus_B();
        case chOpenAngle:
            return processBacksolidus_lt();
        case chCloseAngle:
            return processBacksolidus_gt();
        }
        // Any other escape is not zero-width: leave the switch and let
        // parseAtom() handle it.
    }

    Token* tok = parseAtom();

    switch (fState) {
    case REGX_T_STAR:
        return processStar(tok);
    case REGX_T_PLUS:
        return processPlus(tok);
    case REGX_T_QUESTION:
        return processQuestion(tok);
    case REGX_T_CHAR:
        if (fCharData == chOpenCurly && fOffset < fStringLen) {
            int min = 0;
            int max = -1;
            XMLInt32 ch = fString[fOffset++];

            if (ch >= chDigit_0 && ch <= chDigit_9) {
                min = ch - chDigit_0;

                // On exit `ch` holds the first non-digit read, or the last
                // digit if the pattern ran out.
                while (fOffset < fStringLen
                       && (ch = fString[fOffset++]) >= chDigit_0
                       && ch <= chDigit_9) {
                    min = min*10 + ch - chDigit_0;
                }

                if (min < 0) {
                    ThrowXMLwithMemMgr1(ParseException, XMLExcepts::Parser_Quantifier5, fString, fMemoryManager);
                }
            }
            else {
                ThrowXMLwithMemMgr1(ParseException, XMLExcepts::Parser_Quantifier1, fString, fMemoryManager);
            }

            max = min;

            if (ch == chComma) {
                if (fOffset >= fStringLen) {
                    ThrowXMLwithMemMgr1(ParseException, XMLExcepts::Parser_Quantifier3, fString, fMemoryManager);
                }
                else if ((ch = fString[fOffset++]) >= chDigit_0 && ch <= chDigit_9) {
                    max = ch - chDigit_0;

                    while (fOffset < fStringLen
                           && (ch = fString[fOffset++]) >= chDigit_0
                           && ch <= chDigit_9) {
                        max = max*10 + ch - chDigit_0;
                    }

                    if (max < 0) {
                        ThrowXMLwithMemMgr1(ParseException, XMLExcepts::Parser_Quantifier5, fString, fMemoryManager);
                    }
                    else if (min > max) {
                        ThrowXMLwithMemMgr1(ParseException, XMLExcepts::Parser_Quantifier4, fString, fMemoryManager);
                    }
                }
                else {
                    // "{n,}" - no upper bound.
                    max = -1;
                }
            }

            if (ch != chCloseCurly) {
                ThrowXMLwithMemMgr1(ParseException, XMLExcepts::Parser_Quantifier2, fString, fMemoryManager);
            }

            if (checkQuestion(fOffset)) {
                tok = fTokenFactory->createClosure(tok, true);
                fOffset++;
            }
            else {
                tok = fTokenFactory->createClosure(tok);
            }

            tok->setMin(min);
            tok->setMax(max);
            processNext();
        }
        break;
    }

    return tok;
}

// Parses a single atom by dispatching on the current lexer state.
Token* RegxParser::parseAtom() {
    Token* tok = 0;

    switch (fState) {
    case REGX_T_LPAREN:
        return processParen();
    case REGX_T_LPAREN2:
        return processParen2();
    case REGX_T_CONDITION:
        return processCondition();
    case REGX_T_MODIFIERS:
        return processModifiers();
    case REGX_T_INDEPENDENT:
        return processIndependent();
    case REGX_T_DOT:
        processNext();
        tok = fTokenFactory->getDot();
        break;
    case REGX_T_LBRACKET:
        return parseCharacterClass(true);
    case REGX_T_SET_OPERATIONS:
        return parseSetOperations();

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?