token.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 470 行
CPP
470 行
/* * Copyright 2001,2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//* * $Log: Token.cpp,v $ * Revision 1.7 2004/09/08 13:56:47 peiyongz * Apache License Version 2.0 * * Revision 1.6 2003/12/17 00:18:37 cargilld * Update to memory management so that the static memory manager (one used to call Initialize) is only for static data. * * Revision 1.5 2002/11/21 14:56:35 gareth * Fixed bug in Token::analyzeFirstCharacter so that . matches new line with head character optimisation enabled. As per discussion Jennifer Schachter had with Khaled. * * Revision 1.4 2002/11/04 15:17:00 tng * C++ Namespace Support. * * Revision 1.3 2002/10/15 18:11:02 knoaman * [Bug 13489]: missing 'return' in Token.cpp * * Revision 1.2 2002/03/18 19:29:53 knoaman * Change constant names to eliminate possible conflict with user defined ones. * * Revision 1.1.1.1 2002/02/01 22:22:31 peiyongz * sane_include * * Revision 1.3 2001/05/11 13:26:50 tng * Copyright update. * * Revision 1.2 2001/05/03 18:17:49 knoaman * Some design changes: * o Changed the TokenFactory from a single static instance, to a * normal class. Each RegularExpression object will have its own * instance of TokenFactory, and that instance will be passed to * other classes that need to use a TokenFactory to create Token * objects (with the exception of RangeTokenMap). * o Added a new class RangeTokenMap to map a the different ranges * in a given category to a specific RangeFactory object. In the old * design RangeFactory had dual functionality (act as a Map, and as * a factory for creating RangeToken(s)). The RangeTokenMap will * have its own copy of the TokenFactory. There will be only one * instance of the RangeTokenMap class, and that instance will be * lazily deleted when XPlatformUtils::Terminate is called. * * Revision 1.1 2001/03/02 19:22:58 knoaman * Schema: Regular expression handling part I * */// ---------------------------------------------------------------------------// Includes// ---------------------------------------------------------------------------#include <xercesc/util/regx/RangeToken.hpp>#include <xercesc/util/regx/ModifierToken.hpp>#include <xercesc/util/regx/RegularExpression.hpp>#include <xercesc/util/regx/RegxUtil.hpp>XERCES_CPP_NAMESPACE_BEGIN// ---------------------------------------------------------------------------// Static member data initialization// ---------------------------------------------------------------------------const XMLInt32 Token::UTF16_MAX = 0x10FFFF;const unsigned short Token::FC_CONTINUE = 0;const unsigned short Token::FC_TERMINAL = 1;const unsigned short Token::FC_ANY = 2;// ---------------------------------------------------------------------------// Token: Constructors and Destructors// ---------------------------------------------------------------------------Token::Token(const unsigned short tokType , MemoryManager* const manager) : fTokenType(tokType) , fMemoryManager(manager){}Token::~Token() {}// ---------------------------------------------------------------------------// Token: Getter mthods// ---------------------------------------------------------------------------int Token::getMinLength() const { switch (fTokenType) { case T_CONCAT: { int sum = 0; unsigned int childSize = size(); for (unsigned int i=0; i<childSize; i++) { sum += getChild(i)->getMinLength(); } return sum; } case T_CONDITION: case T_UNION: { unsigned int childSize = size(); if (childSize == 0) { return 0; } int ret = getChild(0)->getMinLength(); for (unsigned int i=1; i < childSize; i++) { int min = getChild(i)->getMinLength(); if (min < ret) ret = min; } return ret; } case T_CLOSURE: case T_NONGREEDYCLOSURE: if (getMin() >= 0) return getMin() * getChild(0)->getMinLength(); return 0; case T_EMPTY: case T_ANCHOR: return 0; case T_DOT: case T_CHAR: case T_RANGE: case T_NRANGE: return 1; case T_INDEPENDENT: case T_PAREN: case T_MODIFIERGROUP: return getChild(0)->getMinLength(); case T_BACKREFERENCE: return 0; // ***** - REVISIT case T_STRING: return XMLString::stringLen(getString()); case T_LOOKAHEAD: case T_NEGATIVELOOKAHEAD: case T_LOOKBEHIND: case T_NEGATIVELOOKBEHIND: return 0; // ***** - REVIST// default:// throw; } // We should not get here, but we have it to make some compilers happy return -1;}int Token::getMaxLength() const { switch (fTokenType) { case T_CONCAT: { int sum = 0; unsigned int childSize = size(); for (unsigned int i=0; i<childSize; i++) { int val = getChild(i)->getMaxLength(); if (val < 0){ return -1; } sum += val; } return sum; } case T_CONDITION: case T_UNION: { unsigned int childSize = size(); if (childSize == 0) return 0; int ret = getChild(0)->getMaxLength(); for (unsigned i = 1; ret > 0 && i < childSize; i++) { int max = getChild(i)->getMaxLength(); if (max < 0) { ret = -1; break; } if (max > ret) ret = max; } return ret; } case T_CLOSURE: case T_NONGREEDYCLOSURE: if (getMax() >= 0) { return getMax() * getChild(0)->getMaxLength(); } return -1; case T_EMPTY: case T_ANCHOR: return 0; case T_CHAR: return 1; case T_DOT: case T_RANGE: case T_NRANGE: return 2; case T_INDEPENDENT: case T_PAREN: case T_MODIFIERGROUP: return getChild(0)->getMaxLength(); case T_BACKREFERENCE: return -1; // REVISIT case T_STRING: return XMLString::stringLen(getString()); case T_LOOKAHEAD: case T_NEGATIVELOOKAHEAD: case T_LOOKBEHIND: case T_NEGATIVELOOKBEHIND: return 0; // REVISIT// default:// throw; //ThrowXML(RuntimeException, ...) } // end switch return -1;}// ---------------------------------------------------------------------------// Token: Helper mthods// ---------------------------------------------------------------------------int Token::analyzeFirstCharacter(RangeToken* const rangeTok, const int options, TokenFactory* const tokFactory){ switch(fTokenType) { case T_CONCAT: { int ret = FC_CONTINUE; for (int i=0; i<size(); i++) { Token* tok = getChild(i); if (tok && (ret=tok->analyzeFirstCharacter(rangeTok, options, tokFactory))!= FC_CONTINUE) break; } return ret; } case T_UNION: { unsigned int childSize = size(); if (childSize == 0) return FC_CONTINUE; int ret = FC_CONTINUE; bool hasEmpty = false; for (unsigned int i=0; i < childSize; i++) { ret = getChild(i)->analyzeFirstCharacter(rangeTok, options, tokFactory); if (ret == FC_ANY) break; else hasEmpty = true; } return hasEmpty ? FC_CONTINUE : ret; } case T_CONDITION: { int ret1 = getChild(0)->analyzeFirstCharacter(rangeTok, options, tokFactory); if (size() == 1) return FC_CONTINUE; int ret2; if (ret1 != FC_ANY) { ret2 = getChild(1)->analyzeFirstCharacter(rangeTok, options, tokFactory); } if (ret1 == FC_ANY || ret2 == FC_ANY) return FC_ANY; if (ret1 == FC_CONTINUE || ret2 == FC_CONTINUE) return FC_CONTINUE; return FC_TERMINAL; } case T_CLOSURE: case T_NONGREEDYCLOSURE: { Token* tok = getChild(0); if (tok) tok->analyzeFirstCharacter(rangeTok, options, tokFactory); return FC_CONTINUE; } case T_DOT: return FC_ANY; case T_EMPTY: case T_ANCHOR: return FC_CONTINUE; case T_CHAR: { XMLInt32 ch = getChar(); rangeTok->addRange(ch, ch); if (ch < 0x1000 && isSet(options,RegularExpression::IGNORE_CASE)) { //REVISIT } } return FC_TERMINAL; case T_RANGE: { if (isSet(options, RegularExpression::IGNORE_CASE)) { rangeTok->mergeRanges(((RangeToken*) this)->getCaseInsensitiveToken(tokFactory)); } else { rangeTok->mergeRanges(this); } return FC_TERMINAL; } case T_NRANGE: { if (isSet(options, RegularExpression::IGNORE_CASE)) { RangeToken* caseITok = (((RangeToken*) this)->getCaseInsensitiveToken(tokFactory)); rangeTok->mergeRanges(RangeToken::complementRanges(caseITok, tokFactory, fMemoryManager)); } else { rangeTok->mergeRanges( RangeToken::complementRanges((RangeToken*) this, tokFactory, fMemoryManager)); } } case T_INDEPENDENT: case T_PAREN: { Token* tok = getChild(0); if (tok) return tok->analyzeFirstCharacter(rangeTok,options, tokFactory); } case T_MODIFIERGROUP: case T_BACKREFERENCE: rangeTok->addRange(0, UTF16_MAX); return FC_ANY; case T_STRING: { const XMLCh* str = getString(); XMLInt32 ch = str[0]; if (RegxUtil::isHighSurrogate((XMLCh) ch)) { } rangeTok->addRange(ch, ch); if (ch<0x10000 && isSet(options,RegularExpression::IGNORE_CASE)) { //REVISIT } } return FC_TERMINAL; case T_LOOKAHEAD: case T_NEGATIVELOOKAHEAD: case T_LOOKBEHIND: case T_NEGATIVELOOKBEHIND: return FC_CONTINUE;// default:// throw; } return 0;}Token* Token::findFixedString(int options, int& outOptions) { switch(fTokenType) { case T_CHAR: return 0; case T_STRING: outOptions = options; return this; case T_UNION: case T_CLOSURE: case T_NONGREEDYCLOSURE: case T_EMPTY: case T_ANCHOR: case T_RANGE: case T_NRANGE: case T_DOT: case T_BACKREFERENCE: case T_LOOKAHEAD: case T_NEGATIVELOOKAHEAD: case T_LOOKBEHIND: case T_NEGATIVELOOKBEHIND: case T_CONDITION: return 0; case T_INDEPENDENT: case T_PAREN: return getChild(0)->findFixedString(options, outOptions); case T_CONCAT: { Token* prevTok = 0; int prevOptions = 0; for (int i=0; i<size(); i++) { Token* tok = getChild(i)->findFixedString(options, outOptions); if (prevTok == 0 || prevTok->isShorterThan(tok)) { prevTok = tok; prevOptions = outOptions; } } outOptions = prevOptions; return prevTok; } case T_MODIFIERGROUP: { options |= ((ModifierToken *) this)->getOptions(); options &= ~((ModifierToken *) this)->getOptionsMask(); return getChild(0)->findFixedString(options, outOptions); } } // end switch return 0;}bool Token::isShorterThan(Token* const tok) { if (tok == 0) return false; if (getTokenType() != T_STRING && tok->getTokenType() != T_STRING) return false; //Should we throw an exception? int length = XMLString::stringLen(getString()); int tokLength = XMLString::stringLen(tok->getString()); return length < tokLength;}XERCES_CPP_NAMESPACE_END/** * End of file Token.cpp */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?