regularexpression.cpp

来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,678 行 · 第 1/4 页

CPP
1,678
字号
/* * Copyright 2001-2004 The Apache Software Foundation. *  * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at *  *      http://www.apache.org/licenses/LICENSE-2.0 *  * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//* * $Log: RegularExpression.cpp,v $ * Revision 1.23  2004/09/08 13:56:47  peiyongz * Apache License Version 2.0 * * Revision 1.22  2004/01/13 20:05:00  peiyongz * revert code back to previous version * * Revision 1.19  2003/12/24 15:24:15  cargilld * More updates to memory management so that the static memory manager. * * Revision 1.18  2003/12/17 05:16:59  neilg * ensure all uses of ArrayJanitor use a memory manager * * Revision 1.17  2003/12/17 00:18:37  cargilld * Update to memory management so that the static memory manager (one used to call Initialize) is only for static data. * * Revision 1.16  2003/12/16 12:25:48  cargilld * Change a conditional expression to an if-else to avoid a compiler problem. * * Revision 1.15  2003/10/01 16:32:40  neilg * improve handling of out of memory conditions, bug #23415.  Thanks to David Cargill. * * Revision 1.14  2003/08/14 02:57:27  knoaman * Code refactoring to improve performance of validation. * * Revision 1.13  2003/05/25 21:42:41  knoaman * Allocate/Deallocate Context::xxx only when necessary. * * Revision 1.12  2003/05/18 14:02:06  knoaman * Memory manager implementation: pass per instance manager. * * Revision 1.11  2003/05/16 21:37:00  knoaman * Memory manager implementation: Modify constructors to pass in the memory manager. * * Revision 1.10  2003/05/16 06:01:57  knoaman * Partial implementation of the configurable memory manager. * * Revision 1.9  2003/05/16 00:03:10  knoaman * Partial implementation of the configurable memory manager. * * Revision 1.8  2003/05/15 18:42:54  knoaman * Partial implementation of the configurable memory manager. * * Revision 1.7  2003/05/12 10:08:22  gareth * The correct file this time. * * Revision 1.5  2002/12/18 13:01:02  gareth * New functionality - tokenize and replace. Fixed REVISIT for case insensitive match. Patch by Jennifer Schachter. * * Revision 1.4  2002/11/04 15:17:00  tng * C++ Namespace Support. * * Revision 1.3  2002/10/15 18:56:02  knoaman * [Bug 13604] while loop never terminates. * * Revision 1.2  2002/03/18 19:29:53  knoaman * Change constant names to eliminate possible conflict with user defined ones. * * Revision 1.1.1.1  2002/02/01 22:22:30  peiyongz * sane_include * * Revision 1.6  2002/01/02 20:09:11  knoaman * Fix for regular expression patterns that begin with ".". * * Revision 1.5  2001/10/09 12:20:25  tng * Leak fix: Need to delete fMatch if adopted. * * Revision 1.4  2001/05/11 21:50:58  knoaman * Schema updates and fixes. * * Revision 1.3  2001/05/11 13:26:46  tng * Copyright update. * * Revision 1.2  2001/05/03 18:17:42  knoaman * Some design changes: * o Changed the TokenFactory from a single static instance, to a *    normal class. Each RegularExpression object will have its own *    instance of TokenFactory, and that instance will be passed to *    other classes that need to use a TokenFactory to create Token *    objects (with the exception of RangeTokenMap). * o Added a new class RangeTokenMap to map a the different ranges *    in a given category to a specific RangeFactory object. In the old *    design RangeFactory had dual functionality (act as a Map, and as *    a factory for creating RangeToken(s)). The RangeTokenMap will *    have its own copy of the TokenFactory. There will be only one *    instance of the RangeTokenMap class, and that instance will be *    lazily deleted when XPlatformUtils::Terminate is called. * * Revision 1.1  2001/03/02 19:22:52  knoaman * Schema: Regular expression handling part I * */// ---------------------------------------------------------------------------//  Includes// ---------------------------------------------------------------------------#include <xercesc/util/regx/RegularExpression.hpp>#include <xercesc/util/PlatformUtils.hpp>#include <xercesc/util/regx/RegxUtil.hpp>#include <xercesc/util/regx/Match.hpp>#include <xercesc/util/regx/RangeToken.hpp>#include <xercesc/util/regx/RegxDefs.hpp>#include <xercesc/util/regx/XMLUniCharacter.hpp>#include <xercesc/util/regx/ParserForXMLSchema.hpp>#include <xercesc/util/Janitor.hpp>#include <xercesc/util/ParseException.hpp>#include <xercesc/util/IllegalArgumentException.hpp>#include <xercesc/framework/XMLBuffer.hpp>#include <xercesc/util/OutOfMemoryException.hpp>XERCES_CPP_NAMESPACE_BEGIN// ---------------------------------------------------------------------------//  Static member data initialization// ---------------------------------------------------------------------------const unsigned int RegularExpression::MARK_PARENS = 1;const unsigned int RegularExpression::IGNORE_CASE = 2;const unsigned int RegularExpression::SINGLE_LINE = 4;const unsigned int RegularExpression::MULTIPLE_LINE = 8;const unsigned int RegularExpression::EXTENDED_COMMENT = 16;const unsigned int RegularExpression::USE_UNICODE_CATEGORY = 32;const unsigned int RegularExpression::UNICODE_WORD_BOUNDARY = 64;const unsigned int RegularExpression::PROHIBIT_HEAD_CHARACTER_OPTIMIZATION = 128;const unsigned int RegularExpression::PROHIBIT_FIXED_STRING_OPTIMIZATION = 256;const unsigned int RegularExpression::XMLSCHEMA_MODE = 512;const unsigned int RegularExpression::SPECIAL_COMMA = 1024;const unsigned short RegularExpression::WT_IGNORE = 0;const unsigned short RegularExpression::WT_LETTER = 1;const unsigned short RegularExpression::WT_OTHER = 2;RangeToken*          RegularExpression::fWordRange = 0;// ---------------------------------------------------------------------------//  RegularExpression::Context: Constructors and Destructor// ---------------------------------------------------------------------------RegularExpression::Context::Context(MemoryManager* const manager) :    	fAdoptMatch(false)    , fStart(0)	, fLimit(0)	, fLength(0)	, fSize(0)    , fStringMaxLen(0)	, fOffsets(0)	, fMatch(0)	, fString(0)    , fMemoryManager(manager){}RegularExpression::Context::~Context(){	if (fOffsets)        fMemoryManager->deallocate(fOffsets);//delete [] fOffsets;    fMemoryManager->deallocate(fString);//delete [] fString;	if (fAdoptMatch)		delete fMatch;}// ---------------------------------------------------------------------------//  RegularExpression::Context: Public methods// ---------------------------------------------------------------------------void RegularExpression::Context::reset(const XMLCh* const string                                       , const int stringLen                                       , const int start                                       , const int limit                                       , const int noClosures){    if (stringLen > fStringMaxLen || !fString) {        fStringMaxLen = stringLen;        if (fString)            fMemoryManager->deallocate(fString);        fString = XMLString::replicate(string, fMemoryManager);    }    else {        memcpy(fString, string, (stringLen + 1) * sizeof(XMLCh));    }	fStart = start;	fLimit = limit;	fLength = fLimit - fStart;		if (fAdoptMatch)		delete fMatch;	fMatch = 0;	if (fSize != noClosures) {		if (fOffsets)            fMemoryManager->deallocate(fOffsets);//delete [] fOffsets;		fOffsets = (int*) fMemoryManager->allocate(noClosures * sizeof(int));//new int[noClosures];	}	fSize = noClosures;	for (int i = 0; i< fSize; i++)		fOffsets[i] = -1;}bool RegularExpression::Context::nextCh(XMLInt32& ch, int& offset,										const short direction){	ch = fString[offset];	if (RegxUtil::isHighSurrogate(ch)) {		if ((offset + 1 < fLimit) && (direction > 0) &&			RegxUtil::isLowSurrogate(fString[offset+1])) {				ch = RegxUtil::composeFromSurrogate(ch, fString[++offset]);		}		else			return false;	}	else if (RegxUtil::isLowSurrogate(ch)) {		if ((offset - 1 >= 0) && (direction <= 0) &&			RegxUtil::isHighSurrogate(fString[offset-1])) {				ch = RegxUtil::composeFromSurrogate(fString[--offset], ch);		}		else			return false;	}	return true;}// ---------------------------------------------------------------------------//  RegularExpression: Constructors and Destructors// ---------------------------------------------------------------------------RegularExpression::RegularExpression(const char* const pattern,                                     MemoryManager* const manager)	:fHasBackReferences(false),	 fFixedStringOnly(false),	 fNoGroups(0),	 fMinLength(0),	 fNoClosures(0),	 fOptions(0),	 fBMPattern(0),	 fPattern(0),	 fFixedString(0),	 fOperations(0),	 fTokenTree(0),	 fFirstChar(0),     fOpFactory(manager),     fTokenFactory(0),     fMemoryManager(manager){	try {		XMLCh* tmpBuf = XMLString::transcode(pattern, fMemoryManager);        ArrayJanitor<XMLCh> janBuf(tmpBuf, fMemoryManager);		setPattern(tmpBuf);	}    catch(const OutOfMemoryException&)    {        throw;    }    catch (...) {		cleanUp();		throw;	}}RegularExpression::RegularExpression(const char* const pattern,									 const char* const options,                                     MemoryManager* const manager)	:fHasBackReferences(false),	 fFixedStringOnly(false),	 fNoGroups(0),	 fMinLength(0),	 fNoClosures(0),	 fOptions(0),	 fBMPattern(0),	 fPattern(0),	 fFixedString(0),	 fOperations(0),	 fTokenTree(0),	 fFirstChar(0),     fOpFactory(manager),     fTokenFactory(0),     fMemoryManager(manager){	try {		XMLCh* tmpBuf = XMLString::transcode(pattern, fMemoryManager);		ArrayJanitor<XMLCh> janBuf(tmpBuf, fMemoryManager);		XMLCh* tmpOptions = XMLString::transcode(options, fMemoryManager);		ArrayJanitor<XMLCh> janOps(tmpOptions, fMemoryManager);		setPattern(tmpBuf, tmpOptions);	}    catch(const OutOfMemoryException&)    {        throw;    }    catch (...) {		cleanUp();		throw;	}}RegularExpression::RegularExpression(const XMLCh* const pattern,                                     MemoryManager* const manager)	:fHasBackReferences(false),	 fFixedStringOnly(false),	 fNoGroups(0),	 fMinLength(0),	 fNoClosures(0),	 fOptions(0),	 fBMPattern(0),	 fPattern(0),	 fFixedString(0),	 fOperations(0),	 fTokenTree(0),	 fFirstChar(0),     fOpFactory(manager),     fTokenFactory(0),     fMemoryManager(manager){	try {		setPattern(pattern);	}    catch(const OutOfMemoryException&)    {        throw;    }    catch (...) {		cleanUp();		throw;	}}RegularExpression::RegularExpression(const XMLCh* const pattern,									 const XMLCh* const options,                                     MemoryManager* const manager)	:fHasBackReferences(false),	 fFixedStringOnly(false),	 fNoGroups(0),	 fMinLength(0),	 fNoClosures(0),	 fOptions(0),	 fBMPattern(0),	 fPattern(0),	 fFixedString(0),	 fOperations(0),	 fTokenTree(0),	 fFirstChar(0),     fOpFactory(manager),     fTokenFactory(0),     fMemoryManager(manager){	try {		setPattern(pattern, options);	}    catch(const OutOfMemoryException&)    {        throw;    }    catch (...) {		cleanUp();		throw;	}}RegularExpression::~RegularExpression() {	cleanUp();}// ---------------------------------------------------------------------------//  RegularExpression: Setter methods// ---------------------------------------------------------------------------void RegularExpression::setPattern(const XMLCh* const pattern,								   const XMLCh* const options) {    fTokenFactory = new (fMemoryManager) TokenFactory(fMemoryManager);	fOptions = parseOptions(options);	fPattern = XMLString::replicate(pattern, fMemoryManager);    // the following construct causes an error in an Intel 7.1 32 bit compiler for     // red hat linux 7.2    // (when an exception is thrown the wrong object is deleted)    //RegxParser* regxParser = isSet(fOptions, XMLSCHEMA_MODE)    //	? new (fMemoryManager) ParserForXMLSchema(fMemoryManager)     //    : new (fMemoryManager) RegxParser(fMemoryManager);    RegxParser* regxParser;    if (isSet(fOptions, XMLSCHEMA_MODE)) {	    regxParser = new (fMemoryManager) ParserForXMLSchema(fMemoryManager);    }    else {        regxParser = new (fMemoryManager) RegxParser(fMemoryManager);    }    if (regxParser) {

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?