regularexpression.cpp

来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,678 行 · 第 1/4 页

CPP
1,678
字号
        regxParser->setTokenFactory(fTokenFactory);    }	Janitor<RegxParser> janRegxParser(regxParser);	fTokenTree = regxParser->parse(fPattern, fOptions);	fNoGroups = regxParser->getNoParen();	fHasBackReferences = regxParser->hasBackReferences();}// ---------------------------------------------------------------------------//  RegularExpression: Matching methods// ---------------------------------------------------------------------------bool RegularExpression::matches(const char* const expression                                , MemoryManager* const manager) {    XMLCh* tmpBuf = XMLString::transcode(expression, manager);    ArrayJanitor<XMLCh> janBuf(tmpBuf, manager);	return matches(tmpBuf, 0, XMLString::stringLen(tmpBuf), 0, manager);}bool RegularExpression::matches(const char* const expression,								const int start, const int end                                , MemoryManager* const manager) {	XMLCh* tmpBuf = XMLString::transcode(expression, manager);    ArrayJanitor<XMLCh> janBuf(tmpBuf, manager);	return matches(tmpBuf, start, end, 0, manager);}bool RegularExpression::matches(const char* const expression,								Match* const match                                , MemoryManager* const manager)				{	XMLCh* tmpBuf = XMLString::transcode(expression, manager);    ArrayJanitor<XMLCh> janBuf(tmpBuf, manager);	return matches(tmpBuf, 0, XMLString::stringLen(tmpBuf), match, manager);}bool RegularExpression::matches(const char* const expression, const int start,                                const int end, Match* const pMatch                                , MemoryManager* const manager)				{	XMLCh* tmpBuf = XMLString::transcode(expression, manager);    ArrayJanitor<XMLCh> janBuf(tmpBuf, manager);	return matches(tmpBuf, start, end, pMatch, manager);}// ---------------------------------------------------------------------------//  RegularExpression: Matching methods - Wide char version// ---------------------------------------------------------------------------bool RegularExpression::matches(const XMLCh* const expression, MemoryManager* const manager) {	return matches(expression, 0, XMLString::stringLen(expression), 0, manager);}bool RegularExpression::matches(const XMLCh* const expression,								const int start, const int end                                , MemoryManager* const manager) {	return matches(expression, start, end, 0, manager);}bool RegularExpression::matches(const XMLCh* const expression,								Match* const match                                , MemoryManager* const manager)				{	return matches(expression, 0, XMLString::stringLen(expression), match, manager);}bool RegularExpression::matches(const XMLCh* const expression, const int start,                                const int end, Match* const pMatch                                , MemoryManager* const manager)	{			if (fOperations == 0)		prepare();	Context context(manager);	int		 strLength = XMLString::stringLen(expression);    context.reset(expression, strLength, start, end, fNoClosures);	bool adoptMatch = false;	Match* lMatch = pMatch;	if (lMatch != 0) {		lMatch->setNoGroups(fNoGroups);	}	else if (fHasBackReferences) {		lMatch = new (fMemoryManager) Match(fMemoryManager);		lMatch->setNoGroups(fNoGroups);		adoptMatch = true;	}	if (context.fAdoptMatch)		delete context.fMatch;    context.fMatch = lMatch;	context.fAdoptMatch = adoptMatch;	if (isSet(fOptions, XMLSCHEMA_MODE)) {		int matchEnd = match(&context, fOperations, context.fStart, 1);		if (matchEnd == context.fLimit) {			if (context.fMatch != 0) {				context.fMatch->setStartPos(0, context.fStart);				context.fMatch->setEndPos(0, matchEnd);			}					return true;		}		return false;	}	/*	 *	If the pattern has only fixed string, use Boyer-Moore	 */	if (fFixedStringOnly) {		int ret = fBMPattern->matches(expression, context.fStart,			                          context.fLimit);		if (ret >= 0) {			if (context.fMatch != 0) {				context.fMatch->setStartPos(0, ret);				context.fMatch->setEndPos(0, ret + strLength);			}					return true;		}				return false;	}	/*	 *	If the pattern contains a fixed string, we check with Boyer-Moore	 *	whether the text contains the fixed string or not. If not found	 *	return false	 */	if (fFixedString != 0) {		int ret = fBMPattern->matches(expression, context.fStart,                                      context.fLimit);		if (ret < 0) { // No match			return false;		}	}	int limit = context.fLimit - fMinLength;	int matchStart;	int matchEnd = -1;	/*	 *	Check whether the expression start with ".*"	 */	if (fOperations != 0 && fOperations->getOpType() == Op::O_CLOSURE        && fOperations->getChild()->getOpType() == Op::O_DOT) {		if (isSet(fOptions, SINGLE_LINE)) {			matchStart = context.fStart;			matchEnd = match(&context, fOperations, matchStart, 1);		}		else {			bool previousIsEOL = true;			for (matchStart=context.fStart; matchStart<=limit; matchStart++) {				XMLCh ch = expression[matchStart];				if (RegxUtil::isEOLChar(ch)) {					previousIsEOL = true;				}				else {					if (previousIsEOL) {						if (0 <= (matchEnd = match(&context, fOperations,                                                   matchStart, 1)))                            break;					}					previousIsEOL = false;				}			}		}	}	else {        /*         *	Optimization against the first char         */		if (fFirstChar != 0) {			bool ignoreCase = isSet(fOptions, IGNORE_CASE);			RangeToken* range = fFirstChar;			if (ignoreCase)				range = fFirstChar->getCaseInsensitiveToken(fTokenFactory);			for (matchStart=context.fStart; matchStart<=limit; matchStart++) {                XMLInt32 ch;				if (!context.nextCh(ch, matchStart, 1))					break;				if (!range->match(ch)) {					if (!ignoreCase)						continue;					// Perform case insensitive match					// REVISIT					continue;				}				if (0 <= (matchEnd = match(&context,fOperations,matchStart,1)))					break;            }		}		else {            /*             *	Straightforward matching             */			for (matchStart=context.fStart; matchStart<=limit; matchStart++) {				if (0 <= (matchEnd = match(&context,fOperations,matchStart,1)))					break;			}		}	}	if (matchEnd >= 0) {		if (context.fMatch != 0) {			context.fMatch->setStartPos(0, matchStart);			context.fMatch->setEndPos(0, matchEnd);		}				return true;	}	return false;}// ---------------------------------------------------------------------------//  RegularExpression: Tokenize methods// ---------------------------------------------------------------------------RefArrayVectorOf<XMLCh>* RegularExpression::tokenize(const char* const expression) {  XMLCh* tmpBuf = XMLString::transcode(expression, fMemoryManager);  ArrayJanitor<XMLCh> janBuf(tmpBuf, fMemoryManager);  return tokenize(tmpBuf, 0, XMLString::stringLen(tmpBuf));}RefArrayVectorOf<XMLCh>* RegularExpression::tokenize(const char* const expression,								const int start, const int end) {  XMLCh* tmpBuf = XMLString::transcode(expression, fMemoryManager);  ArrayJanitor<XMLCh> janBuf(tmpBuf, fMemoryManager);  return tokenize(tmpBuf, start, end);}// ---------------------------------------------------------------------------//  RegularExpression: Tokenize methods - Wide char version// ---------------------------------------------------------------------------RefArrayVectorOf<XMLCh>* RegularExpression::tokenize(const XMLCh* const expression) {  return tokenize(expression, 0, XMLString::stringLen(expression), 0);}RefArrayVectorOf<XMLCh>* RegularExpression::tokenize(const XMLCh* const expression,								                                     const int start, const int end){  return tokenize(expression, start, end, 0);}RefArrayVectorOf<XMLCh>* RegularExpression::tokenize(const XMLCh* const expression,                                                      const int start, const int end,                                                     RefVectorOf<Match> *subEx){    if (fOperations == 0)	  prepare();  RefArrayVectorOf<XMLCh>* tokenStack = new (fMemoryManager) RefArrayVectorOf<XMLCh>(16, true, fMemoryManager);  Context context(fMemoryManager);  int		 strLength = XMLString::stringLen(expression);   context.reset(expression, strLength, start, end, fNoClosures);   Match* lMatch = 0;  bool adoptMatch = false;  if (subEx || fHasBackReferences) {    lMatch = new (fMemoryManager) Match(fMemoryManager);    adoptMatch = true;    lMatch->setNoGroups(fNoGroups);  }  if (context.fAdoptMatch) 	  delete context.fMatch;    context.fMatch = lMatch;  context.fAdoptMatch = adoptMatch;  int tokStart = start;  int matchStart = start;  for (; matchStart <= end; matchStart++) {    	  int matchEnd = match(&context, fOperations, matchStart, 1);   	  if (matchEnd != -1) { 	    if (context.fMatch != 0) { 	      context.fMatch->setStartPos(0, context.fStart); 	      context.fMatch->setEndPos(0, matchEnd); 	    }      if (subEx){        subEx->addElement(lMatch);        lMatch = new (fMemoryManager) Match(*(context.fMatch));        adoptMatch = true;                context.fAdoptMatch = adoptMatch;        context.fMatch = lMatch;      }      XMLCh* token;      if (tokStart == matchStart){          if (tokStart == strLength){          tokStart--;          break;          }        token = (XMLCh*) fMemoryManager->allocate(sizeof(XMLCh));//new XMLCh[1];        token[0] = chNull;        // When you tokenize using zero string, will return each        // token in the string. Since the zero string will also         // match the start/end characters, resulting in empty         // tokens, we ignore them and do not add them to the stack.         if (!XMLString::equals(fPattern, &chNull))           tokenStack->addElement(token);         else            fMemoryManager->deallocate(token);//delete[] token;      } else {        token = (XMLCh*) fMemoryManager->allocate        (            (matchStart + 1 - tokStart) * sizeof(XMLCh)        );//new XMLCh[matchStart + 1 - tokStart];        XMLString::subString(token, expression, tokStart, matchStart, fMemoryManager);        tokenStack->addElement(token);      }       tokStart = matchEnd;      //decrement matchStart as will increment it at the top of the loop      if (matchStart < matchEnd - 1)         matchStart = matchEnd - 1; 	        }  }   XMLCh* token;   if (matchStart == tokStart + 1){    token = (XMLCh*) fMemoryManager->allocate(sizeof(XMLCh));//new XMLCh[1];    token[0] = chNull;    } else {    token = (XMLCh*) fMemoryManager->allocate    (        (strLength + 1 - tokStart) * sizeof(XMLCh)    );//new XMLCh[strLength + 1 - tokStart];    XMLString::subString(token, expression, tokStart, strLength, fMemoryManager);  }    if (!XMLString::equals(fPattern, &chNull))     tokenStack->addElement(token);  else    fMemoryManager->deallocate(token);//delete[] token;  return tokenStack;}// -----------------------------------------------------------------------//  RegularExpression: Replace methods// -----------------------------------------------------------------------XMLCh* RegularExpression::replace(const char* const matchString,                                   const char* const replaceString){	XMLCh* tmpBuf = XMLString::transcode(matchString, fMemoryManager);    ArrayJanitor<XMLCh> janBuf(tmpBuf, fMemoryManager);	XMLCh* tmpBuf2 = XMLString::transcode(replaceString, fMemoryManager);    ArrayJanitor<XMLCh> janBuf2(tmpBuf2, fMemoryManager);	return replace(tmpBuf, tmpBuf2, 0, XMLString::stringLen(tmpBuf));}XMLCh* RegularExpression::replace(const char* const matchString,                                   const char* const replaceString,                                  const int start, const int end){ 	XMLCh* tmpBuf = XMLString::transcode(matchString, fMemoryManager);    ArrayJanitor<XMLCh> janBuf(tmpBuf, fMemoryManager); 	XMLCh* tmpBuf2 = XMLString::transcode(replaceString, fMemoryManager);    ArrayJanitor<XMLCh> janBuf2(tmpBuf2, fMemoryManager);    return replace(tmpBuf, tmpBuf2, start, end);}// ---------------------------------------------------------------------------//  RegularExpression: Replace methods - Wide char version

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?