📄 engine.cpp
字号:
// Will only move forward over chars already examined by a rule; // therefore, getChar() can't return kEndOfText, kNeedMoreInput, etc.{ for (unsigned int i = 0; i < numChars; ++i) { if (iBufPtr == iBufEnd) { iBuffer[iBufEnd++] = prevStage->getChar(); if (iBufEnd == iBufStart) { ++iBufStart; if (iBufStart == iBufSize) iBufStart = 0; } if (iBufEnd == iBufSize) iBufEnd = 0; } iBufPtr++; if (iBufPtr == iBufSize) iBufPtr = 0; }}template<class T>static const T*binary_search(const T* array, UInt32 count, UInt32 value){ while (count > 0) { const T* i = array; UInt32 count2 = count / 2; i += count2; if (READ(*i) < value) { array = i + 1; count -= count2 + 1; } else count = count2; } return array;}longPass::classMatch(UInt32 classNumber, UInt32 inChar) const{ const UInt32* classPtr = (const UInt32*)(matchClassBase + READ(*((const UInt32*)matchClassBase + classNumber))); UInt32 memberCount = READ(*classPtr++); if (bInputIsUnicode) { if (bSupplementaryChars) { // classes are 32-bit const UInt32* p = binary_search(classPtr, memberCount, inChar); if (READ(*p) == inChar) return p - classPtr; } else { // classes are 16-bit const UInt16* p = binary_search((const UInt16*)classPtr, memberCount, inChar); if (READ(*p) == inChar) return p - (const UInt16*)classPtr; } } else { // classes are 8-bit const UInt8* p = binary_search((const UInt8*)classPtr, memberCount, inChar); if (READ(*p) == inChar) return p - (const UInt8*)classPtr; } return -1;}UInt32Pass::repClassMember(UInt32 classNumber, UInt32 index) const{ const UInt32* classPtr = (const UInt32*)(repClassBase + READ(*((const UInt32*)repClassBase + classNumber))); UInt32 memberCount = READ(*classPtr++); if (index < memberCount) if (bOutputIsUnicode) if (bSupplementaryChars) return READ(classPtr[index]); else return READ(((const UInt16*)classPtr)[index]); else { return READ(((const UInt8*)classPtr)[index]); } else return 0; // this can't happen if the compiler is right!}#ifdef TRACINGstatic int _depth = 0;#endif#define RETURN(x) do { _rval = (x); goto _return_label; } while (0)#define matchYes 1#define matchNo 0UInt32Pass::match(int index, int repeats, int textLoc){/* attempt to match pattern starting at /index/ initial repeat count is /repeats/ text offset is /textLoc/ recurses whenever we might need to backtrack returns matchYes - succeeded matchNo - can't match at this position other values, eg: kNeedMoreInput kInvalidChar kUnmappedChar - aborted without a definite decision*/#ifdef TRACINGcerr << "match(" << index << ", " << repeats << ", " << textLoc << ")\n";#endif UInt32 _rval = matchNo; // we come back here to loop rather than recurse, with new values for the argumentsRESTART: // if this is the first attempt to match at this index, record where we are if (repeats == 0) { if (index == matchElems) matchedLength = textLoc; if (index < infoLimit) { info[index].matchedSpan.start = textLoc;#ifdef TRACINGcerr << "info[" << index << "].matchedSpan.start = " << textLoc << "\n";#endif } } // if we're at the end of the pattern, we have a match if (index >= patternLength) RETURN(matchYes); if (index == 0 && repeats == 0) sgrStack = 0; // ensure this is cleared at start of pattern (shouldn't be necessary?) { // gcc complains about jumping past initializers (from RETURN above) without this UInt32 mr; const MatchElem& m = pattern[index]; int repeatMin = READ(m.flags.repeat) >> 4; int repeatMax = READ(m.flags.repeat) & 0x0f; UInt8 type = READ(m.flags.type); bool negate = ((type & kMatchElem_Negate) != 0); type = ((type & kMatchElem_NonLit) != 0) ? type & kMatchElem_TypeMask : 0; int classIndex; bool matches; UInt32 inChar; // start of group: try each alternative in turn if (type == kMatchElem_Type_BGroup) { // try matching one of the alternatives in the group (again) info[index].groupRepeats = repeats; if (repeats < repeatMax) { int altIndex = index; while (true) { mr = match(altIndex + 1, 0, textLoc); if (mr != matchNo) RETURN(mr); // failed, so step ahead to next alternative or end of group altIndex += READ(pattern[altIndex].value.bgroup.dNext); if ((READ(pattern[altIndex].flags.type) & kMatchElem_TypeMask) != kMatchElem_Type_OR) break; } } // if the group has matched enough times... if (repeats >= repeatMin) { // try to match following stuff#ifdef TRACINGcerr << "repeats >= repeatMin\n";#endif mr = match(index + READ(m.value.bgroup.dAfter), 0, textLoc); if (mr == matchYes) { if (index < infoLimit) { info[index].matchedSpan.limit = textLoc;#ifdef TRACINGcerr << "group returning matchYes; info[" << index << "].matchedSpan.limit = " << textLoc << "\n";#endif // don't allow elements within the group to indicate matches beyond the span of the group itself for (int i = index + READ(m.value.bgroup.dAfter) - 1; i > index; --i) if (i < infoLimit) { if (info[i].matchedSpan.start > textLoc) info[i].matchedSpan.start = textLoc; if (info[i].matchedSpan.limit > textLoc) info[i].matchedSpan.limit = textLoc; } } } RETURN(mr); } // otherwise just backtrack RETURN(matchNo); } // reached end of an alternative else if (type == kMatchElem_Type_OR || type == kMatchElem_Type_EGroup) { int startIndex = index - READ(m.value.egroup.dStart); mr = match(startIndex, info[startIndex].groupRepeats + 1, textLoc); RETURN(mr); } // not a group, so we loop rather than recurse until optionality strikes else { // ensure that item matches at least repeatMin times while (repeats < repeatMin) { inChar = inputChar(textLoc); if (inChar == kInvalidChar || inChar == kNeedMoreInput || inChar == kUnmappedChar) RETURN(inChar); matches = false; switch (type) { case 0: // literal matches = (READ(m.value.usv.data) & kUSVMask) == inChar; break; case kMatchElem_Type_Class: classIndex = classMatch(READ(m.value.cls.index), inChar); matches = (classIndex != -1); if (matches && repeats == 0 && index < infoLimit) info[index].classIndex = classIndex; break; case kMatchElem_Type_ANY: matches = (inChar != kEndOfText); break; case kMatchElem_Type_EOS: matches = (inChar == kEndOfText); break; } matches = (matches != negate); if (!matches) RETURN(matchNo); ++repeats; textLoc += direction; } if (index < infoLimit) { info[index].matchedSpan.limit = textLoc;#ifdef TRACINGcerr << "info[" << index << "].matchedSpan.limit = " << textLoc << "\n";#endif } if (repeatMin == repeatMax) { // no need to recurse, as no optionality ++index; repeats = 0; goto RESTART; } // try for another repeat if allowed if (repeats < repeatMax) { inChar = inputChar(textLoc); if (inChar == kInvalidChar || inChar == kNeedMoreInput || inChar == kUnmappedChar) RETURN(inChar); matches = false; switch (type) { case 0: // literal matches = (READ(m.value.usv.data) & kUSVMask) == inChar; break; case kMatchElem_Type_Class: classIndex = classMatch(READ(m.value.cls.index), inChar); matches = (classIndex != -1); if (matches && repeats == 0 && index < infoLimit) info[index].classIndex = classIndex; break; case kMatchElem_Type_ANY: matches = (inChar != kEndOfText); break; case kMatchElem_Type_EOS: matches = (inChar == kEndOfText); break; } matches = (matches != negate); if (matches) { mr = match(index, repeats + 1, textLoc + direction); if (mr != matchNo) RETURN(mr); } } // otherwise try to match the remainder of the pattern mr = match(index + 1, 0, textLoc); RETURN(mr); } }_return_label: if (_rval == matchNo) if (index < infoLimit) { info[index].matchedSpan.limit = textLoc;#ifdef TRACINGcerr << "rval == matchNo; setting info[" << index << "].matchedSpan.limit = " << textLoc << "\n";#endif }#ifdef TRACINGcerr << "RETURN(" << (_rval == matchYes ? "matchYes" : "matchNo") << ")\n";#endif return _rval;}#undef RETURN#ifdef TRACINGstatic voidprintMatchElem(const MatchElem& m){ string rval; char buf[20]; if (m.flags.type & kMatchElem_Negate) rval += "!"; if (m.flags.type & kMatchElem_NonLit) { switch (m.flags.type & kMatchElem_TypeMask) { case kMatchElem_Type_Class: sprintf(buf, "[%d]", m.value.cls.index); rval += buf; break; case kMatchElem_Type_BGroup: rval += "("; break; case kMatchElem_Type_EGroup: rval += ")"; break; case kMatchElem_Type_OR: rval += "|"; break; case kMatchElem_Type_ANY: rval += "."; break; case kMatchElem_Type_EOS: rval += "#"; break; case kMatchElem_Type_Copy: rval += "@"; break; } } else { UInt32 v = m.value.usv.data & kUSVMask; if (v >= ' ' && v < 0x7e) { sprintf(buf, "'%c'", (char)v); rval += buf; } else { sprintf(buf, "0x%04X", (UInt32)v); rval += buf; } } if (!(m.flags.type & kMatchElem_NonLit) || (m.flags.type & kMatchElem_TypeMask) != kMatchElem_Type_BGroup) switch (m.flags.repeat) { case 0x01: rval += "?"; break; case 0x11: break; case 0x0F: rval += "*"; break; case 0x1F: rval += "+"; break; default: sprintf(buf, "{%d,%d}", m.flags.repeat >> 4, m.flags.repeat & 0x0F); rval += buf; break; } cerr << rval;}static voidprintMatch(const StringRule* rule){ for (int i = 0; i < READ(rule->matchLength); ++i) { cerr << " "; printMatchElem(((MatchElem*)(rule + 1))[i]);// cerr << "<" << i << ">"; } if (READ(rule->preLength) > 0 || READ(rule->postLength) > 0) { cerr << " /"; for (int i = READ(rule->preLength) - 1; i >= 0; --i) { cerr << " "; printMatchElem(((MatchElem*)(rule + 1))[READ(rule->matchLength) + READ(rule->postLength) + i]); } cerr << " _"; for (int i = 0; i < READ(rule->postLength); ++i) { cerr << " "; printMatchElem(((MatchElem*)(rule + 1))[READ(rule->matchLength) + i]); } }}static voidprintRep(const StringRule* rule){ const RepElem* r = (const RepElem*)((const MatchElem*)(rule + 1) + rule->matchLength + rule->preLength + rule->postLength); for (int i = 0; i < READ(rule->repLength); ++i, ++r) { cerr << " "; switch (READ(r->flags.type)) { case kRepElem_Literal: { UInt32 v; char buf[20]; v = READ(r->value); if (v >= ' ' && v <= 0x7e) { sprintf(buf, "'%c'", v); cerr << buf; } else { sprintf(buf, "0x%04X", v); cerr << buf; } } break; case kRepElem_Class: cerr << "[" << (int)READ(r->flags.repClass) << "," << (int)READ(r->flags.matchIndex) << "]"; break; case kRepElem_Copy: cerr << "@" << (int)READ(r->flags.matchIndex); break; case kRepElem_Unmapped: cerr << "?"; break; } }}#endifUInt32Pass::DoMapping(){ UInt32 inChar = inputChar(0); if (inChar == kNeedMoreInput || inChar == kInvalidChar || inChar == kUnmappedChar) return inChar; if (inChar == kEndOfText) { outputChar(kEndOfText); return inChar; } matchedLength = 1; const Lookup* lookup; if (bInputIsUnicode) { // Unicode lookup UInt16 charIndex = 0; if ((const UInt8*)lookupBase == pageBase) { // leave charIndex == 0 : pass with no rules } else { UInt8 plane = inChar >> 16; const UInt8* pageMap = 0; if (bSupplementaryChars) { if ((plane < 17) && (READ(planeMap[plane]) != 0xff)) { pageMap = (const UInt8*)(pageBase + 256 * READ(planeMap[plane])); goto GOT_PAGE_MAP; } } else if (plane == 0) { pageMap = pageBase; GOT_PAGE_MAP: UInt8 page = (inChar >> 8) & 0xff; if (READ(pageMap[page]) != 0xff) { const UInt16* charMapBase = (const UInt16*)(pageBase + 256 * numPageMaps); const UInt16* charMap = charMapBase + 256 * READ(pageMap[page]); charIndex = READ(charMap[inChar & 0xff]); } } } lookup = lookupBase + charIndex; } else { // byte-oriented lookup if (pageBase != (const Byte*)tableHeader) { // dbcsPage present long pageNumber = READ(pageBase[inChar]); if (pageNumber == 0) // not a valid DBCS lead byte lookup = lookupBase + inChar; else { UInt32 nextChar = inputChar(1); if (nextChar == kNeedMoreInput || nextChar == kInvalidChar || nextChar == kUnmappedChar) return nextChar; if (nextChar == kEndOfText) lookup = lookupBase + inChar; else { lookup = lookupBase + pageNumber * 256 + nextChar; if (READ(lookup->rules.type) == kLookupType_IllegalDBCS) // illegal DBCS sequence; map lead byte alone lookup = lookupBase + inChar; else matchedLength = 2; } } } else // single-byte only lookup = lookupBase + inChar; } UInt8 ruleType = READ(lookup->rules.type); if (ruleType == kLookupType_StringRules || (ruleType & kLookupType_RuleTypeMask) == kLookupType_ExtStringRules) { // process string rule list const UInt32* ruleList = (const UInt32*)stringListBase + READ(lookup->rules.ruleIndex); bool matched = false; bool allowInsertion = true; int ruleCount = READ(lookup->rules.ruleCount); if ((ruleType & kLookupType_RuleTypeMask) == kLookupType_ExtStringRules) ruleCount += 256 * (ruleType & kLookupType_ExtRuleCountMask); for ( ; ruleCount > 0; --ruleCount) { const StringRule* rule = (const StringRule*)(stringRuleData + READ(*ruleList));#ifdef TRACINGif (traceLevel > 0) { cerr << "** trying match: "; printMatch(rule); cerr << "\n";}#endif ruleList++; matchElems = READ(rule->matchLength); if (matchElems == 0 && allowInsertion == false) continue; patternLength = matchElems + READ(rule->postLength); pattern = (MatchElem*)(rule + 1); // point past the defined struct for the rule header direction = 1; infoLimit = matchElems; // clear junk... for (int i = 0; i < infoLimit; ++i) info[i].matchedSpan.start = info[i].matchedSpan.limit = 0; UInt32 mr = match(0, 0, 0); if (mr == matchYes) { if (matchedLength == 0 && allowInsertion == false) continue; pattern += patternLength; patternLength = READ(rule->preLength); if (patternLength > 0) { direction = -1; infoLimit = 0; matchElems = -1; mr = match(0, 0, -1);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -