📄 compiler.cpp
字号:
ungetChar(currCh); break; } } return true; } ungetChar(currCh); currCh = '0'; } // else fall through case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': tok.type = tok_Number; tok.val = currCh - '0'; while (textPtr < textEnd) { currCh = getChar(); if (currCh >= '0' && currCh <= '9') tok.val = tok.val * 10 + currCh - '0'; else { ungetChar(currCh); break; } } return true; case ';': { bool continuation = false; while (textPtr < textEnd) { continuation = (currCh == '\\'); currCh = getChar(); if (currCh == '\r' || currCh == '\n') break; } if (textPtr < textEnd) { UInt32 nextCh = getChar(); if (!((currCh == '\r' && nextCh == '\n') || (currCh == '\n' && nextCh == '\r'))) ungetChar(nextCh); } ++lineNumber; if (continuation) continue; else { tok.type = tok_Newline; return true; } } case 'U': // check for U+xxxx USV if (textPtr < textEnd) { currCh = getChar(); if (currCh == '+') { tok.type = tok_USV; tok.val = 0; int digitCount = 0; while (textPtr < textEnd) { currCh = getChar(); if (currCh >= '0' && currCh <= '9') tok.val = tok.val * 16 + currCh - '0'; else if (currCh >= 'a' && currCh <= 'f') tok.val = tok.val * 16 + currCh - 'a' + 10; else if (currCh >= 'A' && currCh <= 'F') tok.val = tok.val * 16 + currCh - 'A' + 10; else { ungetChar(currCh); break; } ++digitCount; } if (digitCount < 4 || digitCount > 6) { Error("Unicode value (U+xxxx) must have 4-6 hex digits"); tok.val = 0; } return true; } else ungetChar(currCh); } currCh = 'U'; goto DEFAULT; // read an identifier or some other 'unknown' character default: DEFAULT: if (isIDstart(currCh)) { idBuffer[0] = currCh; tok.val = 1; while (textPtr < textEnd) { currCh = getChar(); if (!isIDcont(currCh)) { ungetChar(currCh); break; } if (tok.val < 256) idBuffer[tok.val++] = currCh; } tok.type = IDlookup(&idBuffer[0], tok.val); return true; } tok.type = tok_Unknown; tok.val = currCh; return true; } }}boolCompiler::ExpectToken(tokenType type, const char* errMsg){ if (!GetNextToken() || tok.type != type) { Error(errMsg); return false; } return true;}voidCompiler::Error(const char* msg, const char* s, UInt32 line){ if (line == 0xffffffff) line = lineNumber; if (errorFunction == 0) { cout << "Error: " << msg; if (s != 0) cout << ": \"" << s << '"'; cout << " at line " << line << endl; } else (*errorFunction)(errFuncUserData, (char*)msg, (char*)s, line); errorState = true; ++errorCount;}voidCompiler::StartDefaultPass(){ if ((currentPass.passType & 0xFFFF0000) == (FOUR_CHAR_CODE('N','F','_','_') & 0xFFFF0000)) { Error("normalization pass cannot contain any other rules"); currentPass.passType = kCode_Unic; } if (currentPass.passType == 0) { currentPass.clear(); // should already be clear! currentPass.passType = kCode_BU; currentPass.setLineNo(lineNumber); }}voidCompiler::AppendToRule(const Item& item){ StartDefaultPass(); switch (ruleState) { case notInRule: ruleState = inLHSString; currentRule.setLineNo(lineNumber); case inLHSString: currentRule.lhsString.push_back(item); break; case inLHSPreContext: currentRule.lhsPreContext.push_back(item); break; case inLHSPostContext: currentRule.lhsPostContext.push_back(item); break; case inRHSString: currentRule.rhsString.push_back(item); break; case inRHSPreContext: currentRule.rhsPreContext.push_back(item); break; case inRHSPostContext: currentRule.rhsPostContext.push_back(item); break; }}UInt32Compiler::charLimit(){ UInt32 limit; switch (ruleState) { case inRHSString: case inRHSPreContext: case inRHSPostContext: limit = (currentPass.passType == kCode_BU || currentPass.passType == kCode_Unic ? 0x10ffff : 0xff); break; default: limit = (currentPass.passType == kCode_UB || currentPass.passType == kCode_Unic ? 0x10ffff : 0xff); break; } return limit;}voidCompiler::AppendLiteral(UInt32 val, bool negate){ StartDefaultPass(); if (val > charLimit()) { Error("literal value out of range"); return; } Item item; item.type = 0; item.negate = negate ? 1 : 0; item.repeatMin = 0xff; item.repeatMax = 0xff; item.val = val; AppendToRule(item);}voidCompiler::AppendUSV(UInt32 val, bool negate){ StartDefaultPass(); if (charLimit() == 0xff) { Error("can't use Unicode character in byte encoding"); return; } AppendLiteral(val, negate);}voidCompiler::AppendSpecial(UInt8 type, bool negate){ Item item; item.type = type; item.negate = negate ? 1 : 0; item.repeatMin = 0xff; item.repeatMax = 0xff; item.val = 0; item.start = item.next = item.after = item.index = 0xff; AppendToRule(item);}voidCompiler::AppendClass(const string& className, bool negate){ StartDefaultPass(); Item item; item.type = kMatchElem_Type_Class; item.negate = negate ? 1 : 0; item.repeatMin = 0xff; item.repeatMax = 0xff; item.val = 0; const map<string,UInt32>* classNames; switch (ruleState) { case inRHSString: case inRHSPreContext: case inRHSPostContext: classNames = (currentPass.passType == kCode_Byte || currentPass.passType == kCode_UB) ? ¤tPass.byteClassNames : ¤tPass.uniClassNames; break; default: classNames = (currentPass.passType == kCode_Byte || currentPass.passType == kCode_BU) ? ¤tPass.byteClassNames : ¤tPass.uniClassNames; break; } map<string,UInt32>::const_iterator i; i = classNames->find(className); if (i == classNames->end()) Error("undefined class", className.c_str()); else item.val = i->second; AppendToRule(item);}boolCompiler::tagExists(bool rhs, const string& tag){ if (rhs) { if ( (findTag(tag, currentRule.rhsString) != -1) || (findTag(tag, currentRule.rhsPreContext) != -1) || (findTag(tag, currentRule.rhsPostContext) != -1)) return true; } else { if ( (findTag(tag, currentRule.lhsString) != -1) || (findTag(tag, currentRule.lhsPreContext) != -1) || (findTag(tag, currentRule.lhsPostContext) != -1)) return true; } return false;}voidCompiler::AssignTag(const string& tag){ if (currentPass.passType == 0 || ruleState == notInRule) { Error("item tag doesn't seem to be attached to a rule item", tag.c_str()); return; } Item* item = NULL; switch (ruleState) { default: Error("this can't happen (AssignTag)"); return; case inLHSString: if (tagExists(false, tag)) break; item = ¤tRule.lhsString.back(); break; case inLHSPreContext: if (tagExists(false, tag)) break; item = ¤tRule.lhsPreContext.back(); break; case inLHSPostContext: if (tagExists(false, tag)) break; item = ¤tRule.lhsPostContext.back(); break; case inRHSString: if (tagExists(true, tag)) break; item = ¤tRule.rhsString.back(); break; case inRHSPreContext: if (tagExists(true, tag)) break; item = ¤tRule.rhsPreContext.back(); break; case inRHSPostContext: if (tagExists(true, tag)) break; item = ¤tRule.rhsPostContext.back(); break; } if (item == NULL) { Error("duplicate tag (ignored)", tag.c_str()); return; } if (item->tag.length() > 0) { Error("rule item already has a tag", tag.c_str()); return; } switch (item->type) { case 0: case kMatchElem_Type_Class: case kMatchElem_Type_EGroup: case kMatchElem_Type_ANY: case kMatchElem_Type_Copy: item->tag = tag; break; default: Error("invalid use of item tag", tag.c_str()); break; }}voidCompiler::SetMinMax(int repeatMin, int repeatMax){ Item* item = 0; switch (ruleState) { default: Error("invalid use of repeat count"); break; case inLHSString: item = ¤tRule.lhsString.back(); break; case inLHSPreContext: item = ¤tRule.lhsPreContext.back(); break; case inLHSPostContext: item = ¤tRule.lhsPostContext.back(); break; case inRHSString: item = ¤tRule.rhsString.back(); break; case inRHSPreContext: item = ¤tRule.rhsPreContext.back(); break; case inRHSPostContext: item = ¤tRule.rhsPostContext.back(); break; } if (item) { switch (item->type) { case 0: case kMatchElem_Type_Class: case kMatchElem_Type_ANY: case kMatchElem_Type_EGroup: if (repeatMin > repeatMax || repeatMax < 1 || repeatMax > 15) Error("invalid repeat counts (0-15 allowed)"); else if (item->repeatMin != 0xff) Error("multiple repeat counts on item"); else { item->repeatMin = repeatMin; item->repeatMax = repeatMax; } break; default: Error("invalid use of repeat count"); break; } }}voidCompiler::setGroupPointers(vector<Item>::iterator b, vector<Item>::iterator e, int startIndex, bool isReversed){// set up the fwd and back pointers on bgroup/or/egroup// and propagate repeat counts from egroup to bgroup vector<Item>::iterator base = b; vector<Item>::iterator altStart = startIndex > 0 ? base - 1 : e; bool altSeen = false; while (b != e) { if (b->repeatMin == 0xff) b->repeatMin = 1; if (b->repeatMax == 0xff) b->repeatMax = 1; switch (b->type) { case 0: // literal case kMatchElem_Type_Class: case kMatchElem_Type_ANY: case kMatchElem_Type_EOS: break; case kMatchElem_Type_OR: // if startIndex > 0, then initial altStart will be valid if ((startIndex > 0 || altSeen) && (altStart->type == kMatchElem_Type_OR || altStart->type == kMatchElem_Type_BGroup)) altStart->next = startIndex + (b - base); else { Error("this can't happen (setGroupPointers 1)"); return; } altStart = b; altStart->start = startIndex - 1; altSeen = true; break; case kMatchElem_Type_EGroup: Error("this can't happen (setGroupPointers 2)"); return; case kMatchElem_Type_BGroup: { // need to find corresponding EGroup and copy repeat counts from there // (or vice versa if this is reversed context) vector<Item>::iterator subGroupStart = b++; subGroupStart->next = 0; int nestingLevel = 0; while (b->type != kMatchElem_Type_EGroup || nestingLevel > 0) { if (b->type == kMatchElem_Type_BGroup) ++nestingLevel; else if (b->type == kMatchElem_Type_EGroup) --nestingLevel; ++b; } if (isReversed) { b->repeatMin = subGroupStart->repeatMin; b->repeatMax = subGroupStart->repeatMax; } else { if (b->repeatMin == 0xff) b->repeatMin = 1; if (b->repeatMax == 0xff) b->repeatMax = 1; subGroupStart->repeatMin = b->repeatMin; subGroupStart->repeatMax = b->repeatMax; } setGroupPointers(subGroupStart + 1, b, startIndex + (subGroupStart - base + 1), isReversed); subGroupStart->after = startIndex + (b - base + 1); b->start = startIndex + (subGroupStart - base); } break; } ++b; } if (altSeen) altStart->next = startIndex + (b - base); // set NEXT pointer of last OR if (startIndex > 0) { // we were handling a group, so set pointers of EGroup if (b->type == kMatchElem_Type_EGroup) b->start = startIndex - 1; else { Error("this can't happen (setGroupPointers 3)"); return; } }}voidCompiler::setGroupPointers(vector<Rule>& rules){ for (vector<Rule>::iterator i = rules.begin(); i != rules.end(); ++i) { setGroupPointers(i->matchStr.begin(), i->matchStr.end(), 0); setGroupPointers(i->preContext.begin(), i->preContext.end(
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -