📄 compiler.cpp
字号:
if ((i->repeatMin != 1) && (i->repeatMin != 255)) { rval += " min=\""; rval += asDec(i->repeatMin); rval += "\""; } if ((i->repeatMax != 1) && (i->repeatMax != 255)) { rval += " max=\""; rval += asDec(i->repeatMax); rval += "\""; } if (i->tag.length() > 0) { if (i->type != kMatchElem_Type_Copy) { rval += " id=\""; rval += i->tag; rval += "\""; } } rval += "/>"; } return rval;}Compiler::Compiler(const char* txt, UInt32 len, char inForm, bool cmp, bool genXML, TECkit_ErrorFn errFunc, void* userData){ compiledTable = 0; compiledSize = 0; usedExtStringRules = false; textPtr = (const unsigned char*)txt; textEnd = textPtr + len; ungotten = kInvalidChar; inputForm = inForm; generateXML = genXML; lineNumber = 1; errorState = false; errorCount = 0; errorFunction = errFunc; errFuncUserData = userData; names.clear(); fwdTables.clear(); revTables.clear(); currentPass.clear(); buildVars.clear(); currentRule.clear(); lhsFlags = 0; rhsFlags = 0; char classType; ruleState = notInRule; int nestingLevel = 0; defIter = defEnd; while (inputForm == kForm_Unspecified) { // attempt to determine input encoding if (len >= 4) { // check for UTF32 BOM or 3 nulls in first 4 bytes if (strncmp(txt, "\0\0\376\377", 4) == 0) { inputForm = kForm_UTF32BE; break; } if (strncmp(txt, "\377\376\0\0", 4) == 0) { inputForm = kForm_UTF32LE; break; } if (strncmp(txt, "\0\0\0", 3) == 0) { inputForm = kForm_UTF32BE; break; } if (strncmp(txt+1, "\0\0\0", 3) == 0) { inputForm = kForm_UTF32LE; break; } } if (len >= 3) { // check for UTF8 signature if (strncmp(txt, "\357\273\277", 3) == 0) { inputForm = kForm_UTF8; break; } } if (len >= 2) { // check for UTF16 BOM or null byte if (strncmp(txt, "\376\377", 2) == 0) { inputForm = kForm_UTF16BE; break; } if (strncmp(txt, "\377\376", 2) == 0) { inputForm = kForm_UTF16LE; break; } if (txt[0] == '\0') { inputForm = kForm_UTF16BE; break; } if (txt[1] == '\0') { inputForm = kForm_UTF16LE; break; } } inputForm = kForm_Bytes; }; if (inputForm != kForm_Bytes) { // discard initial BOM if present currCh = getChar(); if (currCh != 0xfeff) ungetChar(currCh); } while (GetNextToken()) { // on error, skip to next newline GOT_TOKEN: if (errorState) { if (ruleState != notInRule) { ruleState = notInRule; nestingLevel = 0; currentRule.clear(); } if (tok.type != tok_Newline) continue; } errorState = false; string32::const_iterator i; switch (tok.type) { default: Error("this can't happen!"); break; case tok_Unknown: Error("unexpected character", string(1, tok.val).c_str()); break; case tok_Identifier: Error("unexpected identifier", asUTF8(tok.strval).c_str()); break; case tok_Define: if (!ExpectToken(tok_Identifier, "expected identifier after Define")) break; { string defName(asUTF8(tok.strval)); tokListT defToks; while (GetNextToken()) { if (tok.type == tok_Newline) break; if (tok.type == tok_Unknown) Error("unexpected character in Define text", string(1, tok.val).c_str()); else defToks.push_back(tok); } defines[defName] = defToks; } break; case tok_Newline: switch (ruleState) { default: break; case inLHSString: case inLHSPreContext: case inLHSPostContext: Error("no mapping operator found"); goto GOT_TOKEN; case inRHSString: case inRHSPreContext: case inRHSPostContext: if (nestingLevel > 0) { Error("unmatched opening parenthesis"); break; } if (ruleType == 0 || ruleType == '>') { currentPass.fwdRules.push_back(Rule(currentRule.lhsString, reverseContext(currentRule.lhsPreContext), currentRule.lhsPostContext, currentRule.rhsString, currentRule.startingLine)); } if (ruleType == 0 || ruleType == '<') { currentPass.revRules.push_back(Rule(currentRule.rhsString, reverseContext(currentRule.rhsPreContext), currentRule.rhsPostContext, currentRule.lhsString, currentRule.startingLine)); } if (generateXML) { // create an XML representation of the rule and append to currentPass.xmlRules/xmlContexts bool sourceUni = (currentPass.passType == kCode_UB) || (currentPass.passType == kCode_Unic); bool targetUni = (currentPass.passType == kCode_BU) || (currentPass.passType == kCode_Unic); string xmlRule; xmlRule += "<a"; xmlRule += " line=\""; xmlRule += asDec(currentRule.startingLine); xmlRule += "\""; if (ruleType == '>') xmlRule += " dir=\"fwd\""; else if (ruleType == '<') xmlRule += " dir=\"rev\""; xmlRule += ">\n"; string contextID; xmlRule += "<l"; if (currentRule.lhsPreContext.size() != 0) { contextID = getContextID(currentRule.lhsPreContext, sourceUni); xmlRule += " preCtx=\""; xmlRule + contextID; xmlRule += "\""; } if (currentRule.lhsPostContext.size() != 0) { contextID = getContextID(currentRule.lhsPostContext, sourceUni); xmlRule += " postCtx=\""; xmlRule += contextID; xmlRule += "\""; } xmlRule += ">"; xmlRule += xmlString(currentRule.lhsString.begin(), currentRule.lhsString.end(), sourceUni); xmlRule += "</l>\n"; xmlRule += "<r"; if (currentRule.rhsPreContext.size() != 0) { contextID = getContextID(currentRule.rhsPreContext, targetUni); xmlRule += " preCtx=\""; xmlRule += contextID; xmlRule += "\""; } if (currentRule.rhsPostContext.size() != 0) { contextID = getContextID(currentRule.rhsPostContext, targetUni); xmlRule += " postCtx=\""; xmlRule += contextID; xmlRule += "\""; } xmlRule += ">"; xmlRule += xmlString(currentRule.rhsString.begin(), currentRule.rhsString.end(), targetUni); xmlRule += "</r>\n"; xmlRule += "</a>\n"; currentPass.xmlRules.push_back(xmlRule); } currentRule.clear(); ruleState = notInRule; break; } break; case tok_Number: AppendLiteral(tok.val); break; case tok_USV: AppendUSV(tok.val); break; case tok_String: if (inputForm == kForm_Bytes && charLimit() != 0xff) { Error("can't use quoted string for Unicodes in 8-bit source text"); break; } if (inputForm != kForm_Bytes && charLimit() == 0xff) { Error("can't use quoted string for Bytes in Unicode source text"); break; } for (i = tok.strval.begin(); i != tok.strval.end(); ++i) AppendLiteral(*i); break; case '^': // negation can only apply to a few things: GetNextToken(); switch (tok.type) { case tok_Number: AppendLiteral(tok.val, true); break; case tok_USV: AppendUSV(tok.val, true); break; case '[': if (!ExpectToken(tok_Identifier, "expected CLASS-NAME after opening bracket")) break; AppendClass(asUTF8(tok.strval), true); if (!ExpectToken(']', "expected closing bracket after CLASS-NAME")) break; break; case '#': AppendSpecial(kMatchElem_Type_EOS, true); break; case '.': AppendSpecial(kMatchElem_Type_ANY, true); break; default: Error("invalid use of negation"); break; } break; case '#': AppendSpecial(kMatchElem_Type_EOS); break; case '.': AppendSpecial(kMatchElem_Type_ANY); break; case '(': AppendSpecial(kMatchElem_Type_BGroup); ++nestingLevel; break; case ')': if (nestingLevel == 0) { Error("unmatched closing parenthesis"); break; } --nestingLevel; AppendSpecial(kMatchElem_Type_EGroup); break; case '|': if (nestingLevel == 0) { Error("alternation only permitted within parentheses"); break; } AppendSpecial(kMatchElem_Type_OR); break; case tok_Map: case '>': case '<': if (nestingLevel > 0) { Error("unmatched opening parenthesis"); break; } switch (ruleState) { default: Error("not within a mapping rule"); break; case inLHSString: case inLHSPostContext: ruleState = inRHSString; ruleType = (tok.type == tok_Map ? 0 : (tok.type == '>' ? '>' : '<')); break; case inLHSPreContext: Error("no underscore found in context"); break; case inRHSString: case inRHSPreContext: case inRHSPostContext: Error("extra mapping operator in rule"); break; } break; case '/': if (nestingLevel > 0) { Error("unmatched opening parenthesis"); break; } switch (ruleState) { default: Error("not within a mapping rule"); break; case inLHSString: ruleState = inLHSPreContext; break; case inRHSString: ruleState = inRHSPreContext; break; case inLHSPreContext: case inLHSPostContext: case inRHSPreContext: case inRHSPostContext: Error("extra slash in rule"); break; } break; case '_': if (nestingLevel > 0) { Error("unmatched opening parenthesis"); break; } switch (ruleState) { default: Error("not within a mapping rule"); break; case inLHSPreContext: ruleState = inLHSPostContext; break; case inRHSPreContext: ruleState = inRHSPostContext; break; case inLHSString: case inRHSString: Error("underscore only allowed in context"); break; case inLHSPostContext: case inRHSPostContext: Error("extra underscore in context"); break; } break; case '[': if (!ExpectToken(tok_Identifier, "expected CLASS-NAME after opening bracket")) break; AppendClass(asUTF8(tok.strval)); if (!ExpectToken(']', "expected closing bracket after CLASS-NAME")) break; break; case ']': Error("unmatched closing bracket"); break; case '=': if (!ExpectToken(tok_Identifier, "expected tag name after '='")) break; AssignTag(asUTF8(tok.strval)); break; case '@': if (!ExpectToken(tok_Identifier, "expected tag name after '@'")) break; AppendSpecial(kMatchElem_Type_Copy); AssignTag(asUTF8(tok.strval)); break; case '?': SetMinMax(0, 1); break; case '*': SetMinMax(0, 15); break; case '+': SetMinMax(1, 15); break; case '{': { int repeatMin = 0; int repeatMax = 15; GetNextToken(); if (tok.type == tok_Number) { repeatMin = repeatMax = tok.val; GetNextToken(); if (tok.type == ',') { GetNextToken(); if (tok.type == tok_Number) { repeatMax = tok.val; if (!ExpectToken('}', "expected closing brace after repeat counts")) break; } else if (tok.type == '}') repeatMax = 15; else { Error("expected repeat count or closing brace after comma"); break; } } else if (tok.type != '}') { Error("expected comma or closing brace after repeat count"); break; } } else if (tok.type == ',') { GetNextToken(); if (tok.type == tok_Number) repeatMax = tok.val; else { Error("expected repeat count"); break; } if (!ExpectToken('}', "expected closing brace after repeat count")) break; } else { Error("expected repeat counts within braces"); break; } SetMinMax(repeatMin, repeatMax); } break; case '}': Error("unmatched closing brace"); break; case tok_Name: if (tok.val == 0xffffffff) { if (!ExpectToken('(', "expected (NUMBER) STRING after Name")) break; if (!ExpectToken(tok_Number, "expected (NUMBER) STRING after Name"))
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -