📄 compiler.cpp

📁 Emdros is a text database middleware-layer aimed at storage and retrieval of "text plus information…"
💻 CPP
📖 第 1 页 / 共 5 页
字号:
		if ((i->repeatMin != 1) && (i->repeatMin != 255)) {			rval += " min=\"";			rval += asDec(i->repeatMin);			rval += "\"";		}		if ((i->repeatMax != 1) && (i->repeatMax != 255)) {			rval += " max=\"";			rval += asDec(i->repeatMax);			rval += "\"";		}		if (i->tag.length() > 0) {			if (i->type != kMatchElem_Type_Copy) {				rval += " id=\"";				rval += i->tag;				rval += "\"";			}		}		rval += "/>";	}	return rval;}Compiler::Compiler(const char* txt, UInt32 len, char inForm, bool cmp, bool genXML, TECkit_ErrorFn errFunc, void* userData){	compiledTable = 0;	compiledSize = 0;	usedExtStringRules = false;	textPtr = (const unsigned char*)txt;	textEnd = textPtr + len;		ungotten = kInvalidChar;	inputForm = inForm;		generateXML = genXML;		lineNumber = 1;	errorState = false;	errorCount = 0;	errorFunction = errFunc;	errFuncUserData = userData;		names.clear();	fwdTables.clear();	revTables.clear();	currentPass.clear();	buildVars.clear();	currentRule.clear();		lhsFlags = 0;	rhsFlags = 0;		char	classType;		ruleState = notInRule;	int	nestingLevel = 0;	defIter = defEnd;	while (inputForm == kForm_Unspecified) {		// attempt to determine input encoding		if (len >= 4) {			// check for UTF32 BOM or 3 nulls in first 4 bytes			if (strncmp(txt, "\0\0\376\377", 4) == 0) {				inputForm = kForm_UTF32BE;				break;			}			if (strncmp(txt, "\377\376\0\0", 4) == 0) {				inputForm = kForm_UTF32LE;				break;			}			if (strncmp(txt, "\0\0\0", 3) == 0) {				inputForm = kForm_UTF32BE;				break;			}			if (strncmp(txt+1, "\0\0\0", 3) == 0) {				inputForm = kForm_UTF32LE;				break;			}		}		if (len >= 3) {			// check for UTF8 signature			if (strncmp(txt, "\357\273\277", 3) == 0) {				inputForm = kForm_UTF8;				break;			}		}		if (len >= 2) {			// check for UTF16 BOM or null byte			if (strncmp(txt, "\376\377", 2) == 0) {				inputForm = kForm_UTF16BE;				break;			}			if (strncmp(txt, "\377\376", 2) == 0) {				inputForm = kForm_UTF16LE;				break;			}			if (txt[0] == '\0') {				inputForm = kForm_UTF16BE;				break;			}	
		if (txt[1] == '\0') {				inputForm = kForm_UTF16LE;				break;			}		}		inputForm = kForm_Bytes;	};	if (inputForm != kForm_Bytes) {		// discard initial BOM if present		currCh = getChar();		if (currCh != 0xfeff)			ungetChar(currCh);	}	while (GetNextToken()) {		// on error, skip to next newline	GOT_TOKEN:		if (errorState) {			if (ruleState != notInRule) {				ruleState = notInRule;				nestingLevel = 0;				currentRule.clear();			}			if (tok.type != tok_Newline)				continue;		}		errorState = false;		string32::const_iterator i;		switch (tok.type) {			default:				Error("this can't happen!");				break;							case tok_Unknown:				Error("unexpected character", string(1, tok.val).c_str());				break;			case tok_Identifier:				Error("unexpected identifier", asUTF8(tok.strval).c_str());				break;			case tok_Define:				if (!ExpectToken(tok_Identifier, "expected identifier after Define"))					break;				{					string		defName(asUTF8(tok.strval));					tokListT	defToks;					while (GetNextToken()) {						if (tok.type == tok_Newline)							break;						if (tok.type == tok_Unknown)							Error("unexpected character in Define text", string(1, tok.val).c_str());						else							defToks.push_back(tok);					}					defines[defName] = defToks;				}				break;			case tok_Newline:				switch (ruleState) {					default:						break;					case inLHSString:					case inLHSPreContext:					case inLHSPostContext:						Error("no mapping operator found");						goto GOT_TOKEN;					case inRHSString:					case inRHSPreContext:					case inRHSPostContext:						if (nestingLevel > 0) {							Error("unmatched opening parenthesis");							break;						}						if (ruleType == 0 || ruleType == '>') {							currentPass.fwdRules.push_back(Rule(currentRule.lhsString,								reverseContext(currentRule.lhsPreContext), currentRule.lhsPostContext,								currentRule.rhsString, currentRule.startingLine));						}						if (ruleType == 0 || ruleType == '<') {							currentPass.revRules.push_back(Rule(currentRule.rhsString,								
reverseContext(currentRule.rhsPreContext), currentRule.rhsPostContext,								currentRule.lhsString, currentRule.startingLine));						}						if (generateXML) {							// create an XML representation of the rule and append to currentPass.xmlRules/xmlContexts							bool	sourceUni = (currentPass.passType == kCode_UB) || (currentPass.passType == kCode_Unic);							bool	targetUni = (currentPass.passType == kCode_BU) || (currentPass.passType == kCode_Unic);							string	xmlRule;							xmlRule += "<a";							xmlRule += " line=\"";							xmlRule += asDec(currentRule.startingLine);							xmlRule += "\"";							if (ruleType == '>')								xmlRule += " dir=\"fwd\"";							else if (ruleType == '<')								xmlRule += " dir=\"rev\"";							xmlRule += ">\n";														string	contextID;							xmlRule += "<l";							if (currentRule.lhsPreContext.size() != 0) {								contextID = getContextID(currentRule.lhsPreContext, sourceUni);								xmlRule += " preCtx=\"";								xmlRule + contextID;								xmlRule += "\"";							}							if (currentRule.lhsPostContext.size() != 0) {								contextID = getContextID(currentRule.lhsPostContext, sourceUni);								xmlRule += " postCtx=\"";								xmlRule += contextID;								xmlRule += "\"";							}							xmlRule += ">";							xmlRule += xmlString(currentRule.lhsString.begin(), currentRule.lhsString.end(), sourceUni);							xmlRule += "</l>\n";							xmlRule += "<r";							if (currentRule.rhsPreContext.size() != 0) {								contextID = getContextID(currentRule.rhsPreContext, targetUni);								xmlRule += " preCtx=\"";								xmlRule += contextID;								xmlRule += "\"";							}							if (currentRule.rhsPostContext.size() != 0) {								contextID = getContextID(currentRule.rhsPostContext, targetUni);								xmlRule += " postCtx=\"";								xmlRule += contextID;								xmlRule += "\"";							}							xmlRule += ">";							xmlRule += xmlString(currentRule.rhsString.begin(), currentRule.rhsString.end(), targetUni);							xmlRule += "</r>\n";							
xmlRule += "</a>\n";							currentPass.xmlRules.push_back(xmlRule);						}						currentRule.clear();						ruleState = notInRule;						break;				}				break;			case tok_Number:				AppendLiteral(tok.val);				break;			case tok_USV:				AppendUSV(tok.val);				break;							case tok_String:				if (inputForm == kForm_Bytes && charLimit() != 0xff) {					Error("can't use quoted string for Unicodes in 8-bit source text");					break;				}				if (inputForm != kForm_Bytes && charLimit() == 0xff) {					Error("can't use quoted string for Bytes in Unicode source text");					break;				}				for (i = tok.strval.begin(); i != tok.strval.end(); ++i)					AppendLiteral(*i);				break;			case '^':				// negation can only apply to a few things:				GetNextToken();				switch (tok.type) {					case tok_Number:						AppendLiteral(tok.val, true);						break;					case tok_USV:						AppendUSV(tok.val, true);						break;					case '[':						if (!ExpectToken(tok_Identifier, "expected CLASS-NAME after opening bracket"))							break;						AppendClass(asUTF8(tok.strval), true);						if (!ExpectToken(']', "expected closing bracket after CLASS-NAME"))							break;						break;					case '#':						AppendSpecial(kMatchElem_Type_EOS, true);						break;					case '.':						AppendSpecial(kMatchElem_Type_ANY, true);						break;					default:						Error("invalid use of negation");						break;				}				break;						case '#':				AppendSpecial(kMatchElem_Type_EOS);				break;						case '.':				AppendSpecial(kMatchElem_Type_ANY);				break;						case '(':				AppendSpecial(kMatchElem_Type_BGroup);				++nestingLevel;				break;							case ')':				if (nestingLevel == 0) {					Error("unmatched closing parenthesis");					break;				}				--nestingLevel;				AppendSpecial(kMatchElem_Type_EGroup);				break;							case '|':				if (nestingLevel == 0) {					Error("alternation only permitted within parentheses");					break;				}				AppendSpecial(kMatchElem_Type_OR);				break;						case tok_Map:			case '>':			case '<':				if (nestingLevel > 
0) {					Error("unmatched opening parenthesis");					break;				}				switch (ruleState) {					default:						Error("not within a mapping rule");						break;					case inLHSString:					case inLHSPostContext:						ruleState = inRHSString;						ruleType = (tok.type == tok_Map ? 0 : (tok.type == '>' ? '>' : '<'));						break;					case inLHSPreContext:						Error("no underscore found in context");						break;					case inRHSString:					case inRHSPreContext:					case inRHSPostContext:						Error("extra mapping operator in rule");						break;				}				break;			case '/':				if (nestingLevel > 0) {					Error("unmatched opening parenthesis");					break;				}				switch (ruleState) {					default:						Error("not within a mapping rule");						break;					case inLHSString:						ruleState = inLHSPreContext;						break;					case inRHSString:						ruleState = inRHSPreContext;						break;					case inLHSPreContext:					case inLHSPostContext:					case inRHSPreContext:					case inRHSPostContext:						Error("extra slash in rule");						break;				}				break;			case '_':				if (nestingLevel > 0) {					Error("unmatched opening parenthesis");					break;				}				switch (ruleState) {					default:						Error("not within a mapping rule");						break;					case inLHSPreContext:						ruleState = inLHSPostContext;						break;					case inRHSPreContext:						ruleState = inRHSPostContext;						break;					case inLHSString:					case inRHSString:						Error("underscore only allowed in context");						break;					case inLHSPostContext:					case inRHSPostContext:						Error("extra underscore in context");						break;				}				break;			case '[':				if (!ExpectToken(tok_Identifier, "expected CLASS-NAME after opening bracket"))					break;				AppendClass(asUTF8(tok.strval));				if (!ExpectToken(']', "expected closing bracket after CLASS-NAME"))					break;				break;						case ']':				Error("unmatched closing bracket");				break;						case '=':				if (!ExpectToken(tok_Identifier, "expected tag name after '='"))					
break;				AssignTag(asUTF8(tok.strval));				break;			case '@':				if (!ExpectToken(tok_Identifier, "expected tag name after '@'"))					break;				AppendSpecial(kMatchElem_Type_Copy);				AssignTag(asUTF8(tok.strval));				break;						case '?':				SetMinMax(0, 1);				break;						case '*':				SetMinMax(0, 15);				break;						case '+':				SetMinMax(1, 15);				break;						case '{':				{					int	repeatMin = 0;					int repeatMax = 15;					GetNextToken();					if (tok.type == tok_Number) {						repeatMin = repeatMax = tok.val;						GetNextToken();						if (tok.type == ',') {							GetNextToken();							if (tok.type == tok_Number) {								repeatMax = tok.val;								if (!ExpectToken('}', "expected closing brace after repeat counts"))									break;							}							else if (tok.type == '}')								repeatMax = 15;							else {								Error("expected repeat count or closing brace after comma");								break;							}						}						else if (tok.type != '}') {							Error("expected comma or closing brace after repeat count");							break;						}					}					else if (tok.type == ',') {						GetNextToken();						if (tok.type == tok_Number)							repeatMax = tok.val;						else {							Error("expected repeat count");							break;						}						if (!ExpectToken('}', "expected closing brace after repeat count"))							break;					}					else {						Error("expected repeat counts within braces");						break;					}					SetMinMax(repeatMin, repeatMax);				}				break;				case '}':				Error("unmatched closing brace");				break;			case tok_Name:				if (tok.val == 0xffffffff) {					if (!ExpectToken('(', "expected (NUMBER) STRING after Name"))						break;					if (!ExpectToken(tok_Number,  "expected (NUMBER) STRING after Name"))
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -