📄 compiler.cpp

📁 Emdros is a text database middleware-layer aimed at storage and retrieval of "text plus information…"
💻 CPP
📖 第 1 页 / 共 5 页
字号:
						break;					int nameID = tok.val;					if (!ExpectToken(')', "expected (NUMBER) STRING after Name"))						break;					ReadNameString(nameID);				}				else					ReadNameString(tok.val);				goto GOT_TOKEN;	// ReadNameString has already read the newline			case tok_Flags:				{					if (!ExpectToken('(', "expected (FLAG-LIST) after SourceFlags/TargetFlags"))						break;					UInt32	flagValue = 0;					char	whichFlags = tok.val;					while (1) {						GetNextToken();						if (tok.type == tok_FlagValue)							flagValue |= tok.val;						else							break;					}					if (tok.type != ')') {						Error("expected (FLAG-LIST) after SourceFlags/TargetFlags");						break;					}					if (whichFlags == 'S')						lhsFlags = flagValue;					else						rhsFlags = flagValue;				}				ExpectToken(tok_Newline, "junk at end of line");				break;			case tok_Pass:				FinishPass();				currentPass.setLineNo(lineNumber);				if (!ExpectToken('(', "expected (PASS-TYPE) after Pass"))					break;				GetNextToken();				if (tok.type == tok_PassType)					currentPass.passType = tok.val;				else					Error("unrecognized pass type");				if (!ExpectToken(')', "expected (PASS-TYPE) after Pass"))					break;				ExpectToken(tok_Newline, "junk at end of line");				goto  GOT_TOKEN;						case tok_Default:				StartDefaultPass();				if (currentPass.passType != kCode_BU && currentPass.passType != kCode_UB) {					Error("defaults are only used in Byte_Unicode and Unicode_Byte passes");					break;				}				{					char	whichDefault = tok.val;					GetNextToken();					switch (tok.type) {						case tok_String:							if (tok.strval.length() != 1)								Error("default can only be a single character, not a multi-character string");							else if (whichDefault == 'U') {								if (inputForm == kForm_Bytes)									Error("UniDefault cannot use quoted character in 8-bit source text");								else									currentPass.uniDefault = tok.strval[0];							}							else {								if (inputForm != kForm_Bytes)									Error("ByteDefault cannot use 
quoted character in Unicode source text");								else									currentPass.byteDefault = tok.strval[0];							}							break;						case tok_Number:							if (whichDefault == 'U')								currentPass.uniDefault = tok.val;							else								currentPass.byteDefault = tok.val;							break;						case tok_USV:							if (whichDefault == 'U')								currentPass.uniDefault = tok.val;							else								Error("can't use Unicode value in byte encoding");							break;						default:							Error("expected character code after ByteDefault/UniDefault");							break;					}				}				break;						case tok_Class:				StartDefaultPass();				classLine = lineNumber;				if (tok.val == 0) {					if (currentPass.passType == kCode_Byte)						classType = 'B';					else if (currentPass.passType == kCode_Unic)						classType = 'U';					else {						Error("must use ByteClass or UniClass to define classes in this pass");						break;					}				}				else {					classType = tok.val;					if (classType == 'B' && currentPass.passType == kCode_Unic)						Error("can't use ByteClass in this pass");					else if (classType == 'U' && currentPass.passType == kCode_Byte)						Error("can't use UniClass in this pass");				}				{					UInt32	classLimit = (classType == 'U' ? 
0x10ffff : 0xff);					if (!ExpectToken('[', "expected [CLASS-NAME] after Class/ByteClass/UniClass"))						break;					if (!ExpectToken(tok_Identifier, "expected [CLASS-NAME] after Class/ByteClass/UniClass"))						break;					string	className(asUTF8(tok.strval));					if (!ExpectToken(']', "expected [CLASS-NAME] after Class/ByteClass/UniClass"))						break;					if (!ExpectToken('=', "expected =(CHARACTER-CODE-LIST) after Class/ByteClass/UniClass[CLASS-NAME]"))						break;					if (!ExpectToken('(', "expected =(CHARACTER-CODE-LIST) after Class/ByteClass/UniClass[CLASS-NAME]"))						break;					vector<UInt32>	classMembers;					bool	ellipsis = false;					bool	ellipsisOK = false;					while (tok.type != ')' && tok.type != tok_Newline) {						GetNextToken();						switch (tok.type) {							case tok_USV:								if (classType == 'B') {									Error("can't use Unicode value in byte encoding");									break;								}								// fall through							case tok_Number:								if (tok.val > classLimit) {									Error("class element outside valid range");									break;								}								if (ellipsis) {									ellipsis = false;									ellipsisOK = false;									UInt32	lastVal = classMembers.back();									if (tok.val < lastVal) {										Error("range out of order");										break;									}									while (++lastVal <= tok.val)										classMembers.push_back(lastVal);								}								else {									classMembers.push_back(tok.val);									ellipsisOK = true;								}								if (classMembers.back() > 0x0000ffff)									currentPass.supplementaryChars = true;								break;															case tok_String:								if (classType == 'U' && inputForm == kForm_Bytes) {									Error("can't use quoted string for Unicode class in 8-bit source text");									break;								}								if (classType == 'B' && inputForm != kForm_Bytes) {									Error("can't use quoted string for Byte class in Unicode source text");									break;								}								if (ellipsis) {									ellipsis = false;									
ellipsisOK = false;									if (tok.strval.length() != 1) {										Error("can only use single-character string with ..");										break;									}									UInt32	lastVal = classMembers.back();									if (tok.strval[0] < lastVal) {										Error("range out of order");										break;									}									while (++lastVal <= tok.strval[0])										classMembers.push_back(lastVal);									break;								}								ellipsisOK = (tok.strval.length() == 1);								for (i = tok.strval.begin(); i < tok.strval.end(); ++i)									classMembers.push_back(*i);								break;															case tok_Ellipsis:								if (ellipsisOK) {									ellipsisOK = false;									ellipsis = true;								}								else									Error("illegal .. in class");								break;															case '[':								{									if (ellipsis) {										Error("can't use [CLASS-NAME] after ..");										break;									}									ellipsis = false;									ellipsisOK = false;									// get the referenced class and copy in its members									if (ExpectToken(tok_Identifier, "expected [CLASS-NAME]")) {										string	refName(asUTF8(tok.strval));										if (classType == 'U') {											map<string,UInt32>::const_iterator	c = currentPass.uniClassNames.find(refName);											if (c == currentPass.uniClassNames.end()) {												Error("undefined class used", refName.c_str());												break;											}											Class	uc = currentPass.uniClassMembers[c->second];											for (Class::const_iterator i = uc.begin(); i != uc.end(); ++i)												classMembers.push_back(*i);										}										else {											map<string,UInt32>::const_iterator	c = currentPass.byteClassNames.find(refName);											if (c == currentPass.byteClassNames.end()) {												Error("undefined class used", refName.c_str());												break;											}											Class	bc = currentPass.byteClassMembers[c->second];											for (Class::const_iterator i = bc.begin(); i != bc.end(); ++i)												classMembers.push_back(*i);										}	
									if (!ExpectToken(']', "expected closing bracket after CLASS-NAME"))											break;									}								}								break;															case ')':								if (ellipsis)									Error("trailing .. in class");								break;															case tok_Newline:								Error("unexpected end of line within class");								break;															case tok_Identifier:								Error("unexpected identifier within class", asUTF8(tok.strval).c_str());								break;							default:								Error("unexpected token within class", string((const char*)tokStart, (const char*)textPtr - (const char*)tokStart).c_str());								break;						}					}					if (tok.type != tok_Newline)						if (!ExpectToken(tok_Newline, "junk at end of line"))							break;					// ok, we've got the class name and members; save it					if (classType == 'U') {						if (currentPass.uniClassNames.find(className) != currentPass.uniClassNames.end()) {							Error("class already defined", className.c_str());							break;						}						currentPass.uniClassNames[className] = currentPass.uniClassMembers.size();						currentPass.uniClassMembers.push_back(classMembers);						currentPass.uniClassLines.push_back(classLine);					}					else {						if (currentPass.byteClassNames.find(className) != currentPass.byteClassNames.end()) {							Error("class already defined", className.c_str());							break;						}						currentPass.byteClassNames[className] = currentPass.byteClassMembers.size();						currentPass.byteClassMembers.push_back(classMembers);						currentPass.byteClassLines.push_back(classLine);					}					goto GOT_TOKEN;				}				break;		}	}	FinishPass();	// Do we have names for both LHS and RHS? If not, is LHS legacy and RHS Unicode?	
if (names.find(kNameID_LHS_Name) == names.end()) {		Error("EncodingName or LHSName must be specified");	}	const string&   lhs = names[kNameID_LHS_Name];	if (lhs.find("(REG_ID)") != lhs.npos) {		Error("Draft mappings generated by Encore2Unicode MUST be reviewed before use");	}	if (names.find(kNameID_RHS_Name) == names.end()) {		if ((lhsFlags & kFlags_Unicode) == 0 || (rhsFlags & kFlags_Unicode) != 0) {			names[kNameID_RHS_Name] = "UNICODE";		}		else {			Error("RHSName must be specified for non-Legacy/Unicode mapping table");		}	}	if (errorCount == 0) {		if (generateXML) {			string	header;			header += "<?xml version=\"1.0\"?>\n";			header += "<teckitMapping\n";	#define doName(att,name_id)								\			if (names.find(name_id) != names.end()) {	\				header += " ";							\				header += att;							\				header += "=\"";						\				header += names[name_id];				\				header += "\"\n";						\			}				doName("lhsName", kNameID_LHS_Name);			doName("rhsName", kNameID_RHS_Name);			doName("lhsDescription", kNameID_LHS_Description);			doName("rhsDescription", kNameID_RHS_Description);			doName("version", kNameID_Version);			doName("contact", kNameID_Contact);			doName("registrationAuthority", kNameID_RegAuthority);			doName("registrationName", kNameID_RegName);			doName("copyright", kNameID_Copyright);			if (lhsFlags & kFlags_ExpectsNFC)				header += " lhsExpects=\"NFC\"\n";			else if (lhsFlags & kFlags_ExpectsNFD)				header += " lhsExpects=\"NFD\"\n";			if (rhsFlags & kFlags_ExpectsNFC)				header += " rhsExpects=\"NFC\"\n";			else if (rhsFlags & kFlags_ExpectsNFD)				header += " rhsExpects=\"NFD\"\n";			header += ">\n";			string	trailer("</teckitMapping>\n");						compiledSize = header.length() + xmlRepresentation.length() + trailer.length();			compiledTable = (Byte*)malloc(compiledSize + 1);			if (compiledTable == NULL)				throw bad_alloc();						memcpy(compiledTable, header.data(), header.length());			memcpy(compiledTable + header.length(), xmlRepresentation.data(), 
xmlRepresentation.length());			memcpy(compiledTable + header.length() + xmlRepresentation.length(), trailer.data(), trailer.length());			compiledTable[compiledSize] = 0;			xmlRepresentation.erase(xmlRepresentation.begin(), xmlRepresentation.end());		}		else {			// assemble the complete compiled file			FileHeader	fh;			WRITE(fh.type, kMagicNumber);			WRITE(fh.version, usedExtStringRules ? kCurrentFileVersion : kFileVersion2_1);			WRITE(fh.headerLength, 0);	// to be filled in later, once names and table counts are known			WRITE(fh.formFlagsLHS, lhsFlags);			WRITE(fh.formFlagsRHS, rhsFlags);				WRITE(fh.numFwdTables, fwdTables.size());			WRITE(fh.numRevTables, revTables.size());			WRITE(fh.numNames, names.size());						string	offsets;			UInt32	offset = sizeof(FileHeader) + (names.size() + fwdTables.size() + revTables.size()) * sizeof(UInt32);			UInt32	prevLength = 0;						// sort the name IDs into ascending order			vector<UInt16>	nameIDs;			nameIDs.reserve(names.size());			for (map<UInt16,string>::const_iterator n = names.begin(); n != names.end(); ++n) {				nameIDs.push_back(n->first);			}			sort(nameIDs.begin(), nameIDs.end());						// pack all the name records			string	namesData;			for (vector<UInt16>::const_iterator i = nameIDs.begin(); i != nameIDs.end(); ++i) {				appendToTable(offsets, (const char*)&offset, sizeof(offset));				NameRec	r;				WRITE(r.nameID, *i);				WRITE(r.nameLength, names[*i].length());				namesData.append((const char*)&r, sizeof(r));				namesData.append(names[*i]);				if ((namesData.length() & 1) != 0)					namesData.append(1, (char)0);				offset += namesData.length() - prevLength;				prevLength = namesData.length();			}			if ((namesData.length() & 2) != 0)				namesData.append(2, (char)0);			offset += namesData.length() - prevLength;						// pack the offsets to the actual mapping tables			vector<string>::const_iterator t;			for (t = fwdTables.begin(); t != fwdTables.end(); ++t) {				appendToTable(offsets, (const char*)&offset, sizeof(offset));	
			offset += t->size();			}			for (t = revTables.end(); t != revTables.begin(); ) {				--t;				appendToTable(offsets, (const char*)&offset, sizeof(offset));				offset += t->size();			}						WRITE(fh.headerLength, sizeof(fh) + offsets.length() + namesData.length());				if (errorCount == 0) {				// calculate total size of compiled table, malloc() it, and copy everything into it				compiledSize = sizeof(fh)							+ offsets.length()							+ namesData.length();				for (t = fwdTables.begin(); t != fwdTables.end(); ++t)					compiledSize += t->length();				for (t = revTables.begin(); t != revTables.end(); ++t)					compiledSize += t->length();					compiledTable = (Byte*)malloc(compiledSize);				if (compiledTable != 0) {					char*	cp = (char*)compiledTable;					memcpy(cp, &fh, sizeof(fh));					cp += sizeof(fh);					memcpy(cp, offsets.data(), offsets.length());					cp += offsets.length();					memcpy(cp, namesData.data(), namesData.length());					cp += namesData.length();					for (t = fwdTables.begin(); t != fwdTables.end(); ++t) {						memcpy(cp, t->data(), t->length());						cp += t->length();					}					for (t = revTables.end(); t != revTables.begin(); ) {						--t;						memcpy(cp, t->data(), t->length());						cp += t->length();					}					if ((char*)compiledTable + compiledSize != cp)						cerr << "error!" << endl;				}				else					throw bad_alloc();			}			#ifndef NO_ZLIB			if (errorCount == 0 && cmp) {				// do the compression...				
unsigned long	destLen = compiledSize * 11 / 10 + 20;				Byte*	dest = (Byte*)malloc(destLen + 8);				if (dest != 0) {					int	result = compress2(dest + 8, &destLen, compiledTable, compiledSize, Z_BEST_COMPRESSION);					if (result == Z_OK) {						destLen += 8;						dest = (Byte*)realloc(dest, destLen); // shrink dest to fit						WRITE(((FileHeader*)dest)->type, kMagicNumberCmp);						WRITE(((FileHeader*)dest)->version, compiledSize);						free(compiledTable);						compiledTable = dest;						compiledSize = destLen;					}					else						free(dest);				}			}#endif		}	}}Compiler::~Compiler(){	if (compiledTable != 0)		free(compiledTable);}voidCompiler::GetCompiledTable(Byte*& table, UInt32& len) const{	table = compiledTable;	len = compiledSize;}voidCompiler::DetachCompiledTable(){	compiledTable = 0;	compiledSize = 0;}stringCompiler::asUTF8(const string32 s){	string	rval;	string32::const_iterator i;	for (i = s.begin(); i != s.end(); ++i) {		UInt32	c = *i;		int	bytesToWrite;		if (c < 0x80) {				bytesToWrite = 1;
💿 文件大小 7978 K
👤 上传用户 oujk123
📂 所属分类其他行业
🏷️ 相关标签

#text #middleware-layer #information #retrieval
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -