📄 compiler.cpp
字号:
break; int nameID = tok.val; if (!ExpectToken(')', "expected (NUMBER) STRING after Name")) break; ReadNameString(nameID); } else ReadNameString(tok.val); goto GOT_TOKEN; // ReadNameString has already read the newline case tok_Flags: { if (!ExpectToken('(', "expected (FLAG-LIST) after SourceFlags/TargetFlags")) break; UInt32 flagValue = 0; char whichFlags = tok.val; while (1) { GetNextToken(); if (tok.type == tok_FlagValue) flagValue |= tok.val; else break; } if (tok.type != ')') { Error("expected (FLAG-LIST) after SourceFlags/TargetFlags"); break; } if (whichFlags == 'S') lhsFlags = flagValue; else rhsFlags = flagValue; } ExpectToken(tok_Newline, "junk at end of line"); break; case tok_Pass: FinishPass(); currentPass.setLineNo(lineNumber); if (!ExpectToken('(', "expected (PASS-TYPE) after Pass")) break; GetNextToken(); if (tok.type == tok_PassType) currentPass.passType = tok.val; else Error("unrecognized pass type"); if (!ExpectToken(')', "expected (PASS-TYPE) after Pass")) break; ExpectToken(tok_Newline, "junk at end of line"); goto GOT_TOKEN; case tok_Default: StartDefaultPass(); if (currentPass.passType != kCode_BU && currentPass.passType != kCode_UB) { Error("defaults are only used in Byte_Unicode and Unicode_Byte passes"); break; } { char whichDefault = tok.val; GetNextToken(); switch (tok.type) { case tok_String: if (tok.strval.length() != 1) Error("default can only be a single character, not a multi-character string"); else if (whichDefault == 'U') { if (inputForm == kForm_Bytes) Error("UniDefault cannot use quoted character in 8-bit source text"); else currentPass.uniDefault = tok.strval[0]; } else { if (inputForm != kForm_Bytes) Error("ByteDefault cannot use quoted character in Unicode source text"); else currentPass.byteDefault = tok.strval[0]; } break; case tok_Number: if (whichDefault == 'U') currentPass.uniDefault = tok.val; else currentPass.byteDefault = tok.val; break; case tok_USV: if (whichDefault == 'U') currentPass.uniDefault = tok.val; else Error("can't use Unicode value in byte encoding"); break; default: Error("expected character code after ByteDefault/UniDefault"); break; } } break; case tok_Class: StartDefaultPass(); classLine = lineNumber; if (tok.val == 0) { if (currentPass.passType == kCode_Byte) classType = 'B'; else if (currentPass.passType == kCode_Unic) classType = 'U'; else { Error("must use ByteClass or UniClass to define classes in this pass"); break; } } else { classType = tok.val; if (classType == 'B' && currentPass.passType == kCode_Unic) Error("can't use ByteClass in this pass"); else if (classType == 'U' && currentPass.passType == kCode_Byte) Error("can't use UniClass in this pass"); } { UInt32 classLimit = (classType == 'U' ? 0x10ffff : 0xff); if (!ExpectToken('[', "expected [CLASS-NAME] after Class/ByteClass/UniClass")) break; if (!ExpectToken(tok_Identifier, "expected [CLASS-NAME] after Class/ByteClass/UniClass")) break; string className(asUTF8(tok.strval)); if (!ExpectToken(']', "expected [CLASS-NAME] after Class/ByteClass/UniClass")) break; if (!ExpectToken('=', "expected =(CHARACTER-CODE-LIST) after Class/ByteClass/UniClass[CLASS-NAME]")) break; if (!ExpectToken('(', "expected =(CHARACTER-CODE-LIST) after Class/ByteClass/UniClass[CLASS-NAME]")) break; vector<UInt32> classMembers; bool ellipsis = false; bool ellipsisOK = false; while (tok.type != ')' && tok.type != tok_Newline) { GetNextToken(); switch (tok.type) { case tok_USV: if (classType == 'B') { Error("can't use Unicode value in byte encoding"); break; } // fall through case tok_Number: if (tok.val > classLimit) { Error("class element outside valid range"); break; } if (ellipsis) { ellipsis = false; ellipsisOK = false; UInt32 lastVal = classMembers.back(); if (tok.val < lastVal) { Error("range out of order"); break; } while (++lastVal <= tok.val) classMembers.push_back(lastVal); } else { classMembers.push_back(tok.val); ellipsisOK = true; } if (classMembers.back() > 0x0000ffff) currentPass.supplementaryChars = true; break; case tok_String: if (classType == 'U' && inputForm == kForm_Bytes) { Error("can't use quoted string for Unicode class in 8-bit source text"); break; } if (classType == 'B' && inputForm != kForm_Bytes) { Error("can't use quoted string for Byte class in Unicode source text"); break; } if (ellipsis) { ellipsis = false; ellipsisOK = false; if (tok.strval.length() != 1) { Error("can only use single-character string with .."); break; } UInt32 lastVal = classMembers.back(); if (tok.strval[0] < lastVal) { Error("range out of order"); break; } while (++lastVal <= tok.strval[0]) classMembers.push_back(lastVal); break; } ellipsisOK = (tok.strval.length() == 1); for (i = tok.strval.begin(); i < tok.strval.end(); ++i) classMembers.push_back(*i); break; case tok_Ellipsis: if (ellipsisOK) { ellipsisOK = false; ellipsis = true; } else Error("illegal .. in class"); break; case '[': { if (ellipsis) { Error("can't use [CLASS-NAME] after .."); break; } ellipsis = false; ellipsisOK = false; // get the referenced class and copy in its members if (ExpectToken(tok_Identifier, "expected [CLASS-NAME]")) { string refName(asUTF8(tok.strval)); if (classType == 'U') { map<string,UInt32>::const_iterator c = currentPass.uniClassNames.find(refName); if (c == currentPass.uniClassNames.end()) { Error("undefined class used", refName.c_str()); break; } Class uc = currentPass.uniClassMembers[c->second]; for (Class::const_iterator i = uc.begin(); i != uc.end(); ++i) classMembers.push_back(*i); } else { map<string,UInt32>::const_iterator c = currentPass.byteClassNames.find(refName); if (c == currentPass.byteClassNames.end()) { Error("undefined class used", refName.c_str()); break; } Class bc = currentPass.byteClassMembers[c->second]; for (Class::const_iterator i = bc.begin(); i != bc.end(); ++i) classMembers.push_back(*i); } if (!ExpectToken(']', "expected closing bracket after CLASS-NAME")) break; } } break; case ')': if (ellipsis) Error("trailing .. in class"); break; case tok_Newline: Error("unexpected end of line within class"); break; case tok_Identifier: Error("unexpected identifier within class", asUTF8(tok.strval).c_str()); break; default: Error("unexpected token within class", string((const char*)tokStart, (const char*)textPtr - (const char*)tokStart).c_str()); break; } } if (tok.type != tok_Newline) if (!ExpectToken(tok_Newline, "junk at end of line")) break; // ok, we've got the class name and members; save it if (classType == 'U') { if (currentPass.uniClassNames.find(className) != currentPass.uniClassNames.end()) { Error("class already defined", className.c_str()); break; } currentPass.uniClassNames[className] = currentPass.uniClassMembers.size(); currentPass.uniClassMembers.push_back(classMembers); currentPass.uniClassLines.push_back(classLine); } else { if (currentPass.byteClassNames.find(className) != currentPass.byteClassNames.end()) { Error("class already defined", className.c_str()); break; } currentPass.byteClassNames[className] = currentPass.byteClassMembers.size(); currentPass.byteClassMembers.push_back(classMembers); currentPass.byteClassLines.push_back(classLine); } goto GOT_TOKEN; } break; } } FinishPass(); // Do we have names for both LHS and RHS? If not, is LHS legacy and RHS Unicode? if (names.find(kNameID_LHS_Name) == names.end()) { Error("EncodingName or LHSName must be specified"); } const string& lhs = names[kNameID_LHS_Name]; if (lhs.find("(REG_ID)") != lhs.npos) { Error("Draft mappings generated by Encore2Unicode MUST be reviewed before use"); } if (names.find(kNameID_RHS_Name) == names.end()) { if ((lhsFlags & kFlags_Unicode) == 0 || (rhsFlags & kFlags_Unicode) != 0) { names[kNameID_RHS_Name] = "UNICODE"; } else { Error("RHSName must be specified for non-Legacy/Unicode mapping table"); } } if (errorCount == 0) { if (generateXML) { string header; header += "<?xml version=\"1.0\"?>\n"; header += "<teckitMapping\n"; #define doName(att,name_id) \ if (names.find(name_id) != names.end()) { \ header += " "; \ header += att; \ header += "=\""; \ header += names[name_id]; \ header += "\"\n"; \ } doName("lhsName", kNameID_LHS_Name); doName("rhsName", kNameID_RHS_Name); doName("lhsDescription", kNameID_LHS_Description); doName("rhsDescription", kNameID_RHS_Description); doName("version", kNameID_Version); doName("contact", kNameID_Contact); doName("registrationAuthority", kNameID_RegAuthority); doName("registrationName", kNameID_RegName); doName("copyright", kNameID_Copyright); if (lhsFlags & kFlags_ExpectsNFC) header += " lhsExpects=\"NFC\"\n"; else if (lhsFlags & kFlags_ExpectsNFD) header += " lhsExpects=\"NFD\"\n"; if (rhsFlags & kFlags_ExpectsNFC) header += " rhsExpects=\"NFC\"\n"; else if (rhsFlags & kFlags_ExpectsNFD) header += " rhsExpects=\"NFD\"\n"; header += ">\n"; string trailer("</teckitMapping>\n"); compiledSize = header.length() + xmlRepresentation.length() + trailer.length(); compiledTable = (Byte*)malloc(compiledSize + 1); if (compiledTable == NULL) throw bad_alloc(); memcpy(compiledTable, header.data(), header.length()); memcpy(compiledTable + header.length(), xmlRepresentation.data(), xmlRepresentation.length()); memcpy(compiledTable + header.length() + xmlRepresentation.length(), trailer.data(), trailer.length()); compiledTable[compiledSize] = 0; xmlRepresentation.erase(xmlRepresentation.begin(), xmlRepresentation.end()); } else { // assemble the complete compiled file FileHeader fh; WRITE(fh.type, kMagicNumber); WRITE(fh.version, usedExtStringRules ? kCurrentFileVersion : kFileVersion2_1); WRITE(fh.headerLength, 0); // to be filled in later, once names and table counts are known WRITE(fh.formFlagsLHS, lhsFlags); WRITE(fh.formFlagsRHS, rhsFlags); WRITE(fh.numFwdTables, fwdTables.size()); WRITE(fh.numRevTables, revTables.size()); WRITE(fh.numNames, names.size()); string offsets; UInt32 offset = sizeof(FileHeader) + (names.size() + fwdTables.size() + revTables.size()) * sizeof(UInt32); UInt32 prevLength = 0; // sort the name IDs into ascending order vector<UInt16> nameIDs; nameIDs.reserve(names.size()); for (map<UInt16,string>::const_iterator n = names.begin(); n != names.end(); ++n) { nameIDs.push_back(n->first); } sort(nameIDs.begin(), nameIDs.end()); // pack all the name records string namesData; for (vector<UInt16>::const_iterator i = nameIDs.begin(); i != nameIDs.end(); ++i) { appendToTable(offsets, (const char*)&offset, sizeof(offset)); NameRec r; WRITE(r.nameID, *i); WRITE(r.nameLength, names[*i].length()); namesData.append((const char*)&r, sizeof(r)); namesData.append(names[*i]); if ((namesData.length() & 1) != 0) namesData.append(1, (char)0); offset += namesData.length() - prevLength; prevLength = namesData.length(); } if ((namesData.length() & 2) != 0) namesData.append(2, (char)0); offset += namesData.length() - prevLength; // pack the offsets to the actual mapping tables vector<string>::const_iterator t; for (t = fwdTables.begin(); t != fwdTables.end(); ++t) { appendToTable(offsets, (const char*)&offset, sizeof(offset)); offset += t->size(); } for (t = revTables.end(); t != revTables.begin(); ) { --t; appendToTable(offsets, (const char*)&offset, sizeof(offset)); offset += t->size(); } WRITE(fh.headerLength, sizeof(fh) + offsets.length() + namesData.length()); if (errorCount == 0) { // calculate total size of compiled table, malloc() it, and copy everything into it compiledSize = sizeof(fh) + offsets.length() + namesData.length(); for (t = fwdTables.begin(); t != fwdTables.end(); ++t) compiledSize += t->length(); for (t = revTables.begin(); t != revTables.end(); ++t) compiledSize += t->length(); compiledTable = (Byte*)malloc(compiledSize); if (compiledTable != 0) { char* cp = (char*)compiledTable; memcpy(cp, &fh, sizeof(fh)); cp += sizeof(fh); memcpy(cp, offsets.data(), offsets.length()); cp += offsets.length(); memcpy(cp, namesData.data(), namesData.length()); cp += namesData.length(); for (t = fwdTables.begin(); t != fwdTables.end(); ++t) { memcpy(cp, t->data(), t->length()); cp += t->length(); } for (t = revTables.end(); t != revTables.begin(); ) { --t; memcpy(cp, t->data(), t->length()); cp += t->length(); } if ((char*)compiledTable + compiledSize != cp) cerr << "error!" << endl; } else throw bad_alloc(); } #ifndef NO_ZLIB if (errorCount == 0 && cmp) { // do the compression... unsigned long destLen = compiledSize * 11 / 10 + 20; Byte* dest = (Byte*)malloc(destLen + 8); if (dest != 0) { int result = compress2(dest + 8, &destLen, compiledTable, compiledSize, Z_BEST_COMPRESSION); if (result == Z_OK) { destLen += 8; dest = (Byte*)realloc(dest, destLen); // shrink dest to fit WRITE(((FileHeader*)dest)->type, kMagicNumberCmp); WRITE(((FileHeader*)dest)->version, compiledSize); free(compiledTable); compiledTable = dest; compiledSize = destLen; } else free(dest); } }#endif } }}Compiler::~Compiler(){ if (compiledTable != 0) free(compiledTable);}voidCompiler::GetCompiledTable(Byte*& table, UInt32& len) const{ table = compiledTable; len = compiledSize;}voidCompiler::DetachCompiledTable(){ compiledTable = 0; compiledSize = 0;}stringCompiler::asUTF8(const string32 s){ string rval; string32::const_iterator i; for (i = s.begin(); i != s.end(); ++i) { UInt32 c = *i; int bytesToWrite; if (c < 0x80) { bytesToWrite = 1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -