📄 compiler.cpp
字号:
} else if (c < 0x800) { bytesToWrite = 2; } else if (c < 0x10000) { bytesToWrite = 3; } else if (c < 0x200000) { bytesToWrite = 4; } else { bytesToWrite = 2; c = 0x0000fffd; }; rval.append((size_t)bytesToWrite, 0); int index = rval.length(); switch (bytesToWrite) { /* note: code falls through cases! */ case 4: rval[--index] = (c | byteMark) & byteMask; c >>= 6; case 3: rval[--index] = (c | byteMark) & byteMask; c >>= 6; case 2: rval[--index] = (c | byteMark) & byteMask; c >>= 6; case 1: rval[--index] = c | firstByteMark[bytesToWrite]; }; } return rval;}voidCompiler::ReadNameString(UInt16 nameID){ if (ExpectToken(tok_String, "expected STRING after name keyword")) { if (inputForm == kForm_Bytes) { names[nameID].erase(names[nameID].begin(), names[nameID].end()); for (string32::const_iterator i = tok.strval.begin(); i != tok.strval.end(); ++i) names[nameID].append(1, *i); } else names[nameID] = asUTF8(tok.strval); ExpectToken(tok_Newline, "junk at end of line"); }}voidCompiler::FinishPass(){ if (currentPass.passType == 0) return; if ((currentPass.passType & 0xFFFF0000) == (FOUR_CHAR_CODE('N','F','_','_') & 0xFFFF0000)) { while (errorCount == 0) { if (fwdTables.size() == 0) lhsFlags |= kFlags_Unicode; else { if ((rhsFlags & kFlags_Unicode) == 0) { Error("normalization only supported in Unicode space"); break; } } rhsFlags |= kFlags_Unicode; string normTable((currentPass.passType & 0x0000FF00) == (FOUR_CHAR_CODE('_','_','C','_') & 0x0000FF00) ? "NFC " : "NFD "); if ((currentPass.passType & 0x000000FF) != 'r') fwdTables.push_back(normTable); if ((currentPass.passType & 0x000000FF) != 'f') revTables.push_back(normTable); if (generateXML) { xmlOut("<pass lhs=\"unicode\" rhs=\"unicode\" line=\""); xmlOut(asDec(currentPass.startingLine)); xmlOut("\">\n"); xmlOut("<normalize form=\""); xmlOut(normTable[2]); if ((currentPass.passType & 0x000000FF) == 'f') xmlOut(" dir=\"fwd\""); else if ((currentPass.passType & 0x000000FF) == 'r') xmlOut(" dir=\"rev\""); xmlOut("\">\n"); xmlOut("</pass>\n"); } break; } } else { while (errorCount == 0) { // not really a loop; just so we can use 'break' to exit early bool sourceUni = (currentPass.passType == kCode_UB) || (currentPass.passType == kCode_Unic); bool targetUni = (currentPass.passType == kCode_BU) || (currentPass.passType == kCode_Unic); if (generateXML) { // pass header xmlOut("<pass lhs=\""); xmlOut(sourceUni ? "unicode" : "bytes"); xmlOut("\" rhs=\""); xmlOut(targetUni ? "unicode" : "bytes"); if (sourceUni != targetUni) { xmlOut("\" lhsDefault=\""); xmlOut(sourceUni ? asHex(currentPass.uniDefault, 4) : asHex(currentPass.byteDefault, 2)); xmlOut("\" rhsDefault=\""); xmlOut(targetUni ? asHex(currentPass.uniDefault, 4) : asHex(currentPass.byteDefault, 2)); } xmlOut("\" line=\""); xmlOut(asDec(currentPass.startingLine)); xmlOut("\">\n"); // class definitions if (currentPass.byteClassMembers.size() > 0 || currentPass.uniClassMembers.size() > 0) { xmlOut("<classes>\n"); unsigned int i; for (i = 0; i < currentPass.byteClassMembers.size(); ++i) { xmlOut("<class size=\"bytes\" name=\"b_"); xmlOut(getClassName(currentPass.byteClassNames, i)); xmlOut("\" line=\""); xmlOut(asDec(currentPass.byteClassLines[i])); xmlOut("\">"); for (Class::const_iterator ci = currentPass.byteClassMembers[i].begin(); ci != currentPass.byteClassMembers[i].end(); ++ci) { xmlOut(ci == currentPass.byteClassMembers[i].begin() ? "\n" : " "); xmlOut(asHex(*ci, 2)); } xmlOut("\n</class>\n"); } for (i = 0; i < currentPass.uniClassMembers.size(); ++i) { xmlOut("<class size=\"unicode\" name=\"u_"); xmlOut(getClassName(currentPass.uniClassNames, i)); xmlOut("\" line=\""); xmlOut(asDec(currentPass.uniClassLines[i])); xmlOut("\">"); for (Class::const_iterator ci = currentPass.uniClassMembers[i].begin(); ci != currentPass.uniClassMembers[i].end(); ++ci) { xmlOut(ci == currentPass.uniClassMembers[i].begin() ? "\n" : " "); xmlOut(asHex(*ci, 4)); } xmlOut("\n</class>\n"); } xmlOut("</classes>\n"); } if (currentPass.xmlContexts.size() > 0) { xmlOut("<contexts>\n"); for (map<string,string>::const_iterator i = currentPass.xmlContexts.begin(); i != currentPass.xmlContexts.end(); ++i) { xmlOut("<context id=\""); xmlOut(i->second); xmlOut("\">"); xmlOut(i->first); xmlOut("</context>\n"); } xmlOut("</contexts>\n"); } xmlOut("<assignments>\n"); for (vector<string>::const_iterator i = currentPass.xmlRules.begin(); i != currentPass.xmlRules.end(); ++i) { xmlOut(*i); } xmlOut("</assignments>\n"); // end pass xmlOut("</pass>\n"); } if (fwdTables.size() == 0) { if (sourceUni) lhsFlags |= kFlags_Unicode; } else { if (sourceUni != ((rhsFlags & kFlags_Unicode) != 0)) { Error("code space mismatch"); break; } } rhsFlags &= ~kFlags_Unicode; if (targetUni) rhsFlags |= kFlags_Unicode; // deal with COPY on LHS, and set up class/copy replacement index fields associateItems(currentPass.fwdRules, sourceUni, targetUni); if (errorCount > 0) break; setGroupPointers(currentPass.fwdRules); // sort rules by length (also propagates repeat counts from EGroup back to BGroup items) sortRules(currentPass.fwdRules); if (errorCount > 0) break; // build the forward table fwdTables.push_back(string()); buildTable(currentPass.fwdRules, sourceUni, targetUni, fwdTables.back()); buildVars.clear(); if (errorCount > 0) break; // build the reverse table associateItems(currentPass.revRules, targetUni, sourceUni); if (errorCount > 0) break; setGroupPointers(currentPass.revRules); sortRules(currentPass.revRules); if (errorCount > 0) break; revTables.push_back(string()); buildTable(currentPass.revRules, targetUni, sourceUni, revTables.back()); buildVars.clear(); break; } } currentPass.clear(); currentPass.setLineNo(lineNumber);}voidCompiler::SkipSpaces(void){ while (textPtr < textEnd) { currCh = getChar(); if (currCh != ' ' && currCh != '\t') { ungetChar(currCh); break; } }}Compiler::tokenTypeCompiler::IDlookup(const char* str, UInt32 len){ const Keyword *k = &keywords[0]; while (k->keyword != 0) if (strmatch(k->keyword, str, len)) { tok.val = k->refCon; return k->token; } else ++k; // try for a macro map<string,tokListT>::const_iterator i = defines.find(string(str, len)); if (i != defines.end()) { defIter = i->second.begin(); defEnd = i->second.end(); tok = *defIter; defIter++; return tok.type; } // didn't find the identifier as a keyword; try as a Unicode char name // NOTE: the names are now sorted (by Unicode name), so we could use a binary // search here if anyone complains about compilation time when using names :) const CharName *c = &gUnicodeNames[0]; while (c->name != 0) if (unicodeNameCompare(c->name, str, len) == 0) { tok.val = c->usv; return tok_USV; } else ++c;#ifdef __MWERKS__ tok.strval.clear();#else tok.strval.erase(tok.strval.begin(), tok.strval.end());#endif while (len-- > 0) tok.strval.append(1, *str++); return tok_Identifier;}UInt32Compiler::getChar(){ UInt32 rval = 0; if (ungotten != kInvalidChar) { rval = ungotten; ungotten = kInvalidChar; return rval; }#define CHECK_AVAIL(x) \ if (textPtr + (x) > textEnd) { \ textPtr = textEnd; \ return kInvalidChar; \ } switch (inputForm) { case kForm_Bytes: rval = *textPtr++; break; case kForm_UTF8: { UInt16 extraBytes = bytesFromUTF8[*textPtr]; CHECK_AVAIL(extraBytes + 1); switch (extraBytes) { // note: code falls through cases! case 5: rval += *textPtr++; rval <<= 6; case 4: rval += *textPtr++; rval <<= 6; case 3: rval += *textPtr++; rval <<= 6; case 2: rval += *textPtr++; rval <<= 6; case 1: rval += *textPtr++; rval <<= 6; case 0: rval += *textPtr++; }; rval -= offsetsFromUTF8[extraBytes]; } break; case kForm_UTF16BE: CHECK_AVAIL(2); rval = *textPtr++ << 8; rval += *textPtr++; if (rval >= kSurrogateHighStart && rval <= kSurrogateHighEnd) { // check that 2 more bytes are available CHECK_AVAIL(2); UInt32 low = *textPtr++ << 8; low += *textPtr++; rval = ((rval - kSurrogateHighStart) << halfShift) + (low - kSurrogateLowStart) + halfBase; } break; case kForm_UTF16LE: CHECK_AVAIL(2); rval = *textPtr++; rval += *textPtr++ << 8; if (rval >= kSurrogateHighStart && rval <= kSurrogateHighEnd) { CHECK_AVAIL(2); UInt32 low = *textPtr++; low += *textPtr++ << 8; rval = ((rval - kSurrogateHighStart) << halfShift) + (low - kSurrogateLowStart) + halfBase; } break; case kForm_UTF32BE: CHECK_AVAIL(4); rval = *textPtr++ << 24; rval += *textPtr++ << 16; rval += *textPtr++ << 8; rval += *textPtr++; break; case kForm_UTF32LE: CHECK_AVAIL(4); rval = *textPtr++; rval += *textPtr++ << 8; rval += *textPtr++ << 16; rval += *textPtr++ << 24; break; } return rval;}voidCompiler::ungetChar(UInt32 c){ ungotten = c;}boolCompiler::GetNextToken(){ UInt32 currCh; if (defIter != defEnd) { tok = *defIter; defIter++; return true; } if (textPtr == textEnd) { ++textPtr; tok.type = tok_Newline; return true; } if (textPtr >= textEnd) return false; while (true) { SkipSpaces(); tokStart = textPtr; if (textPtr == textEnd) { ++textPtr; tok.type = tok_Newline; ++lineNumber; return true; } if (textPtr > textEnd) return false; currCh = getChar(); switch (currCh) { case '\r': if (textPtr < textEnd) { currCh = getChar(); if (currCh != '\n') ungetChar(currCh); } tok.type = tok_Newline; ++lineNumber; return true; case '\n': if (textPtr < textEnd) { currCh = getChar(); if (currCh != '\r') ungetChar(currCh); } tok.type = tok_Newline; ++lineNumber; return true; case '\\': if (textPtr < textEnd) { currCh = getChar(); if (currCh == '\r' || currCh == '\n') { if (textPtr < textEnd) { UInt32 nextCh = getChar(); if (!((currCh == '\r' && nextCh == '\n') || (currCh == '\n' && nextCh == '\r'))) ungetChar(nextCh); } ++lineNumber; continue; } ungetChar(currCh); } goto DEFAULT; case '"': case '\'': { UInt32 delimiter = currCh;#ifdef __MWERKS__ tok.strval.clear();#else tok.strval.erase(tok.strval.begin(), tok.strval.end());#endif while ((textPtr < textEnd) && ((currCh = getChar()) != delimiter) && (currCh != '\r') && (currCh != '\n')) tok.strval.append(1, currCh); tok.type = tok_String; if (currCh == '\r' || currCh == '\n') ungetChar(currCh); } return true; case '^': case '(': case ')': case '[': case ']': case '{': case '}': case ',': case '+': case '*': case '?': case '>': case '#': case '|': case '/': case '=': case '@': tok.type = (tokenType)currCh; return true; case '<': tok.type = (tokenType)'<'; if (textPtr < textEnd) if ((currCh = getChar()) == '>') tok.type = tok_Map; else ungetChar(currCh); return true; case '.': tok.type = (tokenType)'.'; if (textPtr < textEnd) if ((currCh = getChar()) == '.') tok.type = tok_Ellipsis; else ungetChar(currCh); return true; case '_': if (textPtr < textEnd) { currCh = getChar(); ungetChar(currCh); if (isIDcont(currCh)) { currCh = '_'; goto DEFAULT; } } tok.type = (tokenType)'_'; return true; case '0': if (textPtr < textEnd) { currCh = getChar(); if (currCh == 'x' || currCh == 'X') { tok.type = tok_Number; tok.val = 0; while (textPtr < textEnd) { currCh = getChar(); if (currCh >= '0' && currCh <= '9') tok.val = tok.val * 16 + currCh - '0'; else if (currCh >= 'a' && currCh <= 'f') tok.val = tok.val * 16 + currCh - 'a' + 10; else if (currCh >= 'A' && currCh <= 'F') tok.val = tok.val * 16 + currCh - 'A' + 10; else {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -