📄 stafconverter.cpp
字号:
case 4: return &leafPtr[key[0] << 2]; default: break; } break; case 2: switch (fSizeOfVal) { case 1: leafPtr = nextPtr->leaf[key[0]]; return &leafPtr[key[1]]; case 2: leafPtr = nextPtr->leaf[key[0]]; return &leafPtr[key[1] << 1]; case 4: leafPtr = nextPtr->leaf[key[0]]; return &leafPtr[key[1] << 2]; default: break; } break; case 4: switch (fSizeOfVal) { case 1: nextPtr = nextPtr->node[key[0]]; nextPtr = nextPtr->node[key[1]]; leafPtr = nextPtr->leaf[key[2]]; return &leafPtr[key[3]]; case 2: nextPtr = nextPtr->node[key[0]]; nextPtr = nextPtr->node[key[1]]; leafPtr = nextPtr->leaf[key[2]]; return &leafPtr[key[3] << 1]; case 4: nextPtr = nextPtr->node[key[0]]; nextPtr = nextPtr->node[key[1]]; leafPtr = nextPtr->leaf[key[2]]; return &leafPtr[key[3] << 2]; default: break; } break; default: break; } int i = 0; // uncommon cases get here // walk down until last node (one level before the leaf) for (i = 0; i < fSizeOfKey - 2; i++) nextPtr = nextPtr->node[key[i]]; // now get the leaf ... leafPtr = nextPtr->leaf[key[i]]; // scale by size of value and return return &leafPtr[ key[fSizeOfKey - 1] * fSizeOfVal ];}///////////////////////////////////////////////////////////////////////////////const unsigned int SIGNATURE = 0xDEADC0DE; // bin file signatureconst unsigned int INVALID_STRING = 1; // error codeconst unsigned int NOT_IMPLEMENTED = 2; // error codestatic const char SIZE_TABLE[] ={ // This table allows for O(1) lookup of a char size. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};///////////////////////////////////////////////////////////////////////////////const char *kAliasNamePtr = "alias.txt";const char *kDefaultCodePagePtr = "LATIN_1";const char *kDefaultConvDirPtr = "/usr/local/staf/codepage";bool STAFConverter::sAliasCreated = false;char *STAFConverter::sConvDirPtr = 0;std::map<std::string, std::string> STAFConverter::sAliasTable;///////////////////////////////////////////////////////////////////////////////STAFConverter::STAFConverter(char *cpName, FileType fileType){ pC2UData = pU2CData = 0; memset(&fHeader, 0, sizeof(fHeader)); memset((char *)&fCharSize, 0, sizeof(fCharSize)); fHeader.max_uni_size = MAX_UNI_CHAR_SIZE; fHeader.def_uni_char[MAX_UNI_CHAR_SIZE - 1] = 0x2f; if (fileType == kUCM) { fromUCMFile(std::string(cpName)); return; } if (cpName == 0) { cpName = determineCodePage(); } if (sConvDirPtr == 0) { sConvDirPtr = determineConvDir(); } if (sAliasCreated == false) { createAliasTable(); sAliasCreated = true; } std::string lowerCPName(cpName); for(int i = 0; i < lowerCPName.length(); ++i) lowerCPName[i] = tolower(lowerCPName[i]); // Note: we need the 'C' string from lowerCPName, otherwise // the hashtable key is a different type and would not find // the value being looked up std::string convName = sAliasTable[lowerCPName.c_str()]; std::string finalName; if (convName == "LATIN_1") { fHeader.conv_class = kLATIN1; fC2UFunc = &STAFConverter::fromLATIN1; fU2CFunc = &STAFConverter::toLATIN1; return; } else if (convName == "UTF8") { fHeader.conv_class = kUTF8; fC2UFunc = &STAFConverter::fromUTF8; fU2CFunc = &STAFConverter::toUTF8; return; } else if (convName == "") { finalName = lowerCPName; } else { finalName = convName; } if (fromBINFile(finalName)) { fHeader.conv_class = kLATIN1; fC2UFunc = &STAFConverter::fromLATIN1; fU2CFunc = &STAFConverter::toLATIN1; cerr << "WARNING: Defaulting to " << kDefaultCodePagePtr << endl; }}STAFConverter::~STAFConverter(){ if (pU2CData) delete pU2CData; if (pC2UData) delete pC2UData;}char *STAFConverter::determineCodePage(){ // try to determine the codepage static char codePageBuffer[32]; // STAFCODEPAGEOVERRIDE is only used to force a codepage to be // selected. It should not be documented since it is for inter- // nal use only. char *cpName = getenv("STAFCODEPAGEOVERRIDE"); // if not set, then ask the OS which codepage we are operating // on if (cpName == 0 || strlen(cpName) == 0) { cpName = STAFUtilGetCurrentProcessCodePage(codePageBuffer); } // if not set, then check if the STAFCODEPAGE env variable is // set if (cpName == 0 || strlen(cpName) == 0) { cpName = getenv("STAFCODEPAGE"); } // if not set, then put a warning and assume the system we are // working on used the default codepage (LATIN_1) if (cpName == 0 || strlen(cpName) == 0) { // if no system code page found, set it to default and warn user cerr << "WARNING: Could not determine codepage." << endl << "env STAFCODEPAGE not set; defaulting to " << kDefaultCodePagePtr << endl; cpName = (char *)kDefaultCodePagePtr; } return cpName;}char *STAFConverter::determineConvDir(){ char *convDir = getenv("STAFCONVDIR"); // if STAFCONVDIR is not set, we have no means to know were the code- // page files and the alias file are located, so assume a directory. if (convDir == 0 || strlen(convDir) == 0) { /* WE SHOULD NOT SPIT OFF THIS WARNING cerr << "WARNING: Could not determine codepage directory." << endl << "env STAFCONVDIR not set; defaulting to " << kDefaultConvDirPtr << endl; */ convDir = (char *)kDefaultConvDirPtr; } return convDir;}unsigned int STAFConverter::encodeUTF8(const unsigned char *uniChar, unsigned char *buffer){ if (uniChar == 0 || buffer == 0) return 0; // Table of UTF-8 Encoding (this is how we encode UCS2) // ---------------------------------------------------- // Bytes Bits Encoding // 1 7 0bbbbbbb // 2 11 110bbbbb 10bbbbbb // 3 16 1110bbbb 10bbbbbb 10bbbbbb // ---------------------------------------------------- // e.g. UNI 0x00B8 == UTF-8 11000010 10111000 // c 2 b 8 // UNI 0x001C == UTF-8 00011100 // 1 c // ---------------------------------------------------- unsigned short encChar = (unsigned short)((uniChar[0] << 8) | (uniChar[1])); /* XXX: remove this cout << "UNICODE CHAR = 0x" << std::hex << encChar << endl; cout << "UNI[0] = " << (unsigned)(uniChar[0] & 0xff) << " " << "UNI[1] = " << (unsigned)(uniChar[1] & 0xff) << endl; */ // do conversion if (encChar < 0x0080) { buffer[0] = uniChar[1]; return 1; } if (encChar < 0x0800) { buffer[0] = (0xc0 | (uniChar[0] << 2) | (uniChar[1] >> 6)); buffer[1] = (0x80 | (uniChar[1] & 0x3f)); return 2; } if (encChar <= 0xffff) { buffer[0] = (0xe0 | (uniChar[0] >> 4)); buffer[1] = (0x80 | ((uniChar[0] & 0x0f) << 2) | (uniChar[1] >> 6)); buffer[2] = (0x80 | (uniChar[1] & 0x3f)); return 3; } return 0;}unsigned int STAFConverter::decodeUTF8(const unsigned char *utfChar, unsigned char *buffer){ unsigned int size = SIZE_TABLE[utfChar[0]]; if (size == 1) { buffer[0] = 0; buffer[1] = utfChar[0]; return 2; } if (size == 2) { buffer[0] = ((utfChar[0] & 0x1c) >> 2); buffer[1] = ((utfChar[0] & 0x03) << 6) | (utfChar[1] & 0x3f); return 2; } if (size == 3) { buffer[0] = ((utfChar[0] & 0x0f) << 4) | ((utfChar[1] & 0x3c) >> 2); buffer[1] = ((utfChar[1] & 0x03) << 6) | (utfChar[2] & 0x3f); return 2; } return 0;}unsigned int STAFConverter::fromUCMFile(std::string converterName){ std::string ucmName = converterName + ".ucm"; fstream ucmFile(ucmName.c_str(), ios::in); if (!ucmFile) { cerr << "Could not open file " << ucmName.c_str() << endl; return 1; } static const unsigned int SIZE = 1024; char fileLine[SIZE]; bool inCharMap = false; // map files are required to be in .ucm format (ucm format is the format // provided by the IBM Classes for Unicode), where lines starting with // # are comments, lines containing <Uabcd> XY are value pairs that are // used to construct the binary file; other lines MAY simply be ignored. while (!ucmFile.eof()) { fileLine[0] = 0; ucmFile.getline(fileLine, SIZE); std::string line(fileLine); for (int i = 0; (i < SIZE) && (fileLine[i] != 0); i++) { if (isspace(fileLine[i])) continue; if (fileLine[i] == '#') break; if (strncmp(fileLine, "CHARMAP", 7) == 0) { inCharMap = true; break; } if (fileLine[i] == '<') { char var[32]; char val[32];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -