📄 stafconverter.cpp
字号:
i++; // skip opening < if (!inCharMap) { // while it is not a closing > or a space or a new // line/carriage return, set var int j = 0; for (; (i < SIZE) && (fileLine[i] != 0) && (fileLine[i] != '>') && (!isspace(fileLine[i])); i++, j++) var[j] = fileLine[i]; var[j] = 0; // while it is a space or a closing > or an opening " // skip while ((i < SIZE) && (fileLine[i] != 0) && ((isspace(fileLine[i])) || (fileLine[i] == '"') || (fileLine[i] == '>'))) i++; // while it is not a space and not a closing " and // not a new line/carriage return, set val int k = 0; for (; (i < SIZE) && (fileLine[i] != 0) && (!isspace(fileLine[i])) && (fileLine[i] != '"'); i++, k++) val[k] = fileLine[i]; val[k] = 0; // now we actually set the values in the static file // header so that we can later write it to the binary // file if (strcmp(var, "code_set_name") == 0) { strncpy((char *)fHeader.code_set_name, val, 11); } else if (strcmp(var, "uconv_class") == 0) { fHeader.conv_class = kUNKNOWN; if (strncmp(val, "SBCS", 4) == 0) { fHeader.conv_class = kSBCS; fC2UFunc = &STAFConverter::fromSBCS; fU2CFunc = &STAFConverter::toSBCS; } else if (strncmp(val, "DBCS", 4) == 0) { fHeader.conv_class = kDBCS; fC2UFunc = &STAFConverter::fromDBCS; fU2CFunc = &STAFConverter::toDBCS; } else if (strncmp(val, "MBCS", 4) == 0) { fHeader.conv_class = kMBCS; fC2UFunc = &STAFConverter::fromMBCS; fU2CFunc = &STAFConverter::toMBCS; } else if (strncmp(val, "EBCD", 4) == 0) { fHeader.conv_class = kEBCDIC; fC2UFunc = &STAFConverter::fromEBCDIC; fU2CFunc = &STAFConverter::toEBCDIC; } } else if (strcmp(var, "mb_cur_min") == 0) { /* ignore, we never use this field */ } else if (strcmp(var, "mb_cur_max") == 0) { fHeader.max_cpg_size = atoi(val); } else if (strcmp(var, "subchar") == 0) { // at this point we are still reading the ucm file's // header. we are going to setup the default charac- // ter block for the codepage represented j = 0, k = 0; int charLen = 0; while (val[j] == '\\') { j += 2; val[k++] = val[j++]; val[k++] = val[j++]; charLen++; } val[k] = 0; // we have the default codepage character as a hex // string in 'val' (length of val depends on class // of char: e.g. if class is SBCS, len of val is 2 // (like "A2" which represents 1 byte) for DBCS is // 4 (like "E5F1" which represents 2 bytes), etc. unsigned long defCpgVal = strtoul(val, 0, 16); for (k = charLen - 1; k >= 0; k--, defCpgVal >>= 8) { fHeader.def_cpg_char[k] = defCpgVal & 0xff; } fCharSize[fHeader.def_cpg_char[0]] = (unsigned char)charLen; } // end of else if "subchar" } else // in Char Map { if (pC2UData == 0) { pC2UData = new CompactTree(fHeader.max_cpg_size, fHeader.max_uni_size, (const unsigned char *) &fHeader.def_uni_char); pU2CData = new CompactTree(fHeader.max_uni_size, fHeader.max_cpg_size, (const unsigned char *) &fHeader.def_cpg_char); } i++; // skip U from <Uabcd> // get hex unicode value into var var[0] = fileLine[i++]; var[1] = fileLine[i++]; var[2] = fileLine[i++]; var[3] = fileLine[i++]; var[4] = 0; // while it is a space or a closing '>' skip while ((i < SIZE) && (fileLine[i] != 0) && (isspace(fileLine[i]) || (fileLine[i] == '>'))) i++; // assumption is that we have encountered the cpg char so // get 2 hex digits of each, repeat until space is found int j = 0; int charLen = 0; while (fileLine[i] == '\\') { i += 2; val[j++] = fileLine[i++]; val[j++] = fileLine[i++]; charLen++; } val[j] = 0; unsigned long uniVal = strtoul(var, 0, 16); unsigned long cpgVal = strtoul(val, 0, 16); unsigned long saveCpgVal = cpgVal; unsigned long saveUniVal = uniVal; unsigned char uniBuffer [MAX_UNI_CHAR_SIZE] = { 0 }; unsigned char cpgKeyBuffer[MAX_CPG_CHAR_SIZE] = { 0 }; unsigned char cpgValBuffer[MAX_CPG_CHAR_SIZE] = { 0 }; int k; for (k = MAX_UNI_CHAR_SIZE - 1; k >= 0; k--, uniVal >>= 8) { uniBuffer[k] = (unsigned char)(uniVal & 0xff); } // BYTE-ORDER MANIPULATION: // Note: this creates the codepage key and value which // are different because it truly impacts in the size // of the binary file being created. Different strate- // gies for 1 particular file gave a 20Mb, a 10Mb, and // ultimately a 256kb binary file. What we do here is // rearrange the codepage character bytes when it acts // as a key and when it acts as the value being stored. // For example, if we are dealing with MBCS of size 4, // and a given character consists of only 2 bytes, the // ls-bytes (i.e. byte[0] and byte[1]) must be zeroes // when the char is acting as key, so that it can take // the same path as other 2-byte keys. This does not // happen for unicode since they are all 2 bytes. // Lets review this manipulation of bytes with an ex- // ample where again the size of the MBCS char is at // most 4 bytes: // MBCS char = { 0x2e, 0xff } // key representation = { 0x00, 0x00, 0x2e, 0xff } // val representation = { 0x2e, 0xff, 0x00, 0x00 } // The val representation helps determine in faster // time the exact size of the char, since we use the // ls-byte as an index into the fCharLen table, and // the key representation helps all other keys whose // size is also 2 take the same path in the compact // tree being used for storage. int m; for (k = fHeader.max_cpg_size - 1, m = charLen - 1; k >= 0; k--, m--, cpgVal >>= 8) { cpgKeyBuffer[k] = (unsigned char)(cpgVal & 0xff); if (m >= 0) cpgValBuffer[m] = (unsigned char)(cpgVal & 0xff); } fCharSize[cpgValBuffer[0]] = (unsigned char)charLen; // Note: This makes Won/Yen map to "\" static const unsigned char sCpgValBuffer5c [MAX_CPG_CHAR_SIZE] = { 0x5c, 0x00 }; static const unsigned char sUniBuffer005c [MAX_UNI_CHAR_SIZE] = { 0x00, 0x5c }; #if STAF_OS_NAME_ZOS // Worrying about backslashes (0x5c) in the conversion // is only an issue on non-EBCDIC systems. pC2UData->put(cpgKeyBuffer, uniBuffer); pU2CData->put(uniBuffer, cpgValBuffer);#else if (saveCpgVal == 0x5c) pC2UData->put(cpgKeyBuffer, sUniBuffer005c); else pC2UData->put(cpgKeyBuffer, uniBuffer); if (saveUniVal == 0x005C) pU2CData->put(uniBuffer, sCpgValBuffer5c); else pU2CData->put(uniBuffer, cpgValBuffer);#endif } // end of in Char Map } // end of in '<' } // end of for loop } // end of while != eof // sign the header and serialize the necessary data // in a binary file which must be read in same order fHeader.signature = SIGNATURE; std::string binName = converterName + ".bin"; fstream binFile(binName.c_str(), ios::out | STAF_ios_binary); if (!binFile) { cerr << "Could not open file " << binName.c_str() << endl; return 1; } binFile.write((char *)&fHeader, sizeof(fHeader)); pC2UData->serialize(binFile); pU2CData->serialize(binFile); if (fHeader.conv_class == kMBCS) { binFile.write((char *)&fCharSize, sizeof(fCharSize)); } binFile.close(); return 0;} unsigned int STAFConverter::createAliasTable() { std::string aliasName = std::string(sConvDirPtr) + "/" + std::string(kAliasNamePtr); fstream aliasFile(aliasName.c_str(), ios::in); if (!aliasFile) { cerr << "Could not open file " << aliasName.c_str() << endl; return 1; } // insert the default codepage first to map to itself sAliasTable[std::string(kDefaultCodePagePtr)] = std::string(kDefaultCodePagePtr); const unsigned int SIZE = 1024; char fileLine[SIZE]; while (!aliasFile.eof()) { fileLine[0] = 0; aliasFile.getline(fileLine, SIZE); std::string line(fileLine); for (int i = 0; (i < SIZE) && (fileLine[i] != 0); i++) { if (isspace(fileLine[i])) continue; if (fileLine[i] == '#') break; // read the value, then all the different keys int j; char val[32]; for (j = 0; !isspace(fileLine[i]); j++) val[j] = fileLine[i++]; val[j] = 0; // add the value as a key to itself std::string theKey = val; std::string theVal = val; sAliasTable[theKey] = theVal; char key[32]; // add the keys found in the same text line for (; (i < SIZE) && (fileLine[i] != 0); i++) { if (isspace(fileLine[i])) continue; if (fileLine[i] == '#') break; for (j = 0; (fileLine[i] != 0) && (!isspace(fileLine[i])); i++, j++) key[j] = fileLine[i]; key[j] = 0; i--; // adjust (won't put as on infinite loop) theKey = key; theVal = val; sAliasTable[theKey] = theVal; } break; } } aliasFile.close(); return 0;}unsigned int STAFConverter::fromBINFile(std::string converterName){ std::string binName = std::string(sConvDirPtr) + "/" + converterName + ".bin"; fstream binFile(binName.c_str(), ios::in | STAF_ios_binary); if (!binFile) { cerr << "Could not open file " << binName.c_str() << endl; return 1; } binFile.read((char *)&fHeader, sizeof(fHeader)); if (fHeader.signature != SIGNATURE) { cerr << "File " << binName.c_str() << " is corrupted" << endl; return 1; } pC2UData = new CompactTree(); pU2CData = new CompactTree(); pC2UData->deserialize(binFile); pU2CData->deserialize(binFile); if (fHeader.conv_class == kMBCS) { binFile.read((char *)&fCharSize, sizeof(fCharSize)); } binFile.close();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -