📄 stafconverter.cpp
字号:
switch (fHeader.conv_class) { case kSBCS: fC2UFunc = &STAFConverter::fromSBCS; fU2CFunc = &STAFConverter::toSBCS; break; case kDBCS: fC2UFunc = &STAFConverter::fromDBCS; fU2CFunc = &STAFConverter::toDBCS; break; case kMBCS: fC2UFunc = &STAFConverter::fromMBCS; fU2CFunc = &STAFConverter::toMBCS; break; case kEBCDIC: fC2UFunc = &STAFConverter::fromEBCDIC; fU2CFunc = &STAFConverter::toEBCDIC; break; default: cerr << "Unknown format for file " << binName.c_str() << endl; return 2; } return 0;}unsigned int STAFConverter::convertToUTF8(const unsigned char **src, unsigned int *srclen, unsigned char *trg, unsigned int *trglen){ // srclen points to the number of bytes taken up by src, and trglen // points to the number of bytes allocated for trg. on return, src // will point to the first byte in src that was not converted, src- // len will contain the number of bytes left to be converted, and // trglen will contain the number of bytes used from trg. if (fHeader.conv_class == kUNKNOWN) fC2UFunc = &STAFConverter::fromLATIN1; return (this->*fC2UFunc)(src, srclen, trg, trglen);}unsigned int STAFConverter::convertFromUTF8(const unsigned char **src, unsigned int *srclen, unsigned char *trg, unsigned int *trglen){ // srclen points to the number of bytes taken up by src, and trglen // points to the number of bytes allocated for trg. on return, src // will point to the first byte in src that was not converted, src- // len will contain the number of bytes left to be converted, and // trglen will contain the number of bytes stored in trg. if (fHeader.conv_class == kUNKNOWN) fU2CFunc = &STAFConverter::toLATIN1; return (this->*fU2CFunc)(src, srclen, trg, trglen);}unsigned int STAFConverter::fromLATIN1(const unsigned char **src, unsigned int *srclen, unsigned char *trg, unsigned int *trglen){ // Note: this function converts from the local filesystem character // set to the corresponding USC code and then converts the USC character // code to the UTF-8 encoding. For Latin1 (ISO 8859-1:1998), to convert // the local character set to the corresponding USC code, it simply // creates a 2-character key: key[0] contains x00 and key[1] contains // the local character set character // e.g. 0x00 -> 0x0000, 0x01 -> 0x0001, ..., 0x0FF -> 0x00FF int convlen = STAF_MIN(*srclen, *trglen); int roomLeftInBuffer = *trglen; *trglen = 0; unsigned char key[MAX_UNI_CHAR_SIZE] = { 0 }; while (convlen > 0 && (roomLeftInBuffer > (MAX_UTF8_CHAR_SIZE - 1))) { key[1] = **src; register unsigned len = encodeUTF8(key, trg); if (len == 0) return INVALID_STRING; (*src)++; trg += len; (*srclen)--; (*trglen) += len; convlen--; roomLeftInBuffer -= len; } return 0;}unsigned int STAFConverter::fromUTF8(const unsigned char **src, unsigned int *srclen, unsigned char *trg, unsigned int *trglen){ // Note: this function simply copies the UTF-8 chars from // src into trg and modifies the counts as appropri- // ate. int convlen = STAF_MIN(*srclen, *trglen); *trglen = 0; memcpy(trg, *src, convlen); *src += convlen; *srclen -= convlen; *trglen += convlen; return 0;}unsigned int STAFConverter::fromSBCS(const unsigned char **src, unsigned int *srclen, unsigned char *trg, unsigned int *trglen){ int convlen = STAF_MIN(*srclen, *trglen); int roomLeftInBuffer = *trglen; *trglen = 0; while (convlen > 0 && (roomLeftInBuffer > (MAX_UTF8_CHAR_SIZE - 1))) { register unsigned len = encodeUTF8(pC2UData->get(*src), trg); if (len == 0) return INVALID_STRING; (*src)++; trg += len; (*srclen)--; (*trglen) += len; convlen--; roomLeftInBuffer -= len; } return 0;}unsigned int STAFConverter::fromDBCS(const unsigned char **src, unsigned int *srclen, unsigned char *trg, unsigned int *trglen){ int convlen = STAF_MIN(*srclen, *trglen); int roomLeftInBuffer = *trglen; *trglen = 0; while (convlen > 0 && (roomLeftInBuffer > (MAX_UTF8_CHAR_SIZE - 1))) { register unsigned len = encodeUTF8(pC2UData->get(*src), trg); if (len == 0) return INVALID_STRING; (*src) += 2; trg += len; (*srclen) -= 2; (*trglen) += len; convlen -= 2; roomLeftInBuffer -= len; } return 0;}unsigned int STAFConverter::fromMBCS(const unsigned char **src, unsigned int *srclen, unsigned char *trg, unsigned int *trglen){ int convlen = STAF_MIN(*srclen, *trglen); int roomLeftInBuffer = *trglen; *trglen = 0; register unsigned m = fHeader.max_cpg_size; while (convlen > 0 && (roomLeftInBuffer > (MAX_UTF8_CHAR_SIZE - 1))) { // this must be initialized with zeroes on each iteration. // this only happens in fromMBCS and the reason for this // has to do with the way we use cpg characters as keys in // order to save space. a note on this is provided above. // Grep on BYTE-ORDER MANIPULATION for more info. unsigned char key[MAX_CPG_CHAR_SIZE] = { 0 }; register unsigned size = fCharSize[**src]; if (size == 0) return INVALID_STRING; register unsigned i = m - size; while (i < m) key[i++] = *((*src)++); register unsigned len = encodeUTF8(pC2UData->get(key), trg); trg += len; (*srclen) -= size; (*trglen) += len; convlen -= size; roomLeftInBuffer -= len; } return 0;}unsigned int STAFConverter::fromEBCDIC(const unsigned char **src, unsigned int *srclen, unsigned char *trg, unsigned int *trglen){ // XXX: Note yet implemented!!! return NOT_IMPLEMENTED;}unsigned int STAFConverter::toLATIN1(const unsigned char **src, unsigned int *srclen, unsigned char *trg, unsigned int *trglen){ int convlen = STAF_MIN(*srclen, *trglen); *trglen = 0; unsigned char key[MAX_UNI_CHAR_SIZE] = { 0 }; while (convlen > 0) { register unsigned len = SIZE_TABLE[**src]; if (len == 0) return INVALID_STRING; decodeUTF8(*src, key); // If the first character is 0, assign the second character. // If the first character is not 0, assign ? since only support 1 char *(trg++)= (key[0] == 0) ? key[1] : 0x3F; // 0x3F = Question mark (*src) += len; (*srclen) -= len; (*trglen)++; convlen -= len; } return 0;}unsigned int STAFConverter::toUTF8(const unsigned char **src, unsigned int *srclen, unsigned char *trg, unsigned int *trglen){ // Note: we simply call fromUTF8, it's the same algorithm!!! return fromUTF8(src, srclen, trg, trglen);}unsigned int STAFConverter::toSBCS(const unsigned char **src, unsigned int *srclen, unsigned char *trg, unsigned int *trglen){ int convlen = STAF_MIN(*srclen, *trglen); int roomLeftInBuffer = *trglen; *trglen = 0; unsigned char key[MAX_UNI_CHAR_SIZE] = { 0 }; while (convlen > 0 && (roomLeftInBuffer > (MAX_CPG_CHAR_SIZE - 1))) { register unsigned len = SIZE_TABLE[**src]; if (len == 0) return INVALID_STRING; decodeUTF8(*src, key); register const unsigned char *cpgChar = pU2CData->get(key); *(trg++) = *(cpgChar); (*src) += len; (*srclen) -= len; (*trglen)++; convlen -= len; roomLeftInBuffer -= len; } return 0;}unsigned int STAFConverter::toDBCS(const unsigned char **src, unsigned int *srclen, unsigned char *trg, unsigned int *trglen){ int convlen = STAF_MIN(*srclen, *trglen); int roomLeftInBuffer = *trglen; *trglen = 0; unsigned char key[MAX_UNI_CHAR_SIZE] = { 0 }; while (convlen > 0 && (roomLeftInBuffer > (MAX_CPG_CHAR_SIZE - 1))) { register unsigned len = SIZE_TABLE[**src]; if (len == 0) return INVALID_STRING; decodeUTF8(*src, key); register const unsigned char *cpgChar = pU2CData->get(key); *(trg++) = *(cpgChar++); *(trg++) = *(cpgChar); (*src) += len; (*srclen) -= len; (*trglen) += 2; convlen -= len; roomLeftInBuffer -= len; } return 0;}unsigned int STAFConverter::toMBCS(const unsigned char **src, unsigned int *srclen, unsigned char *trg, unsigned int *trglen){ int convlen = STAF_MIN(*srclen, *trglen); int roomLeftInBuffer = *trglen; *trglen = 0; unsigned char key[MAX_UNI_CHAR_SIZE] = { 0 }; while (convlen > 0 && (roomLeftInBuffer > (MAX_CPG_CHAR_SIZE - 1))) { register unsigned len = SIZE_TABLE[**src]; decodeUTF8(*src, key); register const unsigned char *cpgChar = pU2CData->get(key); register unsigned size = fCharSize[cpgChar[0]]; if (size == 0) return INVALID_STRING; memcpy(trg, cpgChar, size); trg += size; (*src) += len; (*srclen) -= len; (*trglen) += size; convlen -= len; roomLeftInBuffer -= len; } return 0;}unsigned int STAFConverter::toEBCDIC(const unsigned char **src, unsigned int *srclen, unsigned char *trg, unsigned int *trglen){ // XXX: not yet implemented return NOT_IMPLEMENTED;}///////////////////////////////////////////////////////////////////////////////
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -