⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 avc_mbyte.c

📁 用于读取TAB、MIF、SHP文件的类
💻 C
📖 第 1 页 / 共 2 页
字号:
        psDBCSInfo->nDBCSBufSize = nMaxOutputLen+2;        psDBCSInfo->pszDBCSBuf =             (unsigned char *)CPLRealloc(psDBCSInfo->pszDBCSBuf,                                        psDBCSInfo->nDBCSBufSize*                                        sizeof(unsigned char));    }    /* Do the conversion according to current code page      */    switch (psDBCSInfo->nDBCSCodePage)    {      case AVC_DBCS_JAPANESE:        pszOutBuf = (char*)_AVCArcDBCS2JapaneseShiftJIS(psDBCSInfo,                                                (const unsigned char *)pszLine,                                                        nMaxOutputLen);        break;      default:        /* We should never get here anyways, but just in case return pszLine          */        pszOutBuf = pszLine;    }        return pszOutBuf;}/*===================================================================== *===================================================================== * Functions Specific to Japanese encoding (CodePage 932).   * * For now we assume that we can receive only Katakana, Shift-JIS, or EUC * encoding as input.  Coverages use EUC encoding in most cases, except * for Katakana characters that are prefixed with a 0x8e byte. * * Most of the Japanese conversion functions are based on information and * algorithms found at: *  http://www.mars.dti.ne.jp/~torao/program/appendix/japanese-en.html *===================================================================== *====================================================================*//********************************************************************** *                          _AVCDetectJapaneseEncoding() * * Scan a line of text to try to establish the type of japanese encoding * * Returns the encoding number (AVC_CODE_JAP_*), or AVC_CODE_UNKNOWN if no * specific encoding was detected. 
**********************************************************************/#define IS_JAP_SHIFTJIS_1(c)  ((c) >= 0x81 && (c) <= 0x9f)#define IS_JAP_SHIFTJIS_2(c)  (((c) >= 0x40 && (c) <= 0x7e) ||   \                               ((c) >= 0x80 && (c) <= 0xA0) )#define IS_JAP_EUC_1(c)       ((c) >= 0xF0 && (c) <= 0xFE)#define IS_JAP_EUC_2(c)       ((c) >= 0xFD && (c) <= 0xFE)#define IS_JAP_KANA(c)        ((c) >= 0xA1 && (c) <= 0xDF)static int _AVCDetectJapaneseEncoding(const unsigned char *pszLine){    int nEncoding = AVC_CODE_UNKNOWN;    for( ; nEncoding == AVC_CODE_UNKNOWN && pszLine && *pszLine; pszLine++)    {        if (IS_ASCII(*pszLine))            continue;        else if (IS_JAP_SHIFTJIS_1(*pszLine))        {            nEncoding = AVC_CODE_JAP_SHIFTJIS;            break;        }        else if (IS_JAP_KANA(*pszLine) && *(pszLine+1) &&                 (IS_ASCII(*(pszLine+1)) ||                   (*(pszLine+1)>=0x80 && *(pszLine+1)<=0xA0) ) )        {            nEncoding = AVC_CODE_JAP_SHIFTJIS; /* SHIFT-JIS + Kana */            break;        }        else if (IS_JAP_EUC_1(*pszLine))        {            nEncoding = AVC_CODE_JAP_EUC;            break;        }        if (*(++pszLine) == '\0')            break;        if (IS_JAP_SHIFTJIS_2(*pszLine))        {            nEncoding = AVC_CODE_JAP_SHIFTJIS;            break;        }        else if (IS_JAP_EUC_2(*pszLine))        {            nEncoding = AVC_CODE_JAP_EUC;            break;        }    }    return nEncoding;}/********************************************************************** *                          _AVCJapanese2ArcDBCS() * * Try to detect type of Japanese encoding if not done yet, and convert * string from Japanese to proper coverage DBCS encoding. 
**********************************************************************/

/* psDBCSInfo    : conversion state; its nDBCSEncoding field is filled in
 *                 here when still AVC_CODE_UNKNOWN, and its pszDBCSBuf
 *                 buffer receives the converted text.
 * pszLine       : NUL-terminated input string.
 * nMaxOutputLen : conversion stops once this many output bytes have been
 *                 produced (checked before each input character).
 *
 * Returns psDBCSInfo->pszDBCSBuf, NUL-terminated.
 *
 * NOTE(review): a single step may emit two bytes when only one slot is
 * left below nMaxOutputLen, and the terminator follows, so the buffer must
 * hold at least nMaxOutputLen+2 bytes -- confirm callers size it that way.
 */
static const char *_AVCJapanese2ArcDBCS(AVCDBCSInfo *psDBCSInfo,
                                        const unsigned char *pszLine,
                                        int nMaxOutputLen)
{
    unsigned char       *pabyDst = psDBCSInfo->pszDBCSBuf;
    const unsigned char *pabySrc = pszLine;
    int                  nOut = 0;

    /* Encoding (Shift-JIS or EUC) not established yet: try to work it
     * out from the current line.
     */
    if (psDBCSInfo->nDBCSEncoding == AVC_CODE_UNKNOWN)
        psDBCSInfo->nDBCSEncoding = _AVCDetectJapaneseEncoding(pszLine);

    while (*pabySrc != '\0' && nOut < nMaxOutputLen)
    {
        if (IS_ASCII(*pabySrc))
        {
            /* ASCII is copied through unchanged */
            pabyDst[nOut++] = *pabySrc;
        }
        else if (psDBCSInfo->nDBCSEncoding == AVC_CODE_JAP_EUC &&
                 pabySrc[1] != '\0')
        {
            /* Input is already EUC: copy the two bytes of the pair as-is
             * (both are expected to be in the range 0xA1-0xFE).
             */
            pabyDst[nOut++] = *pabySrc++;
            pabyDst[nOut++] = *pabySrc;
        }
        else if (IS_JAP_KANA(*pabySrc))
        {
            /* Single-byte Katakana: emit it behind a 0x8e prefix */
            pabyDst[nOut++] = 0x8e;
            pabyDst[nOut++] = *pabySrc;
        }
        else if (pabySrc[1] != '\0')
        {
            /* Treat the next two bytes as a Shift-JIS pair and convert
             * them to EUC.
             *
             * While the encoding is still undetermined, an EUC pair can
             * end up being processed here as if it were Shift-JIS; that
             * cannot be avoided without scanning the whole input before
             * starting the conversion.
             */
            unsigned char byLead  = *pabySrc++;
            unsigned char byTrail = *pabySrc;

            byLead -= (byLead <= 0x9F) ? 0x71 : 0xB1;
            byLead = (byLead << 1) + 1;

            if (byTrail > 0x7F)
                byTrail--;

            if (byTrail >= 0x9E)
            {
                byTrail -= 0x7D;
                byLead++;
            }
            else
            {
                byTrail -= 0x1F;
            }

            /* Set the high bit on both bytes of the result */
            pabyDst[nOut++] = byLead | 0x80;
            pabyDst[nOut++] = byTrail | 0x80;
        }
        else
        {
            /* Lone non-ASCII byte right before the terminator (e.g. a
             * truncated double-byte pair): copy it through.
             */
            pabyDst[nOut++] = *pabySrc;
        }

        pabySrc++;
    }

    pabyDst[nOut] = '\0';

    return (const char *)psDBCSInfo->pszDBCSBuf;
}

/**********************************************************************
 *                          _AVCArcDBCS2JapaneseShiftJIS()
 *
 * Convert string from coverage DBCS (EUC) to Japanese Shift-JIS.
 *
 * We know that binary coverages use a custom EUC encoding for japanese
 * which is EUC + all Katakana chars are prefixed with 0x8e.  So this
 * function just does a simple conversion.
**********************************************************************/

/* psDBCSInfo    : conversion state; its pszDBCSBuf buffer receives the
 *                 converted text.
 * pszLine       : NUL-terminated input string.
 * nMaxOutputLen : conversion stops once this many output bytes have been
 *                 produced (checked before each input character).
 *
 * Returns psDBCSInfo->pszDBCSBuf, NUL-terminated.
 *
 * NOTE(review): a pair may be written when only one slot is left below
 * nMaxOutputLen, and the terminator follows, so the buffer must hold at
 * least nMaxOutputLen+2 bytes -- confirm callers size it that way.
 */
static const char *_AVCArcDBCS2JapaneseShiftJIS(AVCDBCSInfo *psDBCSInfo,
                                                const unsigned char *pszLine,
                                                int nMaxOutputLen)
{
    unsigned char       *pabyDst = psDBCSInfo->pszDBCSBuf;
    const unsigned char *pabySrc = pszLine;
    int                  nOut = 0;

    while (*pabySrc != '\0' && nOut < nMaxOutputLen)
    {
        if (IS_ASCII(*pabySrc))
        {
            /* ASCII is copied through unchanged */
            pabyDst[nOut++] = *pabySrc;
        }
        else if (*pabySrc == 0x8e && pabySrc[1] != '\0')
        {
            /* 0x8e prefix: drop it and keep only the byte that follows */
            pabySrc++;
            pabyDst[nOut++] = *pabySrc;
        }
        else if (pabySrc[1] != '\0')
        {
            /* Pair of EUC bytes: strip the high bits, then remap both
             * bytes to their Shift-JIS values.
             */
            unsigned char byLead  = *pabySrc++ & 0x7F;
            unsigned char byTrail = *pabySrc & 0x7F;

            /* The trail offset depends on the parity of the lead byte */
            byTrail += ((byLead & 0x01) != 0) ? 0x1F : 0x7D;
            if (byTrail >= 0x7F)
                byTrail++;

            byLead = ((byLead - 0x21) >> 1) + 0x81;
            if (byLead > 0x9F)
                byLead += 0x40;

            pabyDst[nOut++] = byLead;
            pabyDst[nOut++] = byTrail;
        }
        else
        {
            /* Lone non-ASCII byte right before the terminator (e.g. a
             * truncated double-byte pair): copy it through.
             */
            pabyDst[nOut++] = *pabySrc;
        }

        pabySrc++;
    }

    pabyDst[nOut] = '\0';

    return (const char *)psDBCSInfo->pszDBCSBuf;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -