plx_charset.c

来自「mtk wap和mms代码。。适应mtk 25。26平台」· C语言代码 · 共 2,285 行 · 第 1/5 页
2,285 行
*     cchWideChar   : Specifies the size, in wide characters, of the buffer 
*                     pointed to by the lpWideCharStr parameter. If this 
*                     value is zero, the function returns the required 
*                     buffer size, in wide characters, and makes no use of 
*                     the lpWideCharStr buffer.
* Return         :
*    If the function succeeds, and cchWideChar is nonzero, the return value 
*    is the number of wide characters written to the buffer pointed to by 
*    lpWideCharStr. 
*    If the function succeeds, and cchWideChar is zero, the return value is 
*    the required size, in wide characters, for a buffer that can receive 
*    the translated string.
*    If the function fails, the return value is zero.
* Remarks      
\**************************************************************************/
int PlxMultiByteToWideChar(UINT CodePage, DWORD dwFlags, 
                        LPCSTR lpMultiByteStr, int cchMultiByte, 
                        LPWSTR lpWideCharStr, int cchWideChar)
{
    /*
     * Implementation notes
     *  - Only CodePage == CP_ACP with dwFlags == 0 is accepted; anything
     *    else asserts and returns 0.
     *  - cchMultiByte == -1 means "null-terminated"; the terminator is then
     *    counted as part of the input and converted like any other byte.
     *  - Every source character (1, 2 or 4 bytes) yields exactly one wide
     *    character, stored low byte first.
     *  - After a successful conversion up to two zero wide characters are
     *    appended as padding, but never beyond cchWideChar and never in
     *    size-query mode (cchWideChar == 0), where the output buffer must
     *    not be touched and may legitimately be NULL.
     */
    /* NOTE(review): the declaration below is disabled, yet `tm` is still
     * referenced further down when REMOVE_GB18030_SUPPORT is not defined. */
//    TEXTMETRIC tm;
    int nSrcChars;
    int nDstChars;
    int nBytes;
    int nPad;
    char* lpSrc = NULL;
    WCHAR* lpDst = NULL;
    LONG lDistance;
    BOOL bForSize = FALSE;
    BOOL bIsGb18030 = FALSE;

    if (!InitConvChars())
    {
        ASSERT(0);
        //SetLastError(1);
        return 0;
    }

    if (CodePage != CP_ACP || 
        dwFlags != 0 || lpMultiByteStr == NULL)
    {
        ASSERT(0);
//        SetLastError(1);
        return 0;
    }

    /* cchWideChar == 0: the caller only wants the required buffer size */
    if (cchWideChar == 0)
        bForSize = TRUE;

    if (!bForSize && lpWideCharStr == NULL)
    {
        /* null output pointer in convert mode: the function fails */
        ASSERT(0);
        //SetLastError(1);
        return 0;
    }

    /* lpMultiByteStr is a null-terminated string */
    if (cchMultiByte == -1)
    {
        nSrcChars = (int)strlen(lpMultiByteStr) + 1;
    }
    else
    {
        nSrcChars = cchMultiByte;
    }

    if (nSrcChars <= 0)
        return 0;

    /* check if default font is GB18030 */
    /* NOTE(review): the GetTextMetrics() call is disabled, so `tm` is never
     * declared or filled in; this section only builds when
     * REMOVE_GB18030_SUPPORT is defined. Confirm the intended source of the
     * charset before enabling GB18030 support. */
//    if (GetTextMetrics(NULL, &tm))
    {
#if !defined(REMOVE_GB18030_SUPPORT)
        if (tm.tmCharSet == CHARSET_GB18030)
            bIsGb18030 = TRUE;
#endif  // REMOVE_GB18030_SUPPORT
    }

    /***********************************************************************
     *
     * In GBK scope:
     *      high byte of the multi-byte character is between 0x81 and 0xfe, 
     *      low byte of the multi-byte character is between 0x40 and 0xfe. 
     *
     **********************************************************************/
    lpSrc = (char *)lpMultiByteStr;
    lpDst = lpWideCharStr;
    nBytes = 0;
    nDstChars = 0;
    while (1)
    {
        BYTE ch1, ch2, ch3, ch4;
        int ret;

        /* Each source character produces exactly one wide character. */
        nDstChars++;
        if (!bForSize && nDstChars > cchWideChar)
            return 0;   /* not enough output buffer */

        ret = CheckIsGbChar(lpSrc, nSrcChars - nBytes, bIsGb18030, 
            &ch1, &ch2, &ch3, &ch4);
        if (ret == -1)
        {
            /* Invalid code: consume a single byte and handle it through
             * the one-byte path below. */
            ch1 = *lpSrc;
            ret = 1;
        }

        switch (ret)
        {
        case 1:
            /* Single byte character */
            if (!bForSize)
            {
                if (ch1 == 0x80)
                {
                    /* 0x80 maps to U+20AC */
                    *(BYTE *)lpDst = 0xAC;
                    *((BYTE *)lpDst + 1) = 0x20;
                }
                else if (ch1 > 0x80)
                {
                    /* wrong character: replaced by a space */
                    *(BYTE *)lpDst = 0x20;
                    *((BYTE *)lpDst + 1) = 0x00;
                }
                else
                {
                    /* passed through unchanged */
                    *(BYTE *)lpDst = ch1;
                    *((BYTE *)lpDst + 1) = 0x00;
                }
                lpDst++;
            }
            break;

        case 2:
            /* 2 byte code */
            if (!bForSize)
            {
                if (ch1 == 0xaa && (ch2 >= 0xa1 && ch2 <= 0xc8))
                {
                    /* GSM character in user-defined code area:
                     * 2-byte entries in gb2gsm, indexed by the low byte */
                    lDistance = (ch2 - 0xa1) * 2;
                    memcpy(lpDst, gb2gsm + lDistance, 2);
                    lpDst++;
                    break;
                }

                /* Mapping table offset: 191 two-byte entries per lead
                 * byte (trail bytes 0x40..0xfe), lead bytes from 0x81. */
                lDistance = (ch1 - 0x81) * 191L * 2L + (ch2 - 0x40) * 2;
                memcpy_far(lpDst, (PBYTEHUGE)pFileGb2Uni + lDistance, 2);
                lpDst++;
            }
            break;

#if !defined(REMOVE_GB18030_SUPPORT)
        case 4:
            /* 4 byte code */
            if (!bForSize)
            {
                indextbl_t g2u;
                WORD wch;

                /* Linear index of the 4-byte code counted from
                 * GB+81308130 */
                lDistance = (((ch1 - 0x81) * 10L + (ch2 - 0x30)) * 126L + 
                    ch3 - 0x81) * 10L + ch4 - 0x30;
                if (lDistance > 0x99FBL)
                    return 0;

                /* GB+81308130 - GB+8431A439 */
                g2u = __gb18030_to_ucs_index[lDistance >> 8];
                if ((lDistance & 0xFF) >= g2u.tblbegin && 
                    (lDistance & 0xFF) <= g2u.tblend)
                    wch = __gb18030_4byte_to_ucs[lDistance - g2u.tbloffset];
                else
                    wch = g2u.algoffset + (lDistance & 0xFF);
                *(BYTE *)lpDst = (BYTE)(wch & 0xFF);
                *((BYTE *)lpDst + 1) = (BYTE)((wch >> 8) & 0xFF);
                lpDst++;
            }
            break;
#endif  // REMOVE_GB18030_SUPPORT

        default:
            return 0;
        }

        lpSrc += ret;
        nBytes += ret;
        if (nBytes >= nSrcChars)
            break;
    }

    /*
     * Pad with up to two zero wide characters. The previous code stored
     * them unconditionally, which dereferenced the (possibly NULL) buffer
     * in size-query mode and wrote past cchWideChar whenever the converted
     * text filled the buffer.
     */
    if (!bForSize)
    {
        for (nPad = 0; nPad < 2 && nDstChars + nPad < cchWideChar; nPad++)
            lpWideCharStr[nDstChars + nPad] = 0;
    }

    return nDstChars;
}
/*
static void Gbk2Gb(BYTE *pGbBuf, BYTE *pGbkCode)
{
    if ((*pGbkCode >= 0xA1 && *pGbkCode <= 0xF7) && 
        (*(pGbkCode + 1) >= 0xA1 && *(pGbkCode + 1) <= 0xFE))
        memcpy(pGbBuf, pGbkCode, 2);
    else
    {
        *pGbBuf = ' ';
        *(pGbBuf + 1) = ' ';
    }
}*/

#if !defined(REMOVE_GB18030_SUPPORT)
/*
 * Ucs2ToGB18030
 *
 * Encodes one UCS-2 code as a 1-, 2- or 4-byte sequence and appends it to
 * the output buffer.
 *
 * Parameters:
 *    wCode        : the code to convert.
 *    lppDst       : address of the output cursor; the cursor is advanced
 *                   past the bytes written. Not used when bForSize is set.
 *    pDstChars    : running byte total; it is increased by the number of
 *                   bytes this code needs BEFORE the room check is made.
 *    cchMultiByte : limit that *pDstChars is compared against (ignored when
 *                   bForSize is set).
 *    bForSize     : nonzero to only count bytes without writing anything.
 *
 * Return:
 *    The selected encoding length (1, 2 or 4) on success, or 0 when the
 *    updated *pDstChars exceeds cchMultiByte. Note that a 2-byte code whose
 *    low byte is zero counts and writes a single byte but still returns 2.
 */
static int Ucs2ToGB18030(WCHAR wCode, BYTE **lppDst, int *pDstChars, 
                         int cchMultiByte, BOOL bForSize)
{
    DWORD idx;      /* 1/2-byte code value, or linear index of a 4-byte code */
    int count = 2;  /* Encoded length in bytes; 2 is the most common case */
    indextbl_t u2g;
    BYTE gbch[4] = {'\0', '\0', '\0', '\0'};    /* explicit 4-byte code, all zero if unused */

    if (wCode <= 0x7F)  // ASCII character: emitted unchanged as one byte
    {
        count = 1;
        idx = (DWORD)wCode;

        goto lbl_handle_idx;
    }
    else if (wCode == 0x20AC)
    {
        // U+20AC is emitted as the single byte 0x80
        count = 1;
        idx = 0x0080;

        goto lbl_handle_idx;
    }
    else if (wCode <= 0xFF || (wCode >= 0x0390 && wCode < 0x03B0))
    {
        // for ASCII or Greek characters in user-defined gb areas
        int i;
        BYTE *pCode;
        BOOL bFound = FALSE;

        // scan gb2gsm as consecutive 2-byte entries, low byte first
        pCode = (BYTE *)gb2gsm;
        for (i = 0; i < sizeof(gb2gsm) / sizeof(WCHAR); i++)
        {
            if ((*pCode + (*(pCode + 1) << 8)) == wCode)
            {
                bFound = TRUE;
                break;
            }
            pCode += 2;
        }
        if (bFound)
        {
            // entry i is encoded as the 2-byte code 0xAAA1 + i
            count = 2;
            idx = 0xAAA1 + i;

            goto lbl_handle_idx;
        }
        // not found: fall through to the general table lookup below
    }

    if (wCode <= 0xD7FFL || (wCode >= 0xE766L && wCode <= 0xFFFFL))
    {
        WORD tblentry;

        /* index entry selected by the high byte of the code */
        u2g = __ucs_to_gb18030_index[wCode >> 8];

        if ((wCode & 0xFF) < u2g.tblbegin || 
            (wCode & 0xFF) > u2g.tblend)
        {
            /* Use algorithm (4-byte GB18030) */

            idx = u2g.algoffset + (wCode & 0xFF);
            /* Yikes, my index table could not cover one special case */
            if (wCode >= 0x49B8 && wCode <= 0x49FF)
                idx -= 11;
            count = 4;

            goto lbl_handle_idx;
        }

        /* Use mapping table (2-byte or 4-byte GB18030) */
        tblentry = __ucs_to_gb18030[wCode - u2g.tbloffset];

        if (tblentry > 0x8000L)
            /* 2-byte GB18030 */
            idx = (DWORD)tblentry;
        else
        {
            /* 4-byte GB18030 stored in a special compact format */
            /* idx is NOT assigned on this path; the four bytes are built
             * directly in gbch, and case 4 below copies gbch when all of
             * its bytes are non-zero. */
            BYTE a, b;
            a = 0x81;
            b = 0x30 + (tblentry >> 11);
            if (tblentry >= 0x7000)
            {
                a += 3;
                b -= 14;
            }
            else if (tblentry >= 0x6000)
            {
                a += 2;
                b -= 6;
            }
            else if (tblentry >= 0x3000)
            {
                a += 1;
                b -= 6;
            }
            else if (b >= 0x31)
                b += 5;

            gbch[0] = a;
            gbch[1] = b;
            gbch[2] = 0x81 + (tblentry >> 4 & 0x7F);    /* bits 4..10 of the entry */
            gbch[3] = 0x30 + (tblentry & 0xF);          /* low 4 bits of the entry */
            count = 4;
        }   // (tblentry > 0x8000)
    }
#if 0   // not supporting user-defined unicode areas
    else if (wCode >= 0xE000L && wCode <= 0xE765L)
    {
        /* User-defined areas in GB18030 (2-byte) */
        if (wCode <= 0xE233L)
            idx = 0xAAA1L + (((wCode - 0xE000L) / 94) << 8) + 
                (wCode - 0xE000L) % 94;
        else if (wCode <= 0xE4C5L)
            idx = 0xF8A1L + (((wCode - 0xE234L) / 94) << 8) + 
                (wCode - 0xE234L) % 94;
        else
        {
            idx = 0xA140L + (((wCode - 0xE4C6L) / 96) << 8) + 
                (wCode - 0xE4C6L) % 96;
            /* Skip the gap at 0x7F */
            if ((idx & 0xFF) >= 0x7F)
                idx++;
        }
    }
#endif
    else
    {
        /* With the branch above disabled, every code from 0xD800 to 0xE765
         * ends up here and is replaced by a single space. */
        count = 1;
        idx = 0x20; // ' '
    }

    /* Common exit: account for `count` bytes and, unless only sizing,
     * check the room and store the bytes. */
lbl_handle_idx:
    switch (count)
    {
    case 1:
        (*pDstChars)++;
        if (!bForSize)
        {
            if (*pDstChars > cchMultiByte)
                return 0;
            *(*lppDst)++ = (BYTE)idx;
        }
        break;

    case 2:
        /* high byte first, then low byte */
        gbch[0] = (unsigned char)((idx >> 8) & 0xFF);
        gbch[1] = (unsigned char)(idx & 0xFF);
        /* A zero low byte is neither counted nor written, so only one
           byte is produced in that case.  */
        if (gbch[1] == '\0')
            (*pDstChars)++;
        else
            *pDstChars += 2;
        if (!bForSize)
        {
            if (*pDstChars > cchMultiByte)
                return 0;
            *(*lppDst)++ = gbch[0];
            if (gbch[1] != '\0')
                *(*lppDst)++ = gbch[1];
        }
        break;

    case 4:
        /* See whether there is enough room for all four bytes we
           write.  */
        *pDstChars += 4;
        if (!bForSize)
        {
            if (*pDstChars > cchMultiByte)
                return 0;

            if (gbch[0] && gbch[1] && gbch[2] && gbch[3])
            {
                /* bytes were already built from the compact table entry */
                *(*lppDst) = gbch[0];
                *(*lppDst + 1) = gbch[1];
                *(*lppDst + 2) = gbch[2];
                *(*lppDst + 3) = gbch[3];
            }
            else
            {
                /* idx is a linear index: split it into mixed-radix digits
                 * (10, 126, 10, remainder) from the last byte to the first,
                 * offset by 0x30 / 0x81 / 0x30 / 0x81 respectively */
                *(*lppDst + 3) = (unsigned char)(idx % 10) + 0x30;
                idx /= 10;
                *(*lppDst + 2) = (unsigned char)(idx % 126) + 0x81;
                idx /= 126;
                *(*lppDst + 1) = (unsigned char)(idx % 10) + 0x30;
                *(*lppDst) = (unsigned char)(idx / 10) + 0x81;
            }
            *lppDst += 4;
        }
        break;

    default:
        return 0;
    }   // switch (count)

    return count;
}
#endif  // REMOVE_GB18030_SUPPORT

/*
 * Looks up a UCS-2 code in the gb2gsm table.
 *
 * The table is read as consecutive 2-byte entries, low byte first.
 * Returns the zero-based index of the first entry equal to wCode, or -1
 * when no entry matches.
 */
static int IsUniInUserGb(WCHAR wCode)
{
    BYTE *pEntry = (BYTE *)gb2gsm;
    int nIndex = 0;

    while (nIndex < sizeof(gb2gsm) / sizeof(WCHAR))
    {
        /* rebuild the 16-bit value from its low and high bytes */
        if ((pEntry[0] + (pEntry[1] << 8)) == wCode)
            return nIndex;

        pEntry += 2;
        nIndex++;
    }

    return -1;
}

static int Ucs2ToGB2312(UINT CodePage, WCHAR wCode, BYTE *lpDst, 
                        int *pDstChars, int *pchMultiByte, BOOL bForSize)
{
    int count;
    int case_no = -1;
    WCHAR table[20] = {0x0, 0x1, 0x2, 0x3, 0x4, 0x20, 0x21, 0x22, 0x23,
        0x24, 0x25, 0x26, 0x30, 0x31, 0x32, 0x33, 0xF9, 0xFA, 0xFE, 0xFF};

    /* according to the high byte (sometimes with low byte) of UNICODE,
     * switch cases to 0, 1, 2, 3, 4.
     *  0: the high byte is 0x0, it is dealed alone;
     *  1: the UNICODE is in range of 0x4E00 to 0x9FFF;
     *  2: the UNICODE is in range of 0xE000 to 0xE8FF;
     *  3: the high byte is one of elements in table[];
     *      NOTE:   if the high byte is in table[], and the UNICODE can 
     *              be found in the file "other_gb.cod", then it will
     *              be dealed.
     *              while if the UNICODE can't be found in the file 
     *              "other_gb.cod", then it will go to case 4 and then 
     *              be handled.
     *  4: others.
plx_charset.c - 源码说明

本页面展示了「mtk wap和mms代码。。适应mtk 25。26平台」中的 plx_charset.c 源码文件，采用 C语言编程语言编写，共 2,285 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与mtk相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?