trnscode.c

来自「是一个手机功能的模拟程序」· C语言代码 · 共 974 行 · 第 1/2 页
974 行
/*
 * Copyright (C) Ericsson Mobile Communications AB, 2000.
 * Licensed to AU-System AB.
 * All rights reserved.
 *
 * This software is covered by the license agreement between
 * the end user and AU-System AB, and may be used and copied
 * only in accordance with the terms of the said agreement.
 *
 * Neither Ericsson Mobile Communications AB nor AU-System AB
 * assumes any responsibility or liability for any errors or inaccuracies in
 * this software, or any consequential, incidental or indirect damage arising
 * out of the use of the Generic WAP Client software.
 */
/*********************
trnscode.c
Created by KHN (AU-System Radio AB)
*********************/
/*********
History
-------
000119 (KHN) Corrected the way the length of UCS2 strings is calculated.
               Previously a BYTE* was cast to WCHAR*, and since most
               calculations were made on a part of a bigger BYTE* string,
               reading through the WCHAR* failed due to misalignment.

**********/


#include "trnscode.h"

#ifdef NO_GLOBAL_VARS
#include "userdata.h"
#else
static fPtr_Iana2Unicode_canConvert         g_pCanConvert = NULL;
static fPtr_Iana2Unicode_calcLen            g_pCalcLen = NULL;
static fPtr_Iana2Unicode_convert            g_pConvert = NULL;
static fPtr_Iana2Unicode_getNullTermByteLen g_pNullLen = NULL;
#endif




/*
 * newReverseMemcpy
 * ----------------
 * Assembles WCHARs from a byte string in which each pair of bytes is
 * stored low byte first:  a b ---> (b<<8) | a
 *
 * pwchResult: receives iLen/2 WCHAR values.
 * pbStr:      source bytes; read one byte at a time, so no alignment
 *             is required.
 * iLen:       number of source BYTES. It is expected to be even; the
 *             halving below drops a trailing odd byte.
 */
void newReverseMemcpy(WCHAR* pwchResult, BYTE* pbStr, size_t iLen)
{
	size_t	nUnits = iLen / 2;	/* complete byte pairs to convert */
	size_t	i;

	for (i = 0; i < nUnits; i++)
	{
		WCHAR	wLow  = (WCHAR) pbStr[2 * i];		/* first byte of the pair  */
		WCHAR	wHigh = (WCHAR) pbStr[2 * i + 1];	/* second byte of the pair */

		pwchResult[i] = (WCHAR) ( ((WCHAR) (wHigh << 8)) | wLow );
	}
}


/*
 * newmemcpy
 * ---------
 * Assembles WCHARs from a byte string in which each pair of bytes is
 * stored high byte first:  a b ---> (a<<8) | b
 *
 * pwchResult: receives iLen/2 WCHAR values.
 * pbStr:      source bytes; read one byte at a time, so no alignment
 *             is required.
 * iLen:       number of source BYTES. It is expected to be even; the
 *             halving below drops a trailing odd byte.
 */
void newmemcpy(WCHAR* pwchResult, BYTE* pbStr, size_t iLen)
{
	size_t	nUnits = iLen / 2;	/* complete byte pairs to convert */
	size_t	i;

	for (i = 0; i < nUnits; i++)
	{
		WCHAR	wHigh = (WCHAR) pbStr[2 * i];		/* first byte of the pair  */
		WCHAR	wLow  = (WCHAR) pbStr[2 * i + 1];	/* second byte of the pair */

		pwchResult[i] = (WCHAR) ( ((WCHAR) (wHigh << 8)) | wLow );
	}
}


/*
 * ucs2len
 * -------
 * Counts the two-byte characters in a byte string that is terminated
 * by a pair of zero bytes. The terminator is not counted.
 * The string is inspected byte by byte, so it may start at any address.
 *
 * Returns: number of two-byte characters (NOT bytes).
 */
UINT32 ucs2len(const BYTE* str)
{
	const BYTE*	pair;
	UINT32		count = 0;

	/* second byte of a pair is only looked at when the first one is 0 */
	for (pair = str; (pair[0] != 0) || (pair[1] != 0); pair += 2)
	{
		count++;
	}

	return count;
}


/* ================================================================ */
/*
File:    ConvertUTF.C
Author: Mark E. Davis
Copyright (C) 1994 Taligent, Inc. All rights reserved.

This code is copyrighted. Under the copyright laws, this code may not
be copied, in whole or part, without prior written consent of Taligent.

Taligent grants the right to use or reprint this code as long as this
ENTIRE copyright notice is reproduced in the code or reproduction.
The code is provided AS-IS, AND TALIGENT DISCLAIMS ALL WARRANTIES,
EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  IN
NO EVENT WILL TALIGENT BE LIABLE FOR ANY DAMAGES WHATSOEVER (INCLUDING,
WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS
INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY
LOSS) ARISING OUT OF THE USE OR INABILITY TO USE THIS CODE, EVEN
IF TALIGENT HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
BECAUSE SOME STATES DO NOT ALLOW THE EXCLUSION OR LIMITATION OF
LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE
LIMITATION MAY NOT APPLY TO YOU.

RESTRICTED RIGHTS LEGEND: Use, duplication, or disclosure by the
government is subject to restrictions as set forth in subparagraph
(c)(l)(ii) of the Rights in Technical Data and Computer Software
clause at DFARS 252.227-7013 and FAR 52.227-19.

This code may be protected by one or more U.S. and International
Patents.

TRADEMARKS: Taligent and the Taligent Design Mark are registered
trademarks of Taligent, Inc.
*/
/* ================================================================ */
/* Code-value / code-unit types used by the conversion routines below.
   Widths assume UINT32/UINT16/UINT8 are the project's 32/16/8-bit
   unsigned integer types (declared outside this file - TODO confirm). */
typedef UINT32                  UCS4;   /* full code value; accumulator type in the converters */
typedef UINT16                  UCS2;   /* single 16-bit code value */
typedef UINT16                  UTF16;  /* one UTF-16 code unit */
typedef UINT8                   UTF8;   /* one UTF-8 code unit (byte) */
/* ================================================================ */
/*	Each of these routines converts the text between *sourceStart and
sourceEnd, putting the result into the buffer between *targetStart and
targetEnd. Note: the end pointers are *after* the last item: e.g.
*(sourceEnd - 1) is the last item.

	The return result indicates whether the conversion was successful,
and if not, whether the problem was in the source or target buffers.

	After the conversion, *sourceStart and *targetStart are both
updated to point to the end of last text successfully converted in
the respective buffers.
*/

/* Status returned by ConvertUTF8toUTF16 and ConvertUTF16toUTF8. */
typedef enum {
    ok,                 /* conversion successful */
    sourceExhausted,    /* partial character in source, but hit end */
    targetExhausted     /* insuff. room in target for conversion */
} ConversionResult;

/* ================================================================ */
/* Value written by the converters in place of a code value they
   cannot represent. */
const UCS4 kReplacementCharacter =  0x0000FFFDUL;
/* Largest value ConvertUTF8toUTF16 stores as a single UTF16 unit. */
const UCS4 kMaximumUCS2 =           0x0000FFFFUL;
/* Largest value ConvertUTF8toUTF16 encodes as a surrogate pair;
   anything above is replaced by kReplacementCharacter. */
const UCS4 kMaximumUTF16 =          0x0010FFFFUL;
/* Upper limit tested by ConvertUTF16toUTF8 before falling back to
   kReplacementCharacter. */
const UCS4 kMaximumUCS4 =           0x7FFFFFFFUL;

/* Surrogate arithmetic: a value above kMaximumUCS2 has halfBase
   subtracted and is then split into an upper part (>> halfShift) and a
   lower part (& halfMask), which are added to kSurrogateHighStart and
   kSurrogateLowStart respectively. ConvertUTF16toUTF8 reverses this. */
const INT32 halfShift           = 10;
const UCS4 halfBase             = 0x0010000UL;
const UCS4 halfMask             = 0x3FFUL;
/* Inclusive ranges tested when recognising a surrogate pair. */
const UCS4 kSurrogateHighStart  = 0xD800UL;
const UCS4 kSurrogateHighEnd    = 0xDBFFUL;
const UCS4 kSurrogateLowStart   = 0xDC00UL;
const UCS4 kSurrogateLowEnd     = 0xDFFFUL;
/* ================================================================ */

/* Indexed by the number of EXTRA bytes in a UTF-8 sequence (0..5).
   ConvertUTF8toUTF16 adds up the raw bytes (shifting by 6 between them)
   and then subtracts this value to strip the accumulated marker bits. */
const UCS4 offsetsFromUTF8[6] =    {0x00000000UL, 0x00003080UL, 0x000E2080UL,
                                    0x03C82080UL, 0xFA082080UL, 0x82082080UL};
/* Indexed by the FIRST byte of a UTF-8 sequence; gives the number of
   extra bytes that follow it (0..5). Bytes 0x00-0xBF map to 0, so a
   stray continuation byte (0x80-0xBF) is treated as a one-byte sequence. */
const UINT8 bytesFromUTF8[256] = {
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5};

/* Indexed by the TOTAL number of bytes in a UTF-8 sequence (1..6; entry
   0 is unused by the code in this file). ConvertUTF16toUTF8 ORs the
   value into the first byte it writes. */
const UTF8 firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC};

/* ================================================================ */
/*    This code is similar in effect to making successive calls on the
mbtowc and wctomb routines in FSS-UTF. However, it is considerably
different in code:
* it is adapted to be consistent with UTF16,
* the interface converts a whole buffer to avoid function-call overhead
* constants have been gathered.
* loops & conditionals have been removed as much as possible for
efficiency, in favor of drop-through switch statements.
*/
/* ================================================================ */

/*
 * ConvertUTF8toUTF16
 * ------------------
 * Converts the UTF-8 bytes in [*sourceStart, sourceEnd) to UTF-16 code
 * units stored in [*targetStart, targetEnd).
 *
 * Values up to kMaximumUCS2 become one UTF16 unit, values up to
 * kMaximumUTF16 become a surrogate pair, and anything larger is
 * replaced by kReplacementCharacter.
 *
 * Returns:
 *   ok              - all of the source was converted.
 *   sourceExhausted - the last character in the source is truncated.
 *   targetExhausted - no room left in the target for the next character.
 *
 * In every case *sourceStart and *targetStart are updated to point just
 * past the last character that was completely converted, so a caller
 * can resume from there.
 *
 * NOTE: continuation bytes are not validated; the sequence length is
 * taken from the first byte alone (see bytesFromUTF8).
 */
ConversionResult    ConvertUTF8toUTF16 (
        UTF8** sourceStart, UTF8* sourceEnd,
        UTF16** targetStart, const UTF16* targetEnd)
{
    ConversionResult result = ok;
    UTF8* source = *sourceStart;
    UTF16* target = *targetStart;

    while (source < sourceEnd) {
        UTF8* charStart = source;   /* where to rewind to if the target is full */
        UCS4 ch = 0;
        unsigned short extraBytesToRead = bytesFromUTF8[*source];

        /* The character occupies extraBytesToRead + 1 bytes. It is
           truncated unless MORE than extraBytesToRead bytes remain
           (the old "source + extra > sourceEnd" test let the last byte
           be read from *sourceEnd). */
        if ((sourceEnd - source) <= extraBytesToRead) {
            result = sourceExhausted;
            break;
        }

        switch (extraBytesToRead) {    /* note: code falls through cases! */
            case 5:
                ch += *source++;
                ch <<= 6;
                /* fall through */
            case 4:
                ch += *source++;
                ch <<= 6;
                /* fall through */
            case 3:
                ch += *source++;
                ch <<= 6;
                /* fall through */
            case 2:
                ch += *source++;
                ch <<= 6;
                /* fall through */
            case 1:
                ch += *source++;
                ch <<= 6;
                /* fall through */
            case 0:
                ch += *source++;
                break;
            default:
                /* not reachable: bytesFromUTF8 only holds 0..5 */
                break;
        }
        /* remove the marker bits that were summed in with the payload */
        ch -= offsetsFromUTF8[extraBytesToRead];

        if (target >= targetEnd) {
            source = charStart;     /* this character was NOT converted */
            result = targetExhausted;
            break;
        }
        if (ch <= kMaximumUCS2) {
            *target++ = (UTF16)ch;
        } else if (ch > kMaximumUTF16) {
            *target++ = (UTF16)kReplacementCharacter;
        } else {
            /* a surrogate pair needs two free UTF16 units */
            if ((targetEnd - target) < 2) {
                source = charStart; /* this character was NOT converted */
                result = targetExhausted;
                break;
            }
            ch -= halfBase;
            *target++ = (UTF16)((ch >> halfShift) + kSurrogateHighStart);
            *target++ = (UTF16)((ch & halfMask) + kSurrogateLowStart);
        }
    }
    *sourceStart = source;
    *targetStart = target;
    return result;
}

/************************************************
GetUT8CharLen
-------------
Returns the number of bytes in the UTF8 character
that starts with firstByte. The number is taken
from the count of consecutive 1 bits at the top of
firstByte; a byte with the top bit clear is a
one-byte character.
The only exception is the 0 byte value (NULL),
for which 0 is returned.
**************************************************/
UINT8 GetUT8CharLen(BYTE firstByte)
{
    const BYTE  topBit = 0x80;
    UINT8       leadingOnes = 0;

    /* NULL found */
    if (firstByte == 0) {
        return 0;
    }

    /* shift the byte left until a 0 bit reaches the top position */
    for ( ; (firstByte & topBit) != 0; firstByte = (BYTE)(firstByte << 1)) {
        leadingOnes++;
    }

    /* no leading 1 bit at all: plain one-byte character */
    if (leadingOnes == 0) {
        return 1;
    }
    return leadingOnes;
}


INT32 UTF8Len( BYTE* utf8Str, BOOL nullTerminated, UINT32 *byteLen )
/* Counts the characters in a UTF8 string. Returns the number of
   characters, NOT bytes, or -1 if the string is corrupt (not properly
   ended).

   byteLen is always dereferenced and must point to a valid UINT32.

   nullTerminated != 0:
     - the string is scanned up to (not including) the first 0 byte
       that appears where a character is expected to start.
     - if *byteLen != 0 on entry, that value is used as an upper
       boundary; reaching it before the terminating 0 byte gives -1.
     - every byte after the first one of a character must have the
       form 10XXXXXX, otherwise -1 is returned.
     - on return *byteLen holds the exact length in bytes, or, if
       something went wrong, the number of bytes read excluding the
       byte where the error was detected.

   nullTerminated == 0 (known length string):
     - exactly *byteLen bytes are walked, stepping by the length that
       the first byte of each character announces. The bytes in
       between are not inspected, and a 0 byte counts as a character.
     - -1 is returned if the last character runs past *byteLen.
     - *byteLen is not changed. */
{
    BYTE        checkUTF8byte = 0xC0;    /* to get the 2 most significant bits of the byte */
    BYTE        UTF8byte =      0x80; /* 10XXXXXX */
    UINT32    	strPos =        0;       /* number of bytes accepted so far */
    UINT8       i;                       /* 1-based index of the byte inside the current char */
    UINT8       utf8CharLen;
    UINT32    	result = 0;              /* number of characters found so far */
    BOOL        correctStr = TRUE;
    BOOL        foundEnd = FALSE;

    if (nullTerminated) {
        /* the string is nullterminated */
        while (correctStr && (!foundEnd))
        {
            utf8CharLen = GetUT8CharLen( *utf8Str );
            if (utf8CharLen == 0)
            {
                foundEnd = TRUE;
            }
            else
            {
                result++;

                /* the first byte of the character is accepted as it is */
                strPos++;
                i = 2;
                while ((correctStr) && (i <= utf8CharLen))
                {
										if ((*byteLen != 0) && (strPos >= *byteLen))
										/* check so that we are not passing the upper limit (if any) */
                    {
                    	/* passed the upper limit in the middle of a utf8 char
                      => invalid string */
											correctStr = FALSE;
                    }
                    else
                    {
											/* make sure that there is not an embedded NULL
	                    or other illegal character */
                      if ( (correctStr = ((BOOL)(((*(utf8Str + (i-1))) & checkUTF8byte) == UTF8byte))) )
                      {
		                    strPos++;
		                    i++;
                      }
                    }
                }
								if (correctStr)
                {
	                utf8Str += utf8CharLen;
                }
            }

            if ((*byteLen != 0) && (strPos >= *byteLen))
            {
                /* there is an upper bound that must be checked: reaching
                   it here means no terminating NULL was found within the
                   limit */
                correctStr = FALSE;
            }
        }
				/* *byteLen will contain the correct length
        or if something went wrong: the number of bytes read excluding
        the byte where the error was detected*/
        *byteLen = strPos;
        if (correctStr)
        {
            return result;
        }
        else
        {
            return -1;
        }
    }
    else {
        /* known length string */
        while (strPos < *byteLen)
        {
            utf8CharLen = GetUT8CharLen( *utf8Str );
            if (utf8CharLen == 0)
            {
                /* NULL found and in this mode it is accepted as a char */
                utf8CharLen = 1;
            }
            /* step over the whole character; its trailing bytes are not examined */
            strPos += utf8CharLen;
            utf8Str += utf8CharLen;
	          result++;
        }
        if (strPos == *byteLen)
        {
            /* correct ending of utf8 string */
            return result;
        }
        else
        {
            /* length of characters not matching with length of string */
            return -1;
        }
    }
}

/* ================================================================ */

/*
 * ConvertUTF16toUTF8
 * ------------------
 * Converts the UTF-16 code units in [*sourceStart, sourceEnd) to UTF-8
 * bytes stored in [*targetStart, targetEnd).
 *
 * A high surrogate immediately followed by a low surrogate is combined
 * into one code value. A surrogate without a partner is encoded as it
 * stands.
 *
 * Returns:
 *   ok              - all of the source was converted.
 *   targetExhausted - no room left in the target for the next character.
 *
 * In both cases *sourceStart and *targetStart are updated to point just
 * past the last character that was completely converted, so a caller
 * can resume from there. An empty source range is handled without
 * reading from it.
 */
ConversionResult	ConvertUTF16toUTF8 (
		UTF16** sourceStart, const UTF16* sourceEnd,
		UTF8** targetStart, const UTF8* targetEnd)
{
	const UCS4 byteMask = 0xBF;		/* clears bit 6 of a trailing byte */
	const UCS4 byteMark = 0x80;		/* sets bit 7 of a trailing byte   */
	ConversionResult result = ok;
	UTF16* source = *sourceStart;
	UTF8* target = *targetStart;

	while (source < sourceEnd)
	{
		UTF16* charStart = source;	/* where to rewind to if the target is full */
		unsigned short bytesToWrite;
		UCS4 ch = *source++;

		if (ch >= kSurrogateHighStart && ch <= kSurrogateHighEnd
				&& source < sourceEnd)
		{
			/* only peek at the next unit when there is one */
			UCS4 ch2 = *source;

			if (ch2 >= kSurrogateLowStart && ch2 <= kSurrogateLowEnd)
			{
				ch = ((ch - kSurrogateHighStart) << halfShift)
					+ (ch2 - kSurrogateLowStart) + halfBase;
				++source;
			}
		}

		if (ch < 0x80) {				bytesToWrite = 1;
		} else if (ch < 0x800) {		bytesToWrite = 2;
		} else if (ch < 0x10000) {		bytesToWrite = 3;
		} else if (ch < 0x200000) {		bytesToWrite = 4;
		} else if (ch < 0x4000000) {	bytesToWrite = 5;
		} else if (ch <= kMaximumUCS4){	bytesToWrite = 6;
		} else {
			/* kReplacementCharacter (0xFFFD) is >= 0x800 and therefore
			   needs three bytes, not two */
			bytesToWrite = 3;
			ch = kReplacementCharacter;
		}

		/* compare distances so that no pointer beyond targetEnd is formed */
		if ((targetEnd - target) < bytesToWrite)
		{
			source = charStart;		/* this character was NOT converted */
			result = targetExhausted;
			break;
		}

		/* the bytes are produced last-to-first, so start at the end */
		target += bytesToWrite;
		switch (bytesToWrite)
		{	/* note: code falls through cases! */
			case 6:	*--target = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6;
				/* fall through */
			case 5:	*--target = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6;
				/* fall through */
			case 4:	*--target = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6;
				/* fall through */
			case 3:	*--target = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6;
				/* fall through */
			case 2:	*--target = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6;
				/* fall through */
			case 1:	*--target = (UTF8) (ch | firstByteMark[bytesToWrite]);
				break;
			default:
				/* not reachable: bytesToWrite is always 1..6 */
				break;
		}
		target += bytesToWrite;
	}
	*sourceStart = source;
	*targetStart = target;
	return result;
}

/* ================================================================ */













/*===========================================
Iana2Unicode_canConvert
---------------------------------------------
Returns TRUE if transcoding of the charset
is available, otherwise FALSE.

Parameters:
-----------
iIANAcharset: The MIBenum IANA code that corresponds
              to a specific character encoding.

=============================================
Returns: TRUE if transcoding is supported for the charset,
         otherwise FALSE.
============================================*/
BOOL Iana2Unicode_canConvert( INT16 iIANAcharset )
{
	switch (iIANAcharset)
	{
		case IANA_CHARSET_INVALID:
			return FALSE;
trnscode.c - 源码说明

本页面展示了「是一个手机功能的模拟程序」中的 trnscode.c 源码文件，采用 C语言编程语言编写，共 974 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与手机功能相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?