📄 dnstring.c

📁 cryptlib安全工具包
💻 C
📖 第 1 页 / 共 3 页
字号:
			/* If the high bit is set it's not an ASCII subset */
			if( ch >= 128 )
				{
				notPrintable = notIA5 = TRUE;
				if( !asn1CharFlags[ ch & 0x7F ] )
					{
					/* It's not 8859-1 either */
					return( STRINGTYPE_UNICODE );
					}
				}
			else
				{
				/* Check whether it's a PrintableString */
				if( !( asn1CharFlags[ ch ] & P ) )
					notPrintable = TRUE;
				}
			}

		return( notIA5 ? STRINGTYPE_UNICODE_T61 : notPrintable ? \
				STRINGTYPE_UNICODE_IA5 : STRINGTYPE_UNICODE_PRINTABLE );
		}

	/* If it's supposed to be Unicode and not an 8-bit string encoded as a
	   Unicode string, it's Unicode */
	if( stringTag == BER_STRING_BMP && !( stringLen % UCSIZE ) )
		return( STRINGTYPE_UNICODE );

	/* Determine the 8-bit string type */
	return( get8bitStringType( string, stringLen ) );
	}

/* Work out the string type of a string held in the native character set.
   If the string is recognised as a native widechar string, every 
   character is examined to see whether it's really a narrower type that's 
   been expanded into widechars, giving one of the STRINGTYPE_UNICODE_xxx 
   results (or STRINGTYPE_UNICODE if a character outside the 8-bit table is 
   found, or STRINGTYPE_NONE if a character has any of its upper 16 bits 
   set).  Anything that isn't a widechar string is handed on to 
   get8bitStringType() */

CHECK_RETVAL_ENUM( ASN1_STRINGTYPE ) STDC_NONNULL_ARG( ( 1 ) ) \
static ASN1_STRINGTYPE getNativeStringType( IN_BUFFER( stringLen ) \
												const BYTE *string, 
											IN_LENGTH_SHORT const int stringLen )
	{
	BOOLEAN sawNonPrintable = FALSE, sawNonIA5 = FALSE;
	int offset;

	assert( isReadPtr( string, stringLen ) );

	REQUIRES_EXT( ( stringLen > 0 && stringLen < MAX_INTLENGTH_SHORT ), \
				  STRINGTYPE_ERROR );

	/* If the length isn't a multiple of the widechar size or the content 
	   doesn't look like a widechar string then it's an 8-bit string of 
	   some kind */
	if( ( stringLen % WCSIZE ) != 0 || \
		!isNativeWidecharString( string, stringLen ) )
		return( get8bitStringType( string, stringLen ) );

	/* It's a widechar string, walk down it one widechar at a time to see 
	   whether it's actually something narrower that's been bloated out 
	   into widechars */
	for( offset = 0; offset < stringLen; offset += WCSIZE )
		{
		const BYTE *wcharPtr = string + offset;
		const wchar_t ch = getWidechar( wcharPtr );

		/* Safety check, nothing above 16 bits is allowed */
		if( ( ch & 0xFFFF0000L ) != 0 )
			return( STRINGTYPE_NONE );

		if( ch < 128 )
			{
			/* It's in the ASCII range, check whether it's also a 
			   PrintableString character */
			if( !( nativeCharFlags[ ch ] & P ) )
				sawNonPrintable = TRUE;
			continue;
			}

		/* The high bit is set so it's not an ASCII subset */
		sawNonPrintable = sawNonIA5 = TRUE;
		if( !nativeCharFlags[ ch & 0x7F ] )
			{
			/* It's not 8859-1 either */
			return( STRINGTYPE_UNICODE );
			}
		}

	/* Report the narrowest type that can hold everything that we found */
	if( sawNonIA5 )
		return( STRINGTYPE_UNICODE_T61 );
	if( sawNonPrintable )
		return( STRINGTYPE_UNICODE_IA5 );
	return( STRINGTYPE_UNICODE_PRINTABLE );
	}

/****************************************************************************
*																			*
*								UTF-8 Functions								*
*																			*
****************************************************************************/

/* Parse one character from the string, enforcing the UTF-8 canonical-
   encoding rules:

	  00 -  7F = 0xxxxxxx
	 80 -  7FF = 110xxxxx 10xxxxxx 
	800 - FFFF = 1110xxxx 10xxxxxx 10xxxxxx 

   On success the decoded character (0...0xFFFF) is returned and 
   *charByteCount is set to the number of bytes that it occupied (1...3).
   On failure CRYPT_ERROR_BADDATA is returned and *charByteCount is left 
   at zero.  The following are all treated as bad data:

	- A continuation byte (10xxxxxx) in the lead position.
	- A lead byte announcing a four-byte or longer encoding.
	- An encoding that runs past stringMaxLen.
	- A trailing byte that isn't of the form 10xxxxxx.
	- A non-canonical (overlong) encoding, i.e. a two-byte encoding of a 
	  value below 0x80 or a three-byte encoding of a value below 0x800.  
	  Accepting these would let the same character be represented in 
	  several ways, which defeats any filtering or comparison performed 
	  on the encoded form.

   Values in the surrogate range D800...DFFF aren't filtered out here */

STDC_NONNULL_ARG( ( 1, 3 ) ) \
static long getUTF8Char( IN_BUFFER( stringMaxLen ) const BYTE *string, 
						 IN_LENGTH_SHORT const int stringMaxLen,
						 OUT_LENGTH_SHORT_Z int *charByteCount )
	{
	int firstChar, count = -1;
	long largeCh, minValue;

	assert( isReadPtr( string, stringMaxLen ) );
	assert( isWritePtr( charByteCount, sizeof( int ) ) );

	REQUIRES( stringMaxLen > 0 && stringMaxLen < MAX_INTLENGTH_SHORT );

	/* Clear return value */
	*charByteCount = 0;

	/* Only touch the input once we know that there's at least one byte 
	   of it present */
	firstChar = *string;
	if( firstChar < 0 || firstChar > 255 )
		return( CRYPT_ERROR_BADDATA );
	if( !( firstChar & 0x80 ) )
		{
		/* Simplest case, straight ASCII */
		*charByteCount = 1;
		return( firstChar & 0x7F );
		}

	/* Determine the encoded length from the lead byte */
	if( ( firstChar & 0xC0 ) == 0x80 )	/* 10xxxxxx, stray continuation */
		return( CRYPT_ERROR_BADDATA );
	if( ( firstChar & 0xE0 ) == 0xC0 )	/* 110xxxxx, two-byte encoding */
		count = 2;
	else
		{
		if( ( firstChar & 0xF0 ) == 0xE0 )	/* 1110xxxx, three-byte encoding */
			count = 3;
		else
			{
			/* In theory we can also get 4- and 5-byte encodings but this 
			   is far more likely to be something invalid than a genuine 
			   attempt to represent something in Tsolyani */
			return( CRYPT_ERROR_BADDATA );
			}
		}
	if( count < 2 || count > 3 || count > stringMaxLen )
		return( CRYPT_ERROR_BADDATA );

	/* Decode the character, remembering the smallest value that may 
	   legitimately be encoded using this many bytes */
	switch( count )
		{
		case 2:
			if( ( string[ 1 ] & 0xC0 ) != 0x80 )
				return( CRYPT_ERROR_BADDATA );
			largeCh = ( ( long )( firstChar & 0x1F ) << 6 ) | \
						( string[ 1 ] & 0x3F );
			minValue = 0x80;
			break;

		case 3:
			if( ( string[ 1 ] & 0xC0 ) != 0x80 || \
				( string[ 2 ] & 0xC0 ) != 0x80 )
				return( CRYPT_ERROR_BADDATA );

			/* Only the low four bits of a 1110xxxx lead byte carry data.  
			   The value is widened to a long before shifting so that the 
			   result can't overflow a 16-bit int */
			largeCh = ( ( long )( firstChar & 0x0F ) << 12 ) | \
					  ( ( long )( string[ 1 ] & 0x3F ) << 6 ) | \
						( string[ 2 ] & 0x3F );
			minValue = 0x800;
			break;

		default:
			retIntError();
		}

	/* Reject out-of-range values and non-canonical (overlong) encodings */
	if( largeCh < minValue || largeCh > 0xFFFF )
		return( CRYPT_ERROR_BADDATA );

	*charByteCount = count;
	return( largeCh & 0xFFFF );
	}

#if 0	/* Currently unused, see note at start */

/* Encode one character as UTF-8 at the start of the given buffer and 
   return the number of bytes written (1, 2, or 3).  No bounds or range 
   checking is performed: the caller has to provide room for up to three 
   bytes, and anything that isn't below 0x800 is written using the 
   three-byte form */

static int putUTF8Char( BYTE *string, const long largeCh )
	{
	/* Single byte, 0xxxxxxx */
	if( largeCh < 0x80 )
		{
		string[ 0 ] = ( BYTE ) largeCh;
		return( 1 );
		}

	/* Two bytes, 110xxxxx 10xxxxxx */
	if( largeCh < 0x0800 )
		{
		string[ 0 ] = ( BYTE )( 0xC0 | ( largeCh >> 6 ) );
		string[ 1 ] = ( BYTE )( 0x80 | ( largeCh & 0x3F ) );
		return( 2 );
		}

	/* Three bytes, 1110xxxx 10xxxxxx 10xxxxxx */
	string[ 0 ] = ( BYTE )( 0xE0 | ( largeCh >> 12 ) );
	string[ 1 ] = ( BYTE )( 0x80 | ( ( largeCh >> 6 ) & 0x3F ) );
	string[ 2 ] = ( BYTE )( 0x80 | ( largeCh & 0x3F ) );
	return( 3 );
	}
#endif /* 0 */

/* Determine the length of a string once it's encoded as UTF-8.  For an 
   8-bit string the result is simply stringLen.  For a widechar string 
   (stringLen is a byte count, not a character count) each character 
   contributes one, two, or three bytes depending on whether it's below 
   0x80, below 0x800, or anything else.  The result is returned in 
   *targetStringLength, which is zero if the function fails */

CHECK_RETVAL STDC_NONNULL_ARG( ( 1, 3 ) ) \
static int utf8TargetStringLen( IN_BUFFER( stringLen ) const void *string, 
								IN_LENGTH_SHORT const int stringLen,
								OUT_LENGTH_SHORT_Z int *targetStringLength,
								const BOOLEAN isWideChar )
	{
	const wchar_t *widePtr = ( const wchar_t * ) string;
	int encodedLen = 0, offset;

	REQUIRES( stringLen > 0 && stringLen < MAX_INTLENGTH_SHORT );

	/* Clear return value */
	*targetStringLength = 0;

	/* An 8-bit string keeps its length unchanged */
	if( !isWideChar )
		{
		*targetStringLength = stringLen;
		return( CRYPT_OK );
		}

	/* It's a widechar string, add up the encoded size of each character */
	for( offset = 0; \
		 offset < stringLen && offset < FAILSAFE_ITERATIONS_LARGE; \
		 offset += WCSIZE )
		{
		const wchar_t ch = *widePtr++;

		if( ch < 0x80 )
			encodedLen += 1;
		else
			{
			if( ch < 0x0800 )
				encodedLen += 2;
			else
				encodedLen += 3;
			}
		}
	ENSURES( offset < FAILSAFE_ITERATIONS_LARGE );
	*targetStringLength = encodedLen;

	return( CRYPT_OK );
	}

/* Convert a UTF-8 string to ASCII, 8859-1, or Unicode, and vice versa.

   When converting from UTF-8 the source is processed twice.  The first 
   pass decodes every character to count them, to reject malformed input 
   with CRYPT_ERROR_BADDATA, and to decide whether the result has to be 
   stored as widechars (STRINGTYPE_UNICODE) or can be held in single 
   bytes.  The second pass performs the actual copy.  *destLen receives 
   the output size in bytes, and CRYPT_ERROR_OVERFLOW is returned if that 
   exceeds destMaxLen.  On success the return value is the string type 
   that was chosen rather than a status code */

CHECK_RETVAL STDC_NONNULL_ARG( ( 1, 3, 4 ) ) \
static int copyFromUtf8String( OUT_BUFFER( destMaxLen, *destLen ) void *dest, 
							   IN_LENGTH_SHORT const int destMaxLen, 
							   OUT_LENGTH_SHORT_Z int *destLen, 
							   IN_BUFFER( sourceLen ) const void *source, 
							   IN_LENGTH_SHORT const int sourceLen )
	{
	ASN1_STRINGTYPE stringType = STRINGTYPE_PRINTABLE;
	const BYTE *utf8Ptr = source;
	wchar_t *wideOut = dest;
	BYTE *byteOut = dest;
	int charCount = 0, outIndex = 0, charSize, pos;

	assert( isWritePtr( dest, destMaxLen ) );
	assert( isWritePtr( destLen, sizeof( int ) ) );
	assert( isReadPtr( source, sourceLen ) );

	REQUIRES( destMaxLen > 0 && destMaxLen < MAX_INTLENGTH_SHORT );
	REQUIRES( sourceLen > 0 && sourceLen < MAX_INTLENGTH_SHORT );

	/* Clear return value */
	memset( dest, 0, min( 16, destMaxLen ) );
	*destLen = 0;

	/* First pass: Count the characters and find the widest character type 
	   that's present.  We can't stop once we've seen a Unicode character 
	   because the rest of the string still has to be checked for 
	   malformed characters */
	pos = 0;
	while( pos < sourceLen && pos < FAILSAFE_ITERATIONS_LARGE )
		{
		const long largeCh = getUTF8Char( utf8Ptr + pos, sourceLen - pos, 
										  &charSize );

		if( largeCh < 0 || largeCh > 0xFFFFL )
			return( CRYPT_ERROR_BADDATA );
		charCount++;

		if( stringType == STRINGTYPE_UNICODE || largeCh > 0xFF )
			{
			/* Once it's Unicode it stays Unicode */
			stringType = STRINGTYPE_UNICODE;
			}
		else if( largeCh >= 128 )
			{
			/* It's an 8-bit character, it's T61 if the character table 
			   allows it and Unicode otherwise */
			if( asn1CharFlags[ largeCh & 0x7F ] & P )
				stringType = STRINGTYPE_T61;
			else
				stringType = STRINGTYPE_UNICODE;
			}

		pos += charSize;
		}
	ENSURES( pos < FAILSAFE_ITERATIONS_LARGE );

	/* Make sure that the converted string fits into the destination */
	*destLen = charCount * ( ( stringType == STRINGTYPE_UNICODE ) ? \
							 WCSIZE : 1 );
	if( *destLen > destMaxLen )
		return( CRYPT_ERROR_OVERFLOW );

	/* Second pass: Decode each character again and store it either as a 
	   widechar or as a single byte */
	pos = 0;
	while( pos < sourceLen && pos < FAILSAFE_ITERATIONS_LARGE )
		{
		const long largeCh = getUTF8Char( utf8Ptr + pos, sourceLen - pos, 
										  &charSize );

		ENSURES( largeCh >= 0 && largeCh <= 0xFFFFL );

		if( stringType == STRINGTYPE_UNICODE )
			wideOut[ outIndex ] = ( wchar_t ) largeCh;
		else
			byteOut[ outIndex ] = ( BYTE ) largeCh;
		outIndex++;

		pos += charSize;
		}
	ENSURES( pos < FAILSAFE_ITERATIONS_LARGE );

	return( stringType );
	}

#if 0	/* Currently unused, see note at start */

static int copyToUtf8String( OUT_BUFFER( destMaxLen, *destLen ) \
							 void *dest, const int destMaxLen, int *destLen,
							 IN_BUFFER( sourceLen ) \
							 const void *source, const int sourceLen,
							 const BOOLEAN isWideChar )
	{
	assert( isWritePtr( dest, destMaxLen ) );
	assert( isWritePtr( destLen, sizeof( int ) ) );
	assert( isReadPtr( source, sourceLen ) );

	/* Clear return value */
	memset( dest, 0, min( 16, destMaxLen ) );
	*destLen = 0;

	if( isWideChar )
		{
		const wchar_t *wcStrPtr = source;
		BYTE *destPtr = dest;
		int length = 0, i;

		for( i = 0; i < sourceLen && \
					i < FAILSAFE_ITERATIONS_LARGE; i += WCSIZE )
			{
			const int utf8charLength = putUTF8Char( destPtr, *wcStrPtr++, 
													destMaxLen - destLen );

			if( utf8charLength < 0 )
				return( utf8charLength );
			length += utf8charLength;
			destPtr += length;
			}
		ENSURES( i < FAILSAFE_ITERATIONS_LARGE );
		*destLen = length;

		return( CRYPT_OK );
		}

	memcpy( dest, source, sourceLen );
	*destLen = sourceLen;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -