📄 dnstring.c
字号:
/****************************************************************************
* *
* Certificate String Routines *
* Copyright Peter Gutmann 1996-2007 *
* *
****************************************************************************/
#include <ctype.h>
#if defined( INC_ALL )
#include "cert.h"
#include "asn1.h"
#else
#include "cert/cert.h"
#include "misc/asn1.h"
#endif /* Compiler-specific includes */
/* The character set (or at least ASN.1 string type) for a string. Although
IA5String and VisibleString/ISO646String are technically different the
only real difference is that IA5String allows the full range of control
characters, which isn't notably useful. For this reason we treat both as
ISO646String. Sometimes we can be fed Unicode strings that are just
bloated versions of another string type so we need to account for these
as well.
UTF-8 strings are a pain because they're only rarely supported as a
native format. For this reason we convert them to a more useful local
character set (ASCII, 8859-1, or Unicode as appropriate) when we read
them to make them usable. Although their use was required after the
cutover date of December 2003, by unspoken unanimous consensus of
implementors everywhere implementations are sticking with the existing
DN encoding to avoid breaking things */
/* String-type classification used when analysing DN string components.  The
   enumerators are numbered implicitly so their order is significant: the
   VISIBLE names are pure aliases for the corresponding IA5 names and don't
   consume a value of their own */
typedef enum {
	STRINGTYPE_NONE,				/* No string type */

	/* Strings made up of 8-bit characters */
	STRINGTYPE_PRINTABLE,			/* PrintableString */
	STRINGTYPE_IA5,					/* IA5String */
	STRINGTYPE_VISIBLE = STRINGTYPE_IA5,
									/* VisibleString, alias for IA5String */
	STRINGTYPE_T61,					/* T61 (8859-1) string */

	/* 8-bit character sets that have been widened to Unicode */
	STRINGTYPE_UNICODE_PRINTABLE,	/* PrintableString as Unicode */
	STRINGTYPE_UNICODE_IA5,			/* IA5String as Unicode */
	STRINGTYPE_UNICODE_VISIBLE = STRINGTYPE_UNICODE_IA5,
									/* VisibleString as Unicode, alias for
									   IA5String as Unicode */
	STRINGTYPE_UNICODE_T61,			/* 8859-1 as Unicode */

	/* Genuine Unicode/UTF-8 */
	STRINGTYPE_UNICODE,				/* Unicode string */
	STRINGTYPE_UTF8,				/* UTF-8 string */

	/* Special-case value reported when processing fails */
	STRINGTYPE_ERROR,				/* Error occurred during processing */

	STRINGTYPE_LAST					/* Last possible string type */
	} ASN1_STRINGTYPE;
/* Since wchar_t can be anything from 8 bits (Borland C++ under DOS) to 32
bits (some oddball RISC Unixen) we define a bmpchar_t for
Unicode/BMPString chars which is always 16 bits as required for
BMPStrings, to match wchar_t. The conversion to and from a BMPString and
wchar_t may require narrowing or widening of characters and possibly
endianness conversion as well */
/* Character type used for Unicode/BMPString data */
typedef unsigned short bmpchar_t;

/* Number of bytes that one BMPString character occupies in encoded form */
#define UCSIZE		2
/****************************************************************************
* *
* Character Set Management Functions *
* *
****************************************************************************/
/* Because of the bizarre (and mostly useless) collection of ASN.1 character
types we need to be very careful about what we allow in a string. The
following table is used to determine whether a character is valid within
a given string type.
Although IA5String and VisibleString/ISO646String are technically
different the only real difference is that IA5String allows the full
range of control characters, which isn't notably useful. For this reason
we treat both as ISO646String */
/* Flag bits recording which ASN.1 string types permit a given character.
   These are short names, but they're what the table below and the code that
   tests its entries use */
#define P 1 /* PrintableString */
#define I 2 /* IA5String/VisibleString/ISO646String */
#define PI ( P | I ) /* PrintableString and IA5String */
/* Per-character validity flags, indexed by 7-bit character code.  There are
   eight rows of sixteen entries covering 0x00...0x7F, and the comment above
   each row names the characters that the row's entries correspond to.  An
   entry of zero (the control characters and DEL) means that the character
   isn't permitted in either string type */
static const int FAR_BSS asn1CharFlags[] = {
/* 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* SP ! " # $ % & ' ( ) * + , - . / (the first entry is the space char) */
PI, I, I, I, I, I, I, PI, PI, PI, I, PI, PI, PI, PI, PI,
/* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
PI, PI, PI, PI, PI, PI, PI, PI, PI, PI, PI, I, I, PI, I, PI,
/* @ A B C D E F G H I J K L M N O */
I, PI, PI, PI, PI, PI, PI, PI, PI, PI, PI, PI, PI, PI, PI, PI,
/* P Q R S T U V W X Y Z [ \ ] ^ _ */
PI, PI, PI, PI, PI, PI, PI, PI, PI, PI, PI, I, I, I, I, I,
/* ` a b c d e f g h i j k l m n o */
I, PI, PI, PI, PI, PI, PI, PI, PI, PI, PI, PI, PI, PI, PI, PI,
/* p q r s t u v w x y z { | } ~ DL (the last entry is DEL, 0x7F) */
PI, PI, PI, PI, PI, PI, PI, PI, PI, PI, PI, I, I, I, I, 0
};
/* The native-character flags are simply another name for the ASN.1 table */
#define nativeCharFlags asn1CharFlags
/* Extract a widechar or bmpchar from an (arbitrarily-aligned) string */
/* Assemble one wchar_t from the sizeof( wchar_t ) consecutive bytes starting
   at 'string', which doesn't need to be aligned.  The bytes are taken
   least-significant first if DATA_LITTLEENDIAN is defined and
   most-significant first otherwise.

   The value is accumulated in an unsigned long rather than directly in a
   wchar_t or via the int-promoted source byte.  wchar_t can be a signed
   32-bit type, in which case shifting a byte with its top bit set into the
   most significant position shifts into the sign bit of a signed type, which
   is undefined behaviour.  Doing the arithmetic unsigned and converting once
   at the end produces the same value for every input for which the signed
   form was well-defined */
CHECK_RETVAL_RANGE( 0, 0xFFFF ) STDC_NONNULL_ARG( ( 1 ) ) \
static wchar_t getWidechar( IN_BUFFER( 2 ) const BYTE *string )
	{
	unsigned long value = 0;
#ifdef DATA_LITTLEENDIAN
	int shiftAmt = 0;
#endif /* DATA_LITTLEENDIAN */
	int i;

	/* We read a full wchar_t's worth of data, which can be more than two
	   bytes, so that's the amount that has to be readable */
	assert( isReadPtr( string, sizeof( wchar_t ) ) );

	/* Since we're reading wchar_t-sized values from a char-aligned source,
	   we have to assemble the data a byte at a time to handle systems where
	   non-char values can only be accessed on word-aligned boundaries.  The
	   loop bound is cast to int to avoid a signed/unsigned comparison */
	for( i = 0; i < ( int ) sizeof( wchar_t ); i++ )
		{
#ifdef DATA_LITTLEENDIAN
		value |= ( ( unsigned long ) *string++ ) << shiftAmt;
		shiftAmt += 8;
#else
		value = ( value << 8 ) | *string++;
#endif /* DATA_LITTLEENDIAN */
		}

	return( ( wchar_t ) value );
	}
/* Build a BMPString character from the two bytes at 'string', which doesn't
   need to be aligned.  The first byte supplies the high-order eight bits and
   the second byte the low-order eight bits */
CHECK_RETVAL_RANGE( 0, 0xFFFF ) STDC_NONNULL_ARG( ( 1 ) ) \
static wchar_t getBmpchar( IN_BUFFER( 2 ) const BYTE *string )
	{
	bmpchar_t highByte, lowByte;

	assert( isReadPtr( string, 2 ) );

	highByte = string[ 0 ];
	lowByte = string[ 1 ];

	return( ( highByte << 8 ) | lowByte );
	}
/* Try and guess whether a native string is a widechar string */
/* Heuristically decide whether the stringLen bytes at 'string' hold wchar_t
   characters rather than 8-bit characters.  Returns TRUE if the data is
   probably a widechar string and FALSE if it probably isn't.  The length
   must be a positive multiple of WCSIZE; anything else is rejected by the
   REQUIRES_B() precondition checks (presumably by returning FALSE, the
   macro's definition isn't visible from here) */
CHECK_RETVAL_BOOL STDC_NONNULL_ARG( ( 1 ) ) \
static BOOLEAN isNativeWidecharString( IN_BUFFER( stringLen ) const BYTE *string,
									   IN_LENGTH_SHORT const int stringLen )
	{
	wchar_t wCh;
	int hiByte = 0, i;

	assert( isReadPtr( string, stringLen ) );

	REQUIRES_B( stringLen > 0 && stringLen < MAX_INTLENGTH_SHORT );
	REQUIRES_B( !( stringLen % WCSIZE ) );

	/* If it's too short to be a widechar string, it's definitely not
	   Unicode */
	if( stringLen < WCSIZE )
		{
		/* "Too skinny to join the army they said.  Didn't make the weight
		   they said" */
		return( FALSE );
		}

	/* Fetch the first character.  This is deliberately done only after the
	   length has been validated above, since getWidechar() reads a full
	   wchar_t's worth of bytes and would run past the end of an undersized
	   buffer */
	wCh = getWidechar( string );

	/* If wchar_t is > 16 bits and the bits above 16 are set or all zero,
	   it's either definitely not Unicode or Unicode.  Note that some
	   compilers will complain of unreachable code here, unfortunately we
	   can't easily fix this since WCSIZE is usually an expression involving
	   sizeof(), which we can't handle via the preprocessor */
#if INT_MAX > 0xFFFFL
	if( WCSIZE > 2 )
		return( ( wCh > 0xFFFF ) ? FALSE : TRUE );
#endif /* > 16-bit machines */

	/* If wchar_t is 8 bits, it's never Unicode.  We make this conditional on
	   the system being 16-bit to avoid compiler warnings about dead code on
	   the majority of systems, which have > 8-bit wchar_t */
#if INT_MAX < 0xFFFFL
	if( WCSIZE < 2 )
		return( FALSE );
#endif /* WCSIZE */

	/* wchar_t is 16 bits, make sure that we don't get false positives with
	   short strings.  Two-char strings are more likely to be ASCII than a
	   single widechar, and repeated alternate chars (e.g. "tanaka") in an
	   ASCII string appear to be widechars for the general-purpose check
	   below so we check for these in strings of 2-3 wide chars before we
	   perform the general-purpose check */
	if( stringLen <= ( WCSIZE * 3 ) && wCh > 0xFF )
		{
		if( stringLen == WCSIZE )
			{
			const int ch1 = string[ 0 ];
			const int ch2 = string[ 1 ];

			/* Check for a two-char ASCII string, usually a country name */
			if( ch1 > 0 && ch1 <= 0x7F && isPrint( ch1 ) && \
				ch2 > 0 && ch2 <= 0x7F && isPrint( ch2 ) )
				return( FALSE );
			}
		else
			{
			/* Collect the high byte of each of the two or three widechars
			   present, if there are only two then the third slot re-uses
			   the first one's value so that the comparison below still
			   works */
			const int hi1 = wCh >> 8;
			const int hi2 = getWidechar( string + WCSIZE ) >> 8;
			const int hi3 = ( stringLen > WCSIZE * 2 ) ? \
							getWidechar( string + ( WCSIZE * 2 ) ) >> 8 : hi1;

			ENSURES_B( stringLen == ( WCSIZE * 2 ) || \
					   stringLen == ( WCSIZE * 3 ) );

			/* Check for alternate chars being ASCII */
			if( isAlnum( hi1 ) && isAlnum( hi2 ) && isAlnum( hi3 ) && \
				hi1 == hi2 && hi2 == hi3 )
				return( FALSE );
			}
		}

	/* wchar_t is 16 bits, check whether it's in the form { 00 xx }* or
	   { AA|00 xx }*, either ASCII-as-Unicode or Unicode.  The code used
	   below is safe because to get to this point the string has to be some
	   multiple of 2 bytes long.  Note that if someone passes in a 1-byte
	   string and mistakenly includes the terminator in the length it'll be
	   identified as a 16-bit widechar string, but this doesn't really
	   matter since it'll get "converted" into a non-widechar string later */
	for( i = 0; i < stringLen && i < FAILSAFE_ITERATIONS_LARGE; i += WCSIZE )
		{
		wCh = getWidechar( string );
		string += WCSIZE;
		if( wCh > 0xFF )
			{
			const int wChHi = wCh >> 8;

			ENSURES_B( wChHi );

			/* If we haven't already seen a high byte, remember it */
			if( hiByte == 0 )
				hiByte = wChHi;
			else
				{
				/* If the current high byte doesn't match the previous one,
				   it's probably 8-bit chars */
				if( wChHi != hiByte )
					return( FALSE );
				}
			}
		}
	ENSURES_B( i < FAILSAFE_ITERATIONS_LARGE );

	return( TRUE );				/* Probably 16-bit chars */
	}
/* Try and figure out the true string type for an ASN.1-encoded or native
string. This detects (or at least tries to detect) not only the basic
string type, but also basic string types encoded as widechar strings, and
widechar strings encoded as basic string types */
/* Classify a string of 8-bit characters as the most restrictive string type
   that can represent it.  Returns STRINGTYPE_PRINTABLE, STRINGTYPE_IA5 or
   STRINGTYPE_T61, or STRINGTYPE_NONE if any character has no entry in
   asn1CharFlags once its high bit is masked off.  A length outside the
   permitted range is handled by REQUIRES_EXT(), which is given
   STRINGTYPE_ERROR as its failure value */
CHECK_RETVAL_ENUM( ASN1_STRINGTYPE ) STDC_NONNULL_ARG( ( 1 ) ) \
static ASN1_STRINGTYPE get8bitStringType( IN_BUFFER( stringLen ) const BYTE *string,
										  IN_LENGTH_SHORT const int stringLen )
	{
	BOOLEAN sawNonPrintable = FALSE, sawHighBit = FALSE;
	int index;

	assert( isReadPtr( string, stringLen ) );

	REQUIRES_EXT( ( stringLen > 0 && stringLen < MAX_INTLENGTH_SHORT ), \
				  STRINGTYPE_ERROR );

	/* Examine each character in turn */
	for( index = 0; index < stringLen; index++ )
		{
		const BYTE ch = string[ index ];

		/* The table only covers 7-bit values so we look up the character
		   with its high bit stripped, which for characters below 128 is
		   the character itself */
		const int flags = asn1CharFlags[ ch & 0x7F ];

		/* If there's no table entry then for a 7-bit character it's
		   something peculiar and for an 8-bit character it's not 8859-1
		   either, probably some odd widechar type */
		if( !flags )
			return( STRINGTYPE_NONE );

		if( ch >= 128 )
			{
			/* The high bit is set so it's not an ASCII subset */
			sawNonPrintable = sawHighBit = TRUE;
			}
		else
			{
			/* It's ASCII, check whether it's also valid in a
			   PrintableString */
			if( !( flags & P ) )
				sawNonPrintable = TRUE;
			}
		}

	/* Report the narrowest type that covers everything that we saw */
	if( sawHighBit )
		return( STRINGTYPE_T61 );
	if( sawNonPrintable )
		return( STRINGTYPE_IA5 );
	return( STRINGTYPE_PRINTABLE );
	}
CHECK_RETVAL_ENUM( ASN1_STRINGTYPE ) STDC_NONNULL_ARG( ( 1 ) ) \
static ASN1_STRINGTYPE getAsn1StringType( IN_BUFFER( stringLen ) \
const BYTE *string,
IN_LENGTH_SHORT const int stringLen,
IN_TAG_ENCODED const int stringTag )
{
assert( isReadPtr( string, stringLen ) );
REQUIRES_EXT( ( stringLen > 0 && stringLen < MAX_INTLENGTH_SHORT ), \
STRINGTYPE_ERROR );
REQUIRES_EXT( ( stringTag >= BER_STRING_UTF8 && \
stringTag <= BER_STRING_BMP ),
STRINGTYPE_ERROR );
/* If it's a multiple of bmpchar_t in size check whether it's a
BMPString stuffed into a T61String or an 8-bit string encoded as a
BMPString. The following code assumes that anything claiming to be a
BMPString is always something else, this currently seems to hold true
for all BMPStrings. Hopefully by the time anyone gets around to
using > 8-bit characters everyone will be using UTF8Strings because
there's no easy way to distinguish between a byte string which is a
> 8-bit BMPString and a 7/8-bit string */
if( !( stringLen % UCSIZE ) && *string == '\0' )
{
BOOLEAN notPrintable = FALSE, notIA5 = FALSE;
int length;
/* The first character is a null, it's an 8-bit string stuffed into
a BMPString (these are always big-endian, even coming from
Microsoft software, so we don't have to check for a null as the
second character) */
for( length = stringLen; length > 0; length -= UCSIZE )
{
/* Since we're reading bmpchar_t-sized values from a char-
aligned source we have to assemble the data a byte at a time
to handle systems where non-char values can only be accessed
on word-aligned boundaries */
const bmpchar_t ch = getBmpchar( string );
string += UCSIZE;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -