utf8supp.h
来自「开放源码的编译器open watcom 1.6.0版的源代码」· C头文件 代码 · 共 228 行
H
228 行
/****************************************************************************
*
* Open Watcom Project
*
* Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
*
* ========================================================================
*
* This file contains Original Code and/or Modifications of Original
* Code as defined in and that are subject to the Sybase Open Watcom
* Public License version 1.0 (the 'License'). You may not use this file
* except in compliance with the License. BY USING THIS FILE YOU AGREE TO
* ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
* provided with the Original Code and Modifications, and is also
* available at www.sybase.com/developer/opensource.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
* ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
* NON-INFRINGEMENT. Please see the License for the specific language
* governing rights and limitations under the License.
*
* ========================================================================
*
* Description: WHEN YOU FIGURE OUT WHAT THIS FILE DOES, PLEASE
* DESCRIBE IT HERE!
*
****************************************************************************/
#ifndef _UTF8SUPP_H
#define _UTF8SUPP_H
// add other UTF8 encodings
#if !defined(_UTF8_JAVA)
#define _UTF8_JAVA
#endif
#ifdef _UTF8_JAVA
#if defined( UNDER_PALM_OS ) && defined( __GNUC__ )
#define __attribute__( a ) __attribute__(( a ))
#else
#define __attribute__( a )
#endif
#ifdef _UTF8_ENCODE
static unsigned UTF8Encode( char *dest, unsigned c )
__attribute__( section( "ULRT3" ) );
static unsigned UTF8Encode( char *dest, unsigned c ) {
if( c >= 0x0001 && c <= 0x007f ) {
dest[0] = (char)c;
return( 1 );
}
if( c <= 0x07ff ) {
// 0x0000 is encoded as two bytes
dest[0] = (char)( 0xc0 | (( c >> 6 ) & 0x1f));
dest[1] = (char)( 0x80 | ( c & 0x3f));
return( 2 );
}
dest[0] = (char)( 0xe0 | (( c >> 12 ) & 0x0f));
dest[1] = (char)( 0x80 | (( c >> 6 ) & 0x03f));
dest[2] = (char)( 0x80 | ( c & 0x03f));
return( 3 );
}
#endif
#ifdef _UTF8_DECODE
static unsigned UTF8Decode( char const *dest, unsigned *c )
__attribute__( section( "ULRT3" ) );
static unsigned UTF8Decode( char const *dest, unsigned *c ) {
unsigned char const *p = (unsigned char const *)dest;
unsigned char c0, c1, c2;
c0 = p[0];
if( c0 <= 0x7f ) {
*c = c0;
return( 1 );
}
c1 = p[1];
if( c0 < 0xe0 ) {
*c = (( c0 & 0x1f ) << 6 ) | ( c1 & 0x3f );
return( 2 );
}
c2 = p[2];
*c = (( c0 & 0x0f ) << 12 ) | (( c1 & 0x3f ) << 6 ) | ( c2 & 0x3f );
return( 3 );
}
#endif
#ifdef _UTF8_CHECK
static unsigned UTF8Check( char const *dest, unsigned len )
__attribute__( section( "ULRT3" ) );
// returns 0 if the buffer doesn't contain a valid UTF8 encoding prefix
// that uses all the characters provided
static unsigned UTF8Check( char const *dest, unsigned len ) {
unsigned char const *p = (unsigned char const *)dest;
switch( len ) {
case 1:
if(( p[0] & 0x80 ) != 0x00 ) {
break;
}
return 1;
case 2:
if(( p[0] & 0xe0 ) != 0xc0 ) {
break;
}
if(( p[1] & 0xc0 ) != 0x80 ) {
break;
}
return 1;
case 3:
if(( p[0] & 0xf0 ) != 0xe0 ) {
break;
}
if(( p[1] & 0xc0 ) != 0x80 ) {
break;
}
if(( p[2] & 0xc0 ) != 0x80 ) {
break;
}
return 1;
}
return 0;
}
/* test code
#include <assert.h>
#define _UTF8_CHECK
#define _UTF8_DECODE
#include "utf8supp.h"
main() {
unsigned c;
char buff[3];
int i,j,k;
for( i = 0; i < 0x80; ++i ) {
buff[0] = i;
assert( UTF8Check( buff, 1 ) != 0 );
assert( UTF8Decode( buff, &c ) == 1 );
assert( c < 0x80 );
}
for( i = 0xc0; i <= 0xdf; ++i ) {
buff[0] = i;
for( j = 0x80; j <= 0xbf; ++j ) {
buff[1] = j;
assert( UTF8Check( buff, 2 ) != 0 );
assert( UTF8Decode( buff, &c ) == 2 );
assert( c < 0x0800 );
}
}
for( i = 0xe0; i <= 0xef; ++i ) {
buff[0] = i;
for( j = 0x80; j <= 0xbf; ++j ) {
buff[1] = j;
for( k = 0x80; k <= 0xbf; ++k ) {
buff[2] = k;
assert( UTF8Check( buff, 3 ) != 0 );
assert( UTF8Decode( buff, &c ) == 3 );
assert( c <= 0xffff );
}
}
}
for( i = 0; i < 0x100; ++i ) {
buff[0] = i;
if( i < 0x80 ) {
assert( UTF8Check( buff, 1 ) != 0 );
} else {
assert( UTF8Check( buff, 1 ) == 0 );
if( 0xc0 <= i && i <= 0xdf ) {
for( j = 0; j < 0x100; ++j ) {
buff[1] = j;
if( 0x80 <= j && j <= 0xbf ) {
assert( UTF8Check( buff, 2 ) != 0 );
for( k = 0; k < 0x100; ++k ) {
assert( UTF8Check( buff, 3 ) == 0 );
}
} else {
assert( UTF8Check( buff, 2 ) == 0 );
}
}
} else if( 0xe0 <= i && i <= 0xef ) {
for( j = 0; j < 0x100; ++j ) {
buff[1] = j;
if( 0x80 <= j && j <= 0xbf ) {
assert( UTF8Check( buff, 2 ) == 0 );
for( k = 0; k < 0x100; ++k ) {
buff[2] = k;
if( 0x80 <= k && k <= 0xbf ) {
assert( UTF8Check( buff, 3 ) != 0 );
} else {
assert( UTF8Check( buff, 3 ) == 0 );
}
}
} else {
assert( UTF8Check( buff, 2 ) == 0 );
}
}
} else {
for( j = 0; j < 0x100; ++j ) {
buff[1] = j;
assert( UTF8Check( buff, 2 ) == 0 );
for( k = 0; k < 0x100; ++k ) {
buff[2] = k;
assert( UTF8Check( buff, 3 ) == 0 );
}
}
}
}
}
puts( "PASS" );
}
*/
#endif
#endif
#endif
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?