utf8.c
来自「支持SSL v2/v3, TLS, PKCS #5, PKCS #7, PKCS」· C语言 代码 · 共 2,062 行 · 第 1/5 页
C
2,062 行
/* * The contents of this file are subject to the Mozilla Public * License Version 1.1 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or * implied. See the License for the specific language governing * rights and limitations under the License. * * The Original Code is the Netscape security libraries. * * The Initial Developer of the Original Code is Netscape * Communications Corporation. Portions created by Netscape are * Copyright (C) 1994-2000 Netscape Communications Corporation. All * Rights Reserved. * * Contributor(s): * * Alternatively, the contents of this file may be used under the * terms of the GNU General Public License Version 2 or later (the * "GPL"), in which case the provisions of the GPL are applicable * instead of those above. If you wish to allow use of your * version of this file only under the terms of the GPL and not to * allow others to use your version of this file under the MPL, * indicate your decision by deleting the provisions above and * replace them with the notice and other provisions required by * the GPL. If you do not delete the provisions above, a recipient * may use your version of this file under either the MPL or the * GPL. */#ifdef DEBUGstatic const char CVS_ID[] = "@(#) $RCSfile: utf8.c,v $ $Revision: 1.2 $ $Date: 2000/04/04 02:36:46 $ $Name: NSS_3_1_1_RTM $";#endif /* DEBUG */#include "seccomon.h"#include "secport.h"/* * Define this if you want to support UTF-16 in UCS-2 */#define UTF16/* * From RFC 2044: * * UCS-4 range (hex.) UTF-8 octet sequence (binary) * 0000 0000-0000 007F 0xxxxxxx * 0000 0080-0000 07FF 110xxxxx 10xxxxxx * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx * 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx * 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx * 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx */ /* * From http://www.imc.org/draft-hoffman-utf16 * * For U on [0x00010000,0x0010FFFF]: Let U' = U - 0x00010000 * * U' = yyyyyyyyyyxxxxxxxxxx * W1 = 110110yyyyyyyyyy * W2 = 110111xxxxxxxxxx *//* * This code is assuming NETWORK BYTE ORDER for the 16- and 32-bit * character values. If you wish to use this code for working with * host byte order values, define the following: * * #if IS_BIG_ENDIAN * #define L_0 0 * #define L_1 1 * #define L_2 2 * #define L_3 3 * #define H_0 0 * #define H_1 1 * #else / * not everyone has elif * / * #if IS_LITTLE_ENDIAN * #define L_0 3 * #define L_1 2 * #define L_2 1 * #define L_3 0 * #define H_0 1 * #define H_1 0 * #else * #error "PDP and NUXI support deferred" * #endif / * IS_LITTLE_ENDIAN * / * #endif / * IS_BIG_ENDIAN * / */#define L_0 0#define L_1 1#define L_2 2#define L_3 3#define H_0 0#define H_1 1PR_IMPLEMENT(PRBool)sec_port_ucs4_utf8_conversion_function( PRBool toUnicode, unsigned char *inBuf, unsigned int inBufLen, unsigned char *outBuf, unsigned int maxOutBufLen, unsigned int *outBufLen){#ifndef TEST_UTF8 PORT_Assert((unsigned int *)NULL != outBufLen);#endif /* TEST_UTF8 */ if( toUnicode ) { unsigned int i, len = 0; for( i = 0; i < inBufLen; ) { if( (inBuf[i] & 0x80) == 0x00 ) i += 1; else if( (inBuf[i] & 0xE0) == 0xC0 ) i += 2; else if( (inBuf[i] & 0xF0) == 0xE0 ) i += 3; else if( (inBuf[i] & 0xF8) == 0xF0 ) i += 4; else if( (inBuf[i] & 0xFC) == 0xF8 ) i += 5; else if( (inBuf[i] & 0xFE) == 0xFC ) i += 6; else return PR_FALSE; len += 4; } if( len > maxOutBufLen ) { *outBufLen = len; return PR_FALSE; } len = 0; for( i = 0; i < inBufLen; ) { if( (inBuf[i] & 0x80) == 0x00 ) { /* 0000 0000-0000 007F <- 0xxxxxx */ /* 0abcdefg -> 00000000 00000000 00000000 0abcdefg */ outBuf[len+L_0] = 0x00; outBuf[len+L_1] = 0x00; outBuf[len+L_2] = 0x00; outBuf[len+L_3] = inBuf[i+0] & 0x7F; i += 1; } else if( (inBuf[i] & 0xE0) == 0xC0 ) { if( (inBuf[i+1] & 0xC0) != 0x80 ) return PR_FALSE; /* 0000 0080-0000 07FF <- 110xxxxx 10xxxxxx */ /* 110abcde 10fghijk -> 00000000 00000000 00000abc defghijk */ outBuf[len+L_0] = 0x00; outBuf[len+L_1] = 0x00; outBuf[len+L_2] = ((inBuf[i+0] & 0x1C) >> 2); outBuf[len+L_3] = ((inBuf[i+0] & 0x03) << 6) | ((inBuf[i+1] & 0x3F) >> 0); i += 2; } else if( (inBuf[i] & 0xF0) == 0xE0 ) { if( (inBuf[i+1] & 0xC0) != 0x80 ) return PR_FALSE; if( (inBuf[i+2] & 0xC0) != 0x80 ) return PR_FALSE; /* 0000 0800-0000 FFFF <- 1110xxxx 10xxxxxx 10xxxxxx */ /* 1110abcd 10efghij 10klmnop -> 00000000 00000000 abcdefgh ijklmnop */ outBuf[len+L_0] = 0x00; outBuf[len+L_1] = 0x00; outBuf[len+L_2] = ((inBuf[i+0] & 0x0F) << 4) | ((inBuf[i+1] & 0x3C) >> 2); outBuf[len+L_3] = ((inBuf[i+1] & 0x03) << 6) | ((inBuf[i+2] & 0x3F) >> 0); i += 3; } else if( (inBuf[i] & 0xF8) == 0xF0 ) { if( (inBuf[i+1] & 0xC0) != 0x80 ) return PR_FALSE; if( (inBuf[i+2] & 0xC0) != 0x80 ) return PR_FALSE; if( (inBuf[i+3] & 0xC0) != 0x80 ) return PR_FALSE; /* 0001 0000-001F FFFF <- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ /* 11110abc 10defghi 10jklmno 10pqrstu -> 00000000 000abcde fghijklm nopqrstu */ outBuf[len+L_0] = 0x00; outBuf[len+L_1] = ((inBuf[i+0] & 0x07) << 2) | ((inBuf[i+1] & 0x30) >> 4); outBuf[len+L_2] = ((inBuf[i+1] & 0x0F) << 4) | ((inBuf[i+2] & 0x3C) >> 2); outBuf[len+L_3] = ((inBuf[i+2] & 0x03) << 6) | ((inBuf[i+3] & 0x3F) >> 0); i += 4; } else if( (inBuf[i] & 0xFC) == 0xF8 ) { if( (inBuf[i+1] & 0xC0) != 0x80 ) return PR_FALSE; if( (inBuf[i+2] & 0xC0) != 0x80 ) return PR_FALSE; if( (inBuf[i+3] & 0xC0) != 0x80 ) return PR_FALSE; if( (inBuf[i+4] & 0xC0) != 0x80 ) return PR_FALSE; /* 0020 0000-03FF FFFF <- 111110xx 10xxxxxx ... 10xxxxxx */ /* 111110ab 10cdefgh 10ijklmn 10opqrst 10uvwxyz -> 000000ab cdefghij klmnopqr stuvwxyz */ outBuf[len+L_0] = inBuf[i+0] & 0x03; outBuf[len+L_1] = ((inBuf[i+1] & 0x3F) << 2) | ((inBuf[i+2] & 0x30) >> 4); outBuf[len+L_2] = ((inBuf[i+2] & 0x0F) << 4) | ((inBuf[i+3] & 0x3C) >> 2); outBuf[len+L_3] = ((inBuf[i+3] & 0x03) << 6) | ((inBuf[i+4] & 0x3F) >> 0); i += 5; } else /* if( (inBuf[i] & 0xFE) == 0xFC ) */ { if( (inBuf[i+1] & 0xC0) != 0x80 ) return PR_FALSE; if( (inBuf[i+2] & 0xC0) != 0x80 ) return PR_FALSE; if( (inBuf[i+3] & 0xC0) != 0x80 ) return PR_FALSE; if( (inBuf[i+4] & 0xC0) != 0x80 ) return PR_FALSE; if( (inBuf[i+5] & 0xC0) != 0x80 ) return PR_FALSE; /* 0400 0000-7FFF FFFF <- 1111110x 10xxxxxx ... 10xxxxxx */ /* 1111110a 10bcdefg 10hijklm 10nopqrs 10tuvwxy 10zABCDE -> 0abcdefg hijklmno pqrstuvw xyzABCDE */ outBuf[len+L_0] = ((inBuf[i+0] & 0x01) << 6) | ((inBuf[i+1] & 0x3F) >> 0); outBuf[len+L_1] = ((inBuf[i+2] & 0x3F) << 2) | ((inBuf[i+3] & 0x30) >> 4); outBuf[len+L_2] = ((inBuf[i+3] & 0x0F) << 4) | ((inBuf[i+4] & 0x3C) >> 2); outBuf[len+L_3] = ((inBuf[i+4] & 0x03) << 6) | ((inBuf[i+5] & 0x3F) >> 0); i += 6; } len += 4; } *outBufLen = len; return PR_TRUE; } else { unsigned int i, len = 0; for( i = 0; i < inBufLen; i += 4 ) { if( inBuf[i+L_0] >= 0x04 ) len += 6; else if( (inBuf[i+L_0] > 0x00) || (inBuf[i+L_1] >= 0x20) ) len += 5; else if( inBuf[i+L_1] >= 0x01 ) len += 4; else if( inBuf[i+L_2] >= 0x08 ) len += 3; else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) len += 2; else len += 1; } if( len > maxOutBufLen ) { *outBufLen = len; return PR_FALSE; } len = 0; for( i = 0; i < inBufLen; i += 4 ) { if( inBuf[i+L_0] >= 0x04 ) { /* 0400 0000-7FFF FFFF -> 1111110x 10xxxxxx ... 10xxxxxx */ /* 0abcdefg hijklmno pqrstuvw xyzABCDE -> 1111110a 10bcdefg 10hijklm 10nopqrs 10tuvwxy 10zABCDE */ outBuf[len+0] = 0xFC | ((inBuf[i+L_0] & 0x40) >> 6); outBuf[len+1] = 0x80 | ((inBuf[i+L_0] & 0x3F) >> 0); outBuf[len+2] = 0x80 | ((inBuf[i+L_1] & 0xFC) >> 2); outBuf[len+3] = 0x80 | ((inBuf[i+L_1] & 0x03) << 4) | ((inBuf[i+L_2] & 0xF0) >> 4); outBuf[len+4] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2) | ((inBuf[i+L_3] & 0xC0) >> 6); outBuf[len+5] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0); len += 6; } else if( (inBuf[i+L_0] > 0x00) || (inBuf[i+L_1] >= 0x20) ) { /* 0020 0000-03FF FFFF -> 111110xx 10xxxxxx ... 10xxxxxx */ /* 000000ab cdefghij klmnopqr stuvwxyz -> 111110ab 10cdefgh 10ijklmn 10opqrst 10uvwxyz */ outBuf[len+0] = 0xF8 | ((inBuf[i+L_0] & 0x03) >> 0); outBuf[len+1] = 0x80 | ((inBuf[i+L_1] & 0xFC) >> 2); outBuf[len+2] = 0x80 | ((inBuf[i+L_1] & 0x03) << 4) | ((inBuf[i+L_2] & 0xF0) >> 4); outBuf[len+3] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2) | ((inBuf[i+L_3] & 0xC0) >> 6); outBuf[len+4] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0); len += 5; } else if( inBuf[i+L_1] >= 0x01 ) { /* 0001 0000-001F FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ /* 00000000 000abcde fghijklm nopqrstu -> 11110abc 10defghi 10jklmno 10pqrstu */ outBuf[len+0] = 0xF0 | ((inBuf[i+L_1] & 0x1C) >> 2); outBuf[len+1] = 0x80 | ((inBuf[i+L_1] & 0x03) << 4) | ((inBuf[i+L_2] & 0xF0) >> 4); outBuf[len+2] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2) | ((inBuf[i+L_3] & 0xC0) >> 6); outBuf[len+3] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0); len += 4; } else if( inBuf[i+L_2] >= 0x08 ) { /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */ /* 00000000 00000000 abcdefgh ijklmnop -> 1110abcd 10efghij 10klmnop */ outBuf[len+0] = 0xE0 | ((inBuf[i+L_2] & 0xF0) >> 4); outBuf[len+1] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2) | ((inBuf[i+L_3] & 0xC0) >> 6); outBuf[len+2] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0); len += 3; } else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) { /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */ /* 00000000 00000000 00000abc defghijk -> 110abcde 10fghijk */ outBuf[len+0] = 0xC0 | ((inBuf[i+L_2] & 0x07) << 2) | ((inBuf[i+L_3] & 0xC0) >> 6); outBuf[len+1] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0); len += 2; } else { /* 0000 0000-0000 007F -> 0xxxxxx */ /* 00000000 00000000 00000000 0abcdefg -> 0abcdefg */ outBuf[len+0] = (inBuf[i+L_3] & 0x7F); len += 1; } } *outBufLen = len; return PR_TRUE; }}PR_IMPLEMENT(PRBool)sec_port_ucs2_utf8_conversion_function( PRBool toUnicode, unsigned char *inBuf, unsigned int inBufLen, unsigned char *outBuf, unsigned int maxOutBufLen, unsigned int *outBufLen){#ifndef TEST_UTF8 PORT_Assert((unsigned int *)NULL != outBufLen);#endif /* TEST_UTF8 */ if( toUnicode ) { unsigned int i, len = 0; for( i = 0; i < inBufLen; ) { if( (inBuf[i] & 0x80) == 0x00 ) { i += 1; len += 2; } else if( (inBuf[i] & 0xE0) == 0xC0 ) { i += 2; len += 2; } else if( (inBuf[i] & 0xF0) == 0xE0 ) { i += 3; len += 2;#ifdef UTF16 } else if( (inBuf[i] & 0xF8) == 0xF0 ) { i += 4; len += 4; if( (inBuf[i] & 0x04) && ((inBuf[i] & 0x03) || (inBuf[i+1] & 0x30)) ) { /* Not representable as UTF16 */ return PR_FALSE; }#endif /* UTF16 */ } else return PR_FALSE; } if( len > maxOutBufLen ) { *outBufLen = len; return PR_FALSE; } len = 0; for( i = 0; i < inBufLen; ) { if( (inBuf[i] & 0x80) == 0x00 ) { /* 0000-007F <- 0xxxxxx */ /* 0abcdefg -> 00000000 0abcdefg */ outBuf[len+H_0] = 0x00; outBuf[len+H_1] = inBuf[i+0] & 0x7F; i += 1; len += 2; } else if( (inBuf[i] & 0xE0) == 0xC0 ) { if( (inBuf[i+1] & 0xC0) != 0x80 ) return PR_FALSE; /* 0080-07FF <- 110xxxxx 10xxxxxx */ /* 110abcde 10fghijk -> 00000abc defghijk */ outBuf[len+H_0] = ((inBuf[i+0] & 0x1C) >> 2); outBuf[len+H_1] = ((inBuf[i+0] & 0x03) << 6) | ((inBuf[i+1] & 0x3F) >> 0); i += 2; len += 2; } else if( (inBuf[i] & 0xF0) == 0xE0 ) { if( (inBuf[i+1] & 0xC0) != 0x80 ) return PR_FALSE; if( (inBuf[i+2] & 0xC0) != 0x80 ) return PR_FALSE; /* 0800-FFFF <- 1110xxxx 10xxxxxx 10xxxxxx */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?