utf8.c

来自「支持SSL v2/v3, TLS, PKCS #5, PKCS #7, PKCS」· C语言 代码 · 共 2,062 行 · 第 1/5 页

C
2,062
字号
/* * The contents of this file are subject to the Mozilla Public * License Version 1.1 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.mozilla.org/MPL/ *  * Software distributed under the License is distributed on an "AS * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or * implied. See the License for the specific language governing * rights and limitations under the License. *  * The Original Code is the Netscape security libraries. *  * The Initial Developer of the Original Code is Netscape * Communications Corporation.  Portions created by Netscape are  * Copyright (C) 1994-2000 Netscape Communications Corporation.  All * Rights Reserved. *  * Contributor(s): *  * Alternatively, the contents of this file may be used under the * terms of the GNU General Public License Version 2 or later (the * "GPL"), in which case the provisions of the GPL are applicable  * instead of those above.  If you wish to allow use of your  * version of this file only under the terms of the GPL and not to * allow others to use your version of this file under the MPL, * indicate your decision by deleting the provisions above and * replace them with the notice and other provisions required by * the GPL.  If you do not delete the provisions above, a recipient * may use your version of this file under either the MPL or the * GPL. */#ifdef DEBUGstatic const char CVS_ID[] = "@(#) $RCSfile: utf8.c,v $ $Revision: 1.2 $ $Date: 2000/04/04 02:36:46 $ $Name: NSS_3_1_1_RTM $";#endif /* DEBUG */#include "seccomon.h"#include "secport.h"/* * Define this if you want to support UTF-16 in UCS-2 */#define UTF16/* * From RFC 2044: * * UCS-4 range (hex.)           UTF-8 octet sequence (binary) * 0000 0000-0000 007F   0xxxxxxx * 0000 0080-0000 07FF   110xxxxx 10xxxxxx * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx * 0001 0000-001F FFFF   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx * 0020 0000-03FF FFFF   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx * 0400 0000-7FFF FFFF   1111110x 10xxxxxx ... 10xxxxxx */  /* * From http://www.imc.org/draft-hoffman-utf16 * * For U on [0x00010000,0x0010FFFF]:  Let U' = U - 0x00010000 * * U' = yyyyyyyyyyxxxxxxxxxx * W1 = 110110yyyyyyyyyy * W2 = 110111xxxxxxxxxx *//* * This code is assuming NETWORK BYTE ORDER for the 16- and 32-bit * character values.  If you wish to use this code for working with * host byte order values, define the following: * * #if IS_BIG_ENDIAN * #define L_0 0 * #define L_1 1 * #define L_2 2 * #define L_3 3 * #define H_0 0 * #define H_1 1 * #else / * not everyone has elif * / * #if IS_LITTLE_ENDIAN * #define L_0 3 * #define L_1 2 * #define L_2 1 * #define L_3 0 * #define H_0 1 * #define H_1 0 * #else * #error "PDP and NUXI support deferred" * #endif / * IS_LITTLE_ENDIAN * / * #endif / * IS_BIG_ENDIAN * / */#define L_0 0#define L_1 1#define L_2 2#define L_3 3#define H_0 0#define H_1 1PR_IMPLEMENT(PRBool)sec_port_ucs4_utf8_conversion_function(  PRBool toUnicode,  unsigned char *inBuf,  unsigned int inBufLen,  unsigned char *outBuf,  unsigned int maxOutBufLen,  unsigned int *outBufLen){#ifndef TEST_UTF8  PORT_Assert((unsigned int *)NULL != outBufLen);#endif /* TEST_UTF8 */  if( toUnicode ) {    unsigned int i, len = 0;    for( i = 0; i < inBufLen; ) {      if( (inBuf[i] & 0x80) == 0x00 ) i += 1;      else if( (inBuf[i] & 0xE0) == 0xC0 ) i += 2;      else if( (inBuf[i] & 0xF0) == 0xE0 ) i += 3;      else if( (inBuf[i] & 0xF8) == 0xF0 ) i += 4;      else if( (inBuf[i] & 0xFC) == 0xF8 ) i += 5;      else if( (inBuf[i] & 0xFE) == 0xFC ) i += 6;      else return PR_FALSE;      len += 4;    }    if( len > maxOutBufLen ) {      *outBufLen = len;      return PR_FALSE;    }    len = 0;    for( i = 0; i < inBufLen; ) {      if( (inBuf[i] & 0x80) == 0x00 ) {        /* 0000 0000-0000 007F <- 0xxxxxx */        /* 0abcdefg ->            00000000 00000000 00000000 0abcdefg */        outBuf[len+L_0] = 0x00;        outBuf[len+L_1] = 0x00;        outBuf[len+L_2] = 0x00;        outBuf[len+L_3] = inBuf[i+0] & 0x7F;        i += 1;      } else if( (inBuf[i] & 0xE0) == 0xC0 ) {        if( (inBuf[i+1] & 0xC0) != 0x80 ) return PR_FALSE;        /* 0000 0080-0000 07FF <- 110xxxxx 10xxxxxx */        /* 110abcde 10fghijk ->           00000000 00000000 00000abc defghijk */        outBuf[len+L_0] = 0x00;        outBuf[len+L_1] = 0x00;        outBuf[len+L_2] = ((inBuf[i+0] & 0x1C) >> 2);        outBuf[len+L_3] = ((inBuf[i+0] & 0x03) << 6) | ((inBuf[i+1] & 0x3F) >> 0);        i += 2;      } else if( (inBuf[i] & 0xF0) == 0xE0 ) {        if( (inBuf[i+1] & 0xC0) != 0x80 ) return PR_FALSE;        if( (inBuf[i+2] & 0xC0) != 0x80 ) return PR_FALSE;        /* 0000 0800-0000 FFFF <- 1110xxxx 10xxxxxx 10xxxxxx */        /* 1110abcd 10efghij 10klmnop ->           00000000 00000000 abcdefgh ijklmnop */        outBuf[len+L_0] = 0x00;        outBuf[len+L_1] = 0x00;        outBuf[len+L_2] = ((inBuf[i+0] & 0x0F) << 4) | ((inBuf[i+1] & 0x3C) >> 2);        outBuf[len+L_3] = ((inBuf[i+1] & 0x03) << 6) | ((inBuf[i+2] & 0x3F) >> 0);        i += 3;      } else if( (inBuf[i] & 0xF8) == 0xF0 ) {        if( (inBuf[i+1] & 0xC0) != 0x80 ) return PR_FALSE;        if( (inBuf[i+2] & 0xC0) != 0x80 ) return PR_FALSE;        if( (inBuf[i+3] & 0xC0) != 0x80 ) return PR_FALSE;        /* 0001 0000-001F FFFF <- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */        /* 11110abc 10defghi 10jklmno 10pqrstu ->            00000000 000abcde fghijklm nopqrstu */                   outBuf[len+L_0] = 0x00;        outBuf[len+L_1] = ((inBuf[i+0] & 0x07) << 2) | ((inBuf[i+1] & 0x30) >> 4);        outBuf[len+L_2] = ((inBuf[i+1] & 0x0F) << 4) | ((inBuf[i+2] & 0x3C) >> 2);        outBuf[len+L_3] = ((inBuf[i+2] & 0x03) << 6) | ((inBuf[i+3] & 0x3F) >> 0);        i += 4;      } else if( (inBuf[i] & 0xFC) == 0xF8 ) {        if( (inBuf[i+1] & 0xC0) != 0x80 ) return PR_FALSE;        if( (inBuf[i+2] & 0xC0) != 0x80 ) return PR_FALSE;        if( (inBuf[i+3] & 0xC0) != 0x80 ) return PR_FALSE;        if( (inBuf[i+4] & 0xC0) != 0x80 ) return PR_FALSE;        /* 0020 0000-03FF FFFF <- 111110xx 10xxxxxx ... 10xxxxxx */        /* 111110ab 10cdefgh 10ijklmn 10opqrst 10uvwxyz ->            000000ab cdefghij klmnopqr stuvwxyz */        outBuf[len+L_0] = inBuf[i+0] & 0x03;        outBuf[len+L_1] = ((inBuf[i+1] & 0x3F) << 2) | ((inBuf[i+2] & 0x30) >> 4);        outBuf[len+L_2] = ((inBuf[i+2] & 0x0F) << 4) | ((inBuf[i+3] & 0x3C) >> 2);        outBuf[len+L_3] = ((inBuf[i+3] & 0x03) << 6) | ((inBuf[i+4] & 0x3F) >> 0);        i += 5;      } else /* if( (inBuf[i] & 0xFE) == 0xFC ) */ {        if( (inBuf[i+1] & 0xC0) != 0x80 ) return PR_FALSE;        if( (inBuf[i+2] & 0xC0) != 0x80 ) return PR_FALSE;        if( (inBuf[i+3] & 0xC0) != 0x80 ) return PR_FALSE;        if( (inBuf[i+4] & 0xC0) != 0x80 ) return PR_FALSE;        if( (inBuf[i+5] & 0xC0) != 0x80 ) return PR_FALSE;        /* 0400 0000-7FFF FFFF <- 1111110x 10xxxxxx ... 10xxxxxx */        /* 1111110a 10bcdefg 10hijklm 10nopqrs 10tuvwxy 10zABCDE ->            0abcdefg hijklmno pqrstuvw xyzABCDE */        outBuf[len+L_0] = ((inBuf[i+0] & 0x01) << 6) | ((inBuf[i+1] & 0x3F) >> 0);        outBuf[len+L_1] = ((inBuf[i+2] & 0x3F) << 2) | ((inBuf[i+3] & 0x30) >> 4);        outBuf[len+L_2] = ((inBuf[i+3] & 0x0F) << 4) | ((inBuf[i+4] & 0x3C) >> 2);        outBuf[len+L_3] = ((inBuf[i+4] & 0x03) << 6) | ((inBuf[i+5] & 0x3F) >> 0);        i += 6;      }      len += 4;    }    *outBufLen = len;    return PR_TRUE;  } else {    unsigned int i, len = 0;    for( i = 0; i < inBufLen; i += 4 ) {      if( inBuf[i+L_0] >= 0x04 ) len += 6;      else if( (inBuf[i+L_0] > 0x00) || (inBuf[i+L_1] >= 0x20) ) len += 5;      else if( inBuf[i+L_1] >= 0x01 ) len += 4;      else if( inBuf[i+L_2] >= 0x08 ) len += 3;      else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) len += 2;      else len += 1;    }    if( len > maxOutBufLen ) {      *outBufLen = len;      return PR_FALSE;    }    len = 0;    for( i = 0; i < inBufLen; i += 4 ) {      if( inBuf[i+L_0] >= 0x04 ) {        /* 0400 0000-7FFF FFFF -> 1111110x 10xxxxxx ... 10xxxxxx */        /* 0abcdefg hijklmno pqrstuvw xyzABCDE ->           1111110a 10bcdefg 10hijklm 10nopqrs 10tuvwxy 10zABCDE */        outBuf[len+0] = 0xFC | ((inBuf[i+L_0] & 0x40) >> 6);        outBuf[len+1] = 0x80 | ((inBuf[i+L_0] & 0x3F) >> 0);        outBuf[len+2] = 0x80 | ((inBuf[i+L_1] & 0xFC) >> 2);        outBuf[len+3] = 0x80 | ((inBuf[i+L_1] & 0x03) << 4)                             | ((inBuf[i+L_2] & 0xF0) >> 4);        outBuf[len+4] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2)                             | ((inBuf[i+L_3] & 0xC0) >> 6);        outBuf[len+5] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);        len += 6;      } else if( (inBuf[i+L_0] > 0x00) || (inBuf[i+L_1] >= 0x20) ) {        /* 0020 0000-03FF FFFF -> 111110xx 10xxxxxx ... 10xxxxxx */        /* 000000ab cdefghij klmnopqr stuvwxyz ->           111110ab 10cdefgh 10ijklmn 10opqrst 10uvwxyz */        outBuf[len+0] = 0xF8 | ((inBuf[i+L_0] & 0x03) >> 0);        outBuf[len+1] = 0x80 | ((inBuf[i+L_1] & 0xFC) >> 2);        outBuf[len+2] = 0x80 | ((inBuf[i+L_1] & 0x03) << 4)                             | ((inBuf[i+L_2] & 0xF0) >> 4);        outBuf[len+3] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2)                             | ((inBuf[i+L_3] & 0xC0) >> 6);        outBuf[len+4] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);        len += 5;      } else if( inBuf[i+L_1] >= 0x01 ) {        /* 0001 0000-001F FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */        /* 00000000 000abcde fghijklm nopqrstu ->           11110abc 10defghi 10jklmno 10pqrstu */        outBuf[len+0] = 0xF0 | ((inBuf[i+L_1] & 0x1C) >> 2);        outBuf[len+1] = 0x80 | ((inBuf[i+L_1] & 0x03) << 4)                             | ((inBuf[i+L_2] & 0xF0) >> 4);        outBuf[len+2] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2)                             | ((inBuf[i+L_3] & 0xC0) >> 6);        outBuf[len+3] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);        len += 4;      } else if( inBuf[i+L_2] >= 0x08 ) {        /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */        /* 00000000 00000000 abcdefgh ijklmnop ->           1110abcd 10efghij 10klmnop */        outBuf[len+0] = 0xE0 | ((inBuf[i+L_2] & 0xF0) >> 4);        outBuf[len+1] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2)                             | ((inBuf[i+L_3] & 0xC0) >> 6);        outBuf[len+2] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);        len += 3;      } else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) {        /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */        /* 00000000 00000000 00000abc defghijk ->           110abcde 10fghijk */        outBuf[len+0] = 0xC0 | ((inBuf[i+L_2] & 0x07) << 2)                             | ((inBuf[i+L_3] & 0xC0) >> 6);        outBuf[len+1] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);        len += 2;      } else {        /* 0000 0000-0000 007F -> 0xxxxxx */        /* 00000000 00000000 00000000 0abcdefg ->           0abcdefg */        outBuf[len+0] = (inBuf[i+L_3] & 0x7F);        len += 1;      }    }                                *outBufLen = len;    return PR_TRUE;  }}PR_IMPLEMENT(PRBool)sec_port_ucs2_utf8_conversion_function(  PRBool toUnicode,  unsigned char *inBuf,  unsigned int inBufLen,  unsigned char *outBuf,  unsigned int maxOutBufLen,  unsigned int *outBufLen){#ifndef TEST_UTF8  PORT_Assert((unsigned int *)NULL != outBufLen);#endif /* TEST_UTF8 */  if( toUnicode ) {    unsigned int i, len = 0;    for( i = 0; i < inBufLen; ) {      if( (inBuf[i] & 0x80) == 0x00 ) {        i += 1;        len += 2;      } else if( (inBuf[i] & 0xE0) == 0xC0 ) {        i += 2;        len += 2;      } else if( (inBuf[i] & 0xF0) == 0xE0 ) {        i += 3;        len += 2;#ifdef UTF16      } else if( (inBuf[i] & 0xF8) == 0xF0 ) {         i += 4;        len += 4;        if( (inBuf[i] & 0x04) &&             ((inBuf[i] & 0x03) || (inBuf[i+1] & 0x30)) ) {          /* Not representable as UTF16 */          return PR_FALSE;        }#endif /* UTF16 */      } else return PR_FALSE;    }    if( len > maxOutBufLen ) {      *outBufLen = len;      return PR_FALSE;    }    len = 0;    for( i = 0; i < inBufLen; ) {      if( (inBuf[i] & 0x80) == 0x00 ) {        /* 0000-007F <- 0xxxxxx */        /* 0abcdefg -> 00000000 0abcdefg */        outBuf[len+H_0] = 0x00;        outBuf[len+H_1] = inBuf[i+0] & 0x7F;        i += 1;        len += 2;      } else if( (inBuf[i] & 0xE0) == 0xC0 ) {        if( (inBuf[i+1] & 0xC0) != 0x80 ) return PR_FALSE;        /* 0080-07FF <- 110xxxxx 10xxxxxx */        /* 110abcde 10fghijk -> 00000abc defghijk */        outBuf[len+H_0] = ((inBuf[i+0] & 0x1C) >> 2);        outBuf[len+H_1] = ((inBuf[i+0] & 0x03) << 6) | ((inBuf[i+1] & 0x3F) >> 0);        i += 2;        len += 2;      } else if( (inBuf[i] & 0xF0) == 0xE0 ) {        if( (inBuf[i+1] & 0xC0) != 0x80 ) return PR_FALSE;        if( (inBuf[i+2] & 0xC0) != 0x80 ) return PR_FALSE;        /* 0800-FFFF <- 1110xxxx 10xxxxxx 10xxxxxx */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?