📄 utf.c
字号:
/* Set len to the maximum number of bytes required in the output buffer. */ if( desiredEnc==SQLITE_UTF8 ){ /* When converting from UTF-16, the maximum growth results from ** translating a 2-byte character to a 3-byte UTF-8 character (i.e. ** code-point 0xFFFC). A single byte is required for the output string ** nul-terminator. */ len = (pMem->n/2) * 3 + 1; }else{ /* When converting from UTF-8 to UTF-16 the maximum growth is caused ** when a 1-byte UTF-8 character is translated into a 2-byte UTF-16 ** character. Two bytes are required in the output buffer for the ** nul-terminator. */ len = pMem->n * 2 + 2; } /* Set zIn to point at the start of the input buffer and zTerm to point 1 ** byte past the end. ** ** Variable zOut is set to point at the output buffer. This may be space ** obtained from malloc(), or Mem.zShort, if it large enough and not in ** use, or the zShort array on the stack (see above). */ zIn = pMem->z; zTerm = &zIn[pMem->n]; if( len>NBFS ){ zOut = sqliteMallocRaw(len); if( !zOut ) return SQLITE_NOMEM; }else{ zOut = zShort; } z = zOut; if( pMem->enc==SQLITE_UTF8 ){ if( desiredEnc==SQLITE_UTF16LE ){ /* UTF-8 -> UTF-16 Little-endian */ while( zIn<zTerm ){ READ_UTF8(zIn, c); WRITE_UTF16LE(z, c); } }else{ assert( desiredEnc==SQLITE_UTF16BE ); /* UTF-8 -> UTF-16 Big-endian */ while( zIn<zTerm ){ READ_UTF8(zIn, c); WRITE_UTF16BE(z, c); } } pMem->n = z - zOut; *z++ = 0; }else{ assert( desiredEnc==SQLITE_UTF8 ); if( pMem->enc==SQLITE_UTF16LE ){ /* UTF-16 Little-endian -> UTF-8 */ while( zIn<zTerm ){ READ_UTF16LE(zIn, c); WRITE_UTF8(z, c); } }else{ /* UTF-16 Little-endian -> UTF-8 */ while( zIn<zTerm ){ READ_UTF16BE(zIn, c); WRITE_UTF8(z, c); } } pMem->n = z - zOut; } *z = 0; assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len ); sqlite3VdbeMemRelease(pMem); pMem->flags &= ~(MEM_Static|MEM_Dyn|MEM_Ephem|MEM_Short); pMem->enc = desiredEnc; if( zOut==zShort ){ memcpy(pMem->zShort, zOut, len); zOut = pMem->zShort; pMem->flags |= (MEM_Term|MEM_Short); }else{ pMem->flags |= (MEM_Term|MEM_Dyn); } pMem->z = zOut;translate_out:#if defined(TRANSLATE_TRACE) && defined(SQLITE_DEBUG) { char zBuf[100]; sqlite3VdbeMemPrettyPrint(pMem, zBuf, 100); fprintf(stderr, "OUTPUT: %s\n", zBuf); }#endif return SQLITE_OK;}/*** This routine checks for a byte-order mark at the beginning of the ** UTF-16 string stored in *pMem. If one is present, it is removed and** the encoding of the Mem adjusted. This routine does not do any** byte-swapping, it just sets Mem.enc appropriately.**** The allocation (static, dynamic etc.) and encoding of the Mem may be** changed by this function.*/int sqlite3VdbeMemHandleBom(Mem *pMem){ int rc = SQLITE_OK; u8 bom = 0; if( pMem->n<0 || pMem->n>1 ){ u8 b1 = *(u8 *)pMem->z; u8 b2 = *(((u8 *)pMem->z) + 1); if( b1==0xFE && b2==0xFF ){ bom = SQLITE_UTF16BE; } if( b1==0xFF && b2==0xFE ){ bom = SQLITE_UTF16LE; } } if( bom ){ /* This function is called as soon as a string is stored in a Mem*, ** from within sqlite3VdbeMemSetStr(). At that point it is not possible ** for the string to be stored in Mem.zShort, or for it to be stored ** in dynamic memory with no destructor. */ assert( !(pMem->flags&MEM_Short) ); assert( !(pMem->flags&MEM_Dyn) || pMem->xDel ); if( pMem->flags & MEM_Dyn ){ void (*xDel)(void*) = pMem->xDel; char *z = pMem->z; pMem->z = 0; pMem->xDel = 0; rc = sqlite3VdbeMemSetStr(pMem, &z[2], pMem->n-2, bom, SQLITE_TRANSIENT); xDel(z); }else{ rc = sqlite3VdbeMemSetStr(pMem, &pMem->z[2], pMem->n-2, bom, SQLITE_TRANSIENT); } } return rc;}#endif /* SQLITE_OMIT_UTF16 *//*** pZ is a UTF-8 encoded unicode string. If nByte is less than zero,** return the number of unicode characters in pZ up to (but not including)** the first 0x00 byte. If nByte is not less than zero, return the** number of unicode characters in the first nByte of pZ (or up to ** the first 0x00, whichever comes first).*/int sqlite3utf8CharLen(const char *z, int nByte){ int r = 0; const char *zTerm; if( nByte>=0 ){ zTerm = &z[nByte]; }else{ zTerm = (const char *)(-1); } assert( z<=zTerm ); while( *z!=0 && z<zTerm ){ SKIP_UTF8(z); r++; } return r;}#ifndef SQLITE_OMIT_UTF16/*** pZ is a UTF-16 encoded unicode string. If nChar is less than zero,** return the number of bytes up to (but not including), the first pair** of consecutive 0x00 bytes in pZ. If nChar is not less than zero,** then return the number of bytes in the first nChar unicode characters** in pZ (or up until the first pair of 0x00 bytes, whichever comes first).*/int sqlite3utf16ByteLen(const void *zIn, int nChar){ int c = 1; char const *z = zIn; int n = 0; if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){ while( c && ((nChar<0) || n<nChar) ){ READ_UTF16BE(z, c); n++; } }else{ while( c && ((nChar<0) || n<nChar) ){ READ_UTF16LE(z, c); n++; } } return (z-(char const *)zIn)-((c==0)?2:0);}/*** UTF-16 implementation of the substr()*/void sqlite3utf16Substr( sqlite3_context *context, int argc, sqlite3_value **argv){ int y, z; unsigned char const *zStr; unsigned char const *zStrEnd; unsigned char const *zStart; unsigned char const *zEnd; int i; zStr = (unsigned char const *)sqlite3_value_text16(argv[0]); zStrEnd = &zStr[sqlite3_value_bytes16(argv[0])]; y = sqlite3_value_int(argv[1]); z = sqlite3_value_int(argv[2]); if( y>0 ){ y = y-1; zStart = zStr; if( SQLITE_UTF16BE==SQLITE_UTF16NATIVE ){ for(i=0; i<y && zStart<zStrEnd; i++) SKIP_UTF16BE(zStart); }else{ for(i=0; i<y && zStart<zStrEnd; i++) SKIP_UTF16LE(zStart); } }else{ zStart = zStrEnd; if( SQLITE_UTF16BE==SQLITE_UTF16NATIVE ){ for(i=y; i<0 && zStart>zStr; i++) RSKIP_UTF16BE(zStart); }else{ for(i=y; i<0 && zStart>zStr; i++) RSKIP_UTF16LE(zStart); } for(; i<0; i++) z -= 1; } zEnd = zStart; if( SQLITE_UTF16BE==SQLITE_UTF16NATIVE ){ for(i=0; i<z && zEnd<zStrEnd; i++) SKIP_UTF16BE(zEnd); }else{ for(i=0; i<z && zEnd<zStrEnd; i++) SKIP_UTF16LE(zEnd); } sqlite3_result_text16(context, zStart, zEnd-zStart, SQLITE_TRANSIENT);}#if defined(SQLITE_TEST)/*** This routine is called from the TCL test function "translate_selftest".** It checks that the primitives for serializing and deserializing** characters in each encoding are inverses of each other.*/void sqlite3utfSelfTest(){ int i; unsigned char zBuf[20]; unsigned char *z; int n; int c; for(i=0; i<0x00110000; i++){ z = zBuf; WRITE_UTF8(z, i); n = z-zBuf; z = zBuf; READ_UTF8(z, c); assert( c==i ); assert( (z-zBuf)==n ); } for(i=0; i<0x00110000; i++){ if( i>=0xD800 && i<=0xE000 ) continue; z = zBuf; WRITE_UTF16LE(z, i); n = z-zBuf; z = zBuf; READ_UTF16LE(z, c); assert( c==i ); assert( (z-zBuf)==n ); } for(i=0; i<0x00110000; i++){ if( i>=0xD800 && i<=0xE000 ) continue; z = zBuf; WRITE_UTF16BE(z, i); n = z-zBuf; z = zBuf; READ_UTF16BE(z, c); assert( c==i ); assert( (z-zBuf)==n ); }}#endif /* SQLITE_TEST */#endif /* SQLITE_OMIT_UTF16 */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -