📄 utf.c
字号:
sqliteFree(zFree); } } if( desiredEnc==SQLITE_UTF16LE ){ /* UTF-8 -> UTF-16 Little-endian */ while( zIn<zTerm ){ SQLITE_READ_UTF8(zIn, c); WRITE_UTF16LE(z, c); } if( iExtra!=0xD800 ){ WRITE_UTF16LE(z, iExtra); } }else{ assert( desiredEnc==SQLITE_UTF16BE ); /* UTF-8 -> UTF-16 Big-endian */ while( zIn<zTerm ){ SQLITE_READ_UTF8(zIn, c); WRITE_UTF16BE(z, c); } if( iExtra!=0xD800 ){ WRITE_UTF16BE(z, iExtra); } } pMem->n = z - zOut; *z++ = 0; }else{ assert( desiredEnc==SQLITE_UTF8 ); if( pMem->enc==SQLITE_UTF16LE ){ /* UTF-16 Little-endian -> UTF-8 */ while( zIn<zTerm ){ READ_UTF16LE(zIn, c); WRITE_UTF8(z, c); } }else{ /* UTF-16 Little-endian -> UTF-8 */ while( zIn<zTerm ){ READ_UTF16BE(zIn, c); WRITE_UTF8(z, c); } } pMem->n = z - zOut; } *z = 0; assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len ); sqlite3VdbeMemRelease(pMem); pMem->flags &= ~(MEM_Static|MEM_Dyn|MEM_Ephem|MEM_Short); pMem->enc = desiredEnc; if( zOut==zShort ){ memcpy(pMem->zShort, zOut, len); zOut = (u8*)pMem->zShort; pMem->flags |= (MEM_Term|MEM_Short); }else{ pMem->flags |= (MEM_Term|MEM_Dyn); } pMem->z = (char*)zOut;translate_out:#if defined(TRANSLATE_TRACE) && defined(SQLITE_DEBUG) { char zBuf[100]; sqlite3VdbeMemPrettyPrint(pMem, zBuf); fprintf(stderr, "OUTPUT: %s\n", zBuf); }#endif return SQLITE_OK;}/*** This routine checks for a byte-order mark at the beginning of the ** UTF-16 string stored in *pMem. If one is present, it is removed and** the encoding of the Mem adjusted. This routine does not do any** byte-swapping, it just sets Mem.enc appropriately.**** The allocation (static, dynamic etc.) and encoding of the Mem may be** changed by this function.*/int sqlite3VdbeMemHandleBom(Mem *pMem){ int rc = SQLITE_OK; u8 bom = 0; if( pMem->n<0 || pMem->n>1 ){ u8 b1 = *(u8 *)pMem->z; u8 b2 = *(((u8 *)pMem->z) + 1); if( b1==0xFE && b2==0xFF ){ bom = SQLITE_UTF16BE; } if( b1==0xFF && b2==0xFE ){ bom = SQLITE_UTF16LE; } } if( bom ){ /* This function is called as soon as a string is stored in a Mem*, ** from within sqlite3VdbeMemSetStr(). At that point it is not possible ** for the string to be stored in Mem.zShort, or for it to be stored ** in dynamic memory with no destructor. */ assert( !(pMem->flags&MEM_Short) ); assert( !(pMem->flags&MEM_Dyn) || pMem->xDel ); if( pMem->flags & MEM_Dyn ){ void (*xDel)(void*) = pMem->xDel; char *z = pMem->z; pMem->z = 0; pMem->xDel = 0; rc = sqlite3VdbeMemSetStr(pMem, &z[2], pMem->n-2, bom, SQLITE_TRANSIENT); xDel(z); }else{ rc = sqlite3VdbeMemSetStr(pMem, &pMem->z[2], pMem->n-2, bom, SQLITE_TRANSIENT); } } return rc;}#endif /* SQLITE_OMIT_UTF16 *//*** pZ is a UTF-8 encoded unicode string. If nByte is less than zero,** return the number of unicode characters in pZ up to (but not including)** the first 0x00 byte. If nByte is not less than zero, return the** number of unicode characters in the first nByte of pZ (or up to ** the first 0x00, whichever comes first).*/int sqlite3Utf8CharLen(const char *zIn, int nByte){ int r = 0; const u8 *z = (const u8*)zIn; const u8 *zTerm; if( nByte>=0 ){ zTerm = &z[nByte]; }else{ zTerm = (const u8*)(-1); } assert( z<=zTerm ); while( *z!=0 && z<zTerm ){ SQLITE_SKIP_UTF8(z); r++; } return r;}#ifndef SQLITE_OMIT_UTF16/*** Convert a UTF-16 string in the native encoding into a UTF-8 string.** Memory to hold the UTF-8 string is obtained from malloc and must be** freed by the calling function.**** NULL is returned if there is an allocation error.*/char *sqlite3Utf16to8(const void *z, int nByte){ Mem m; memset(&m, 0, sizeof(m)); sqlite3VdbeMemSetStr(&m, z, nByte, SQLITE_UTF16NATIVE, SQLITE_STATIC); sqlite3VdbeChangeEncoding(&m, SQLITE_UTF8); assert( (m.flags & MEM_Term)!=0 || sqlite3MallocFailed() ); assert( (m.flags & MEM_Str)!=0 || sqlite3MallocFailed() ); return (m.flags & MEM_Dyn)!=0 ? m.z : sqliteStrDup(m.z);}/*** pZ is a UTF-16 encoded unicode string. If nChar is less than zero,** return the number of bytes up to (but not including), the first pair** of consecutive 0x00 bytes in pZ. If nChar is not less than zero,** then return the number of bytes in the first nChar unicode characters** in pZ (or up until the first pair of 0x00 bytes, whichever comes first).*/int sqlite3Utf16ByteLen(const void *zIn, int nChar){ unsigned int c = 1; char const *z = zIn; int n = 0; if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){ /* Using an "if (SQLITE_UTF16NATIVE==SQLITE_UTF16BE)" construct here ** and in other parts of this file means that at one branch will ** not be covered by coverage testing on any single host. But coverage ** will be complete if the tests are run on both a little-endian and ** big-endian host. Because both the UTF16NATIVE and SQLITE_UTF16BE ** macros are constant at compile time the compiler can determine ** which branch will be followed. It is therefore assumed that no runtime ** penalty is paid for this "if" statement. */ while( c && ((nChar<0) || n<nChar) ){ READ_UTF16BE(z, c); n++; } }else{ while( c && ((nChar<0) || n<nChar) ){ READ_UTF16LE(z, c); n++; } } return (z-(char const *)zIn)-((c==0)?2:0);}#if defined(SQLITE_TEST)/*** Translate UTF-8 to UTF-8.**** This has the effect of making sure that the string is well-formed** UTF-8. Miscoded characters are removed.**** The translation is done in-place (since it is impossible for the** correct UTF-8 encoding to be longer than a malformed encoding).*/int sqlite3Utf8To8(unsigned char *zIn){ unsigned char *zOut = zIn; unsigned char *zStart = zIn; int c; while(1){ SQLITE_READ_UTF8(zIn, c); if( c==0 ) break; if( c!=0xfffd ){ WRITE_UTF8(zOut, c); } } *zOut = 0; return zOut - zStart;}#endif#if defined(SQLITE_TEST)/*** This routine is called from the TCL test function "translate_selftest".** It checks that the primitives for serializing and deserializing** characters in each encoding are inverses of each other.*/void sqlite3UtfSelfTest(){ unsigned int i, t; unsigned char zBuf[20]; unsigned char *z; int n; unsigned int c; for(i=0; i<0x00110000; i++){ z = zBuf; WRITE_UTF8(z, i); n = z-zBuf; z[0] = 0; z = zBuf; SQLITE_READ_UTF8(z, c); t = i; if( i>=0xD800 && i<=0xDFFF ) t = 0xFFFD; if( (i&0xFFFFFFFE)==0xFFFE ) t = 0xFFFD; assert( c==t ); assert( (z-zBuf)==n ); } for(i=0; i<0x00110000; i++){ if( i>=0xD800 && i<0xE000 ) continue; z = zBuf; WRITE_UTF16LE(z, i); n = z-zBuf; z[0] = 0; z = zBuf; READ_UTF16LE(z, c); assert( c==i ); assert( (z-zBuf)==n ); } for(i=0; i<0x00110000; i++){ if( i>=0xD800 && i<0xE000 ) continue; z = zBuf; WRITE_UTF16BE(z, i); n = z-zBuf; z[0] = 0; z = zBuf; READ_UTF16BE(z, c); assert( c==i ); assert( (z-zBuf)==n ); }}#endif /* SQLITE_TEST */#endif /* SQLITE_OMIT_UTF16 */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -