📄 utf.c

📁 sqlite-3.4.1,嵌入式数据库.是一个功能强大的开源数据库,给学习和研发以及小型公司的发展带来了前所未有的好处.
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
        sqliteFree(zFree);      }    }    if( desiredEnc==SQLITE_UTF16LE ){      /* UTF-8 -> UTF-16 Little-endian */      while( zIn<zTerm ){        SQLITE_READ_UTF8(zIn, c);         WRITE_UTF16LE(z, c);      }      if( iExtra!=0xD800 ){        WRITE_UTF16LE(z, iExtra);      }    }else{      assert( desiredEnc==SQLITE_UTF16BE );      /* UTF-8 -> UTF-16 Big-endian */      while( zIn<zTerm ){        SQLITE_READ_UTF8(zIn, c);         WRITE_UTF16BE(z, c);      }      if( iExtra!=0xD800 ){        WRITE_UTF16BE(z, iExtra);      }    }    pMem->n = z - zOut;    *z++ = 0;  }else{    assert( desiredEnc==SQLITE_UTF8 );    if( pMem->enc==SQLITE_UTF16LE ){      /* UTF-16 Little-endian -> UTF-8 */      while( zIn<zTerm ){        READ_UTF16LE(zIn, c);         WRITE_UTF8(z, c);      }    }else{      /* UTF-16 Little-endian -> UTF-8 */      while( zIn<zTerm ){        READ_UTF16BE(zIn, c);         WRITE_UTF8(z, c);      }    }    pMem->n = z - zOut;  }  *z = 0;  assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len );  sqlite3VdbeMemRelease(pMem);  pMem->flags &= ~(MEM_Static|MEM_Dyn|MEM_Ephem|MEM_Short);  pMem->enc = desiredEnc;  if( zOut==zShort ){    memcpy(pMem->zShort, zOut, len);    zOut = (u8*)pMem->zShort;    pMem->flags |= (MEM_Term|MEM_Short);  }else{    pMem->flags |= (MEM_Term|MEM_Dyn);  }  pMem->z = (char*)zOut;translate_out:#if defined(TRANSLATE_TRACE) && defined(SQLITE_DEBUG)  {    char zBuf[100];    sqlite3VdbeMemPrettyPrint(pMem, zBuf);    fprintf(stderr, "OUTPUT: %s\n", zBuf);  }#endif  return SQLITE_OK;}/*** This routine checks for a byte-order mark at the beginning of the ** UTF-16 string stored in *pMem. If one is present, it is removed and** the encoding of the Mem adjusted. This routine does not do any** byte-swapping, it just sets Mem.enc appropriately.**** The allocation (static, dynamic etc.) 
and encoding of the Mem may be** changed by this function.*/int sqlite3VdbeMemHandleBom(Mem *pMem){  int rc = SQLITE_OK;  u8 bom = 0;  if( pMem->n<0 || pMem->n>1 ){    u8 b1 = *(u8 *)pMem->z;    u8 b2 = *(((u8 *)pMem->z) + 1);    if( b1==0xFE && b2==0xFF ){      bom = SQLITE_UTF16BE;    }    if( b1==0xFF && b2==0xFE ){      bom = SQLITE_UTF16LE;    }  }    if( bom ){    /* This function is called as soon as a string is stored in a Mem*,    ** from within sqlite3VdbeMemSetStr(). At that point it is not possible    ** for the string to be stored in Mem.zShort, or for it to be stored    ** in dynamic memory with no destructor.    */    assert( !(pMem->flags&MEM_Short) );    assert( !(pMem->flags&MEM_Dyn) || pMem->xDel );    if( pMem->flags & MEM_Dyn ){      void (*xDel)(void*) = pMem->xDel;      char *z = pMem->z;      pMem->z = 0;      pMem->xDel = 0;      rc = sqlite3VdbeMemSetStr(pMem, &z[2], pMem->n-2, bom, SQLITE_TRANSIENT);      xDel(z);    }else{      rc = sqlite3VdbeMemSetStr(pMem, &pMem->z[2], pMem->n-2, bom,           SQLITE_TRANSIENT);    }  }  return rc;}#endif /* SQLITE_OMIT_UTF16 *//*** pZ is a UTF-8 encoded unicode string. If nByte is less than zero,** return the number of unicode characters in pZ up to (but not including)** the first 0x00 byte. 
If nByte is not less than zero, return the** number of unicode characters in the first nByte of pZ (or up to ** the first 0x00, whichever comes first).*/int sqlite3Utf8CharLen(const char *zIn, int nByte){  int r = 0;  const u8 *z = (const u8*)zIn;  const u8 *zTerm;  if( nByte>=0 ){    zTerm = &z[nByte];  }else{    zTerm = (const u8*)(-1);  }  assert( z<=zTerm );  while( *z!=0 && z<zTerm ){    SQLITE_SKIP_UTF8(z);    r++;  }  return r;}#ifndef SQLITE_OMIT_UTF16/*** Convert a UTF-16 string in the native encoding into a UTF-8 string.** Memory to hold the UTF-8 string is obtained from malloc and must be** freed by the calling function.**** NULL is returned if there is an allocation error.*/char *sqlite3Utf16to8(const void *z, int nByte){  Mem m;  memset(&m, 0, sizeof(m));  sqlite3VdbeMemSetStr(&m, z, nByte, SQLITE_UTF16NATIVE, SQLITE_STATIC);  sqlite3VdbeChangeEncoding(&m, SQLITE_UTF8);  assert( (m.flags & MEM_Term)!=0 || sqlite3MallocFailed() );  assert( (m.flags & MEM_Str)!=0 || sqlite3MallocFailed() );  return (m.flags & MEM_Dyn)!=0 ? m.z : sqliteStrDup(m.z);}/*** pZ is a UTF-16 encoded unicode string. If nChar is less than zero,** return the number of bytes up to (but not including), the first pair** of consecutive 0x00 bytes in pZ. If nChar is not less than zero,** then return the number of bytes in the first nChar unicode characters** in pZ (or up until the first pair of 0x00 bytes, whichever comes first).*/int sqlite3Utf16ByteLen(const void *zIn, int nChar){  unsigned int c = 1;  char const *z = zIn;  int n = 0;  if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){    /* Using an "if (SQLITE_UTF16NATIVE==SQLITE_UTF16BE)" construct here    ** and in other parts of this file means that at one branch will    ** not be covered by coverage testing on any single host. But coverage    ** will be complete if the tests are run on both a little-endian and     ** big-endian host. 
Because both the UTF16NATIVE and SQLITE_UTF16BE    ** macros are constant at compile time the compiler can determine    ** which branch will be followed. It is therefore assumed that no runtime    ** penalty is paid for this "if" statement.    */    while( c && ((nChar<0) || n<nChar) ){      READ_UTF16BE(z, c);      n++;    }  }else{    while( c && ((nChar<0) || n<nChar) ){      READ_UTF16LE(z, c);      n++;    }  }  return (z-(char const *)zIn)-((c==0)?2:0);}#if defined(SQLITE_TEST)/*** Translate UTF-8 to UTF-8.**** This has the effect of making sure that the string is well-formed** UTF-8.  Miscoded characters are removed.**** The translation is done in-place (since it is impossible for the** correct UTF-8 encoding to be longer than a malformed encoding).*/int sqlite3Utf8To8(unsigned char *zIn){  unsigned char *zOut = zIn;  unsigned char *zStart = zIn;  int c;  while(1){    SQLITE_READ_UTF8(zIn, c);    if( c==0 ) break;    if( c!=0xfffd ){      WRITE_UTF8(zOut, c);    }  }  *zOut = 0;  return zOut - zStart;}#endif#if defined(SQLITE_TEST)/*** This routine is called from the TCL test function "translate_selftest".** It checks that the primitives for serializing and deserializing** characters in each encoding are inverses of each other.*/void sqlite3UtfSelfTest(){  unsigned int i, t;  unsigned char zBuf[20];  unsigned char *z;  int n;  unsigned int c;  for(i=0; i<0x00110000; i++){    z = zBuf;    WRITE_UTF8(z, i);    n = z-zBuf;    z[0] = 0;    z = zBuf;    SQLITE_READ_UTF8(z, c);    t = i;    if( i>=0xD800 && i<=0xDFFF ) t = 0xFFFD;    if( (i&0xFFFFFFFE)==0xFFFE ) t = 0xFFFD;    assert( c==t );    assert( (z-zBuf)==n );  }  for(i=0; i<0x00110000; i++){    if( i>=0xD800 && i<0xE000 ) continue;    z = zBuf;    WRITE_UTF16LE(z, i);    n = z-zBuf;    z[0] = 0;    z = zBuf;    READ_UTF16LE(z, c);    assert( c==i );    assert( (z-zBuf)==n );  }  for(i=0; i<0x00110000; i++){    if( i>=0xD800 && i<0xE000 ) continue;    z = zBuf;    WRITE_UTF16BE(z, i);    n = z-zBuf;    
z[0] = 0;    z = zBuf;    READ_UTF16BE(z, c);    assert( c==i );    assert( (z-zBuf)==n );  }}#endif /* SQLITE_TEST */#endif /* SQLITE_OMIT_UTF16 */
上一页 12
💿 文件大小 2186 K
👤 上传用户 hahuhuhuhu
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#sqlite #嵌入式数据库 #发展 #开源
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -