📄 utf.c

📁 sqlite 嵌入式数据库的源码
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
  /* Set len to the maximum number of bytes required in the output buffer. */  if( desiredEnc==SQLITE_UTF8 ){    /* When converting from UTF-16, the maximum growth results from    ** translating a 2-byte character to a 3-byte UTF-8 character (i.e.    ** code-point 0xFFFC). A single byte is required for the output string    ** nul-terminator.    */    len = (pMem->n/2) * 3 + 1;  }else{    /* When converting from UTF-8 to UTF-16 the maximum growth is caused    ** when a 1-byte UTF-8 character is translated into a 2-byte UTF-16    ** character. Two bytes are required in the output buffer for the    ** nul-terminator.    */    len = pMem->n * 2 + 2;  }  /* Set zIn to point at the start of the input buffer and zTerm to point 1  ** byte past the end.  **  ** Variable zOut is set to point at the output buffer. This may be space  ** obtained from malloc(), or Mem.zShort, if it large enough and not in  ** use, or the zShort array on the stack (see above).  */  zIn = pMem->z;  zTerm = &zIn[pMem->n];  if( len>NBFS ){    zOut = sqliteMallocRaw(len);    if( !zOut ) return SQLITE_NOMEM;  }else{    zOut = zShort;  }  z = zOut;  if( pMem->enc==SQLITE_UTF8 ){    if( desiredEnc==SQLITE_UTF16LE ){      /* UTF-8 -> UTF-16 Little-endian */      while( zIn<zTerm ){        READ_UTF8(zIn, c);         WRITE_UTF16LE(z, c);      }    }else{      assert( desiredEnc==SQLITE_UTF16BE );      /* UTF-8 -> UTF-16 Big-endian */      while( zIn<zTerm ){        READ_UTF8(zIn, c);         WRITE_UTF16BE(z, c);      }    }    pMem->n = z - zOut;    *z++ = 0;  }else{    assert( desiredEnc==SQLITE_UTF8 );    if( pMem->enc==SQLITE_UTF16LE ){      /* UTF-16 Little-endian -> UTF-8 */      while( zIn<zTerm ){        READ_UTF16LE(zIn, c);         WRITE_UTF8(z, c);      }    }else{      /* UTF-16 Little-endian -> UTF-8 */      while( zIn<zTerm ){        READ_UTF16BE(zIn, c);         WRITE_UTF8(z, c);      }    }    pMem->n = z - zOut;  }  *z = 0;  assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len );  
/* Final statements of the translation routine that begins before this point:
** install the converted text (held in zOut) as the new value of pMem.
*/
  sqlite3VdbeMemRelease(pMem);
  /* Clear the MEM_Static, MEM_Dyn, MEM_Ephem and MEM_Short bits; either
  ** MEM_Short or MEM_Dyn is set again just below. */
  pMem->flags &= ~(MEM_Static|MEM_Dyn|MEM_Ephem|MEM_Short);
  pMem->enc = desiredEnc;
  if( zOut==zShort ){
    /* The output was built in the local zShort buffer: copy len bytes into
    ** pMem->zShort and make the Mem point at that copy. */
    memcpy(pMem->zShort, zOut, len);
    zOut = pMem->zShort;
    pMem->flags |= (MEM_Term|MEM_Short);
  }else{
    /* The output buffer was obtained from sqliteMallocRaw(). */
    pMem->flags |= (MEM_Term|MEM_Dyn);
  }
  pMem->z = zOut;

translate_out:
#if defined(TRANSLATE_TRACE) && defined(SQLITE_DEBUG)
  {
    /* Debug builds only: print the translated value to stderr. */
    char zBuf[100];
    sqlite3VdbeMemPrettyPrint(pMem, zBuf, 100);
    fprintf(stderr, "OUTPUT: %s\n", zBuf);
  }
#endif
  return SQLITE_OK;
}

/*
** Check for a byte-order mark at the beginning of the UTF-16 string
** stored in *pMem. If one is present, it is removed and the encoding of
** the Mem is adjusted to match it. This routine does not do any
** byte-swapping, it just sets Mem.enc appropriately.
**
** The allocation (static, dynamic etc.) and encoding of the Mem may be
** changed by this function.
**
** Returns SQLITE_OK when no byte-order mark is found; otherwise returns
** whatever sqlite3VdbeMemSetStr() returns when the text following the
** mark is stored back into pMem.
*/
int sqlite3VdbeMemHandleBom(Mem *pMem){
  int rc = SQLITE_OK;
  u8 bom = 0;   /* Stays 0 unless a byte-order mark is recognised */

  /* The first two bytes are inspected when pMem->n is negative or at
  ** least 2.
  ** NOTE(review): a negative n presumably means "nul-terminated, length
  ** not yet computed" -- confirm against sqlite3VdbeMemSetStr(). */
  if( pMem->n<0 || pMem->n>1 ){
    u8 b1 = *(u8 *)pMem->z;
    u8 b2 = *(((u8 *)pMem->z) + 1);
    if( b1==0xFE && b2==0xFF ){   /* bytes FE FF */
      bom = SQLITE_UTF16BE;
    }
    if( b1==0xFF && b2==0xFE ){   /* bytes FF FE */
      bom = SQLITE_UTF16LE;
    }
  }
  
  if( bom ){
    /* This function is called as soon as a string is stored in a Mem*,
    ** from within sqlite3VdbeMemSetStr(). At that point it is not possible
    ** for the string to be stored in Mem.zShort, or for it to be stored
    ** in dynamic memory with no destructor.
    */
    assert( !(pMem->flags&MEM_Short) );
    assert( !(pMem->flags&MEM_Dyn) || pMem->xDel );
    if( pMem->flags & MEM_Dyn ){
      /* Detach the buffer and its destructor from pMem before re-storing
      ** the text that follows the 2-byte mark, then destroy the old buffer
      ** by hand once the copy has been made.
      ** NOTE(review): presumably this keeps sqlite3VdbeMemSetStr() from
      ** releasing the buffer while it is still the copy source -- confirm. */
      void (*xDel)(void*) = pMem->xDel;
      char *z = pMem->z;
      pMem->z = 0;
      pMem->xDel = 0;
      rc = sqlite3VdbeMemSetStr(pMem, &z[2], pMem->n-2, bom, SQLITE_TRANSIENT);
      xDel(z);
    }else{
      /* Not MEM_Dyn: re-store the text that follows the mark, passing
      ** SQLITE_TRANSIENT as the destructor argument. */
      rc = sqlite3VdbeMemSetStr(pMem, &pMem->z[2], pMem->n-2, bom, 
          SQLITE_TRANSIENT);
    }
  }
  return rc;
}
#endif /* SQLITE_OMIT_UTF16 */

/*
** pZ is a UTF-8 encoded unicode string. 
If nByte is less than zero,** return the number of unicode characters in pZ up to (but not including)** the first 0x00 byte. If nByte is not less than zero, return the** number of unicode characters in the first nByte of pZ (or up to ** the first 0x00, whichever comes first).*/int sqlite3utf8CharLen(const char *z, int nByte){  int r = 0;  const char *zTerm;  if( nByte>=0 ){    zTerm = &z[nByte];  }else{    zTerm = (const char *)(-1);  }  assert( z<=zTerm );  while( *z!=0 && z<zTerm ){    SKIP_UTF8(z);    r++;  }  return r;}#ifndef SQLITE_OMIT_UTF16/*** pZ is a UTF-16 encoded unicode string. If nChar is less than zero,** return the number of bytes up to (but not including), the first pair** of consecutive 0x00 bytes in pZ. If nChar is not less than zero,** then return the number of bytes in the first nChar unicode characters** in pZ (or up until the first pair of 0x00 bytes, whichever comes first).*/int sqlite3utf16ByteLen(const void *zIn, int nChar){  int c = 1;  char const *z = zIn;  int n = 0;  if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){    while( c && ((nChar<0) || n<nChar) ){      READ_UTF16BE(z, c);      n++;    }  }else{    while( c && ((nChar<0) || n<nChar) ){      READ_UTF16LE(z, c);      n++;    }  }  return (z-(char const *)zIn)-((c==0)?2:0);}/*** UTF-16 implementation of the substr()*/void sqlite3utf16Substr(  sqlite3_context *context,  int argc,  sqlite3_value **argv){  int y, z;  unsigned char const *zStr;  unsigned char const *zStrEnd;  unsigned char const *zStart;  unsigned char const *zEnd;  int i;  zStr = (unsigned char const *)sqlite3_value_text16(argv[0]);  zStrEnd = &zStr[sqlite3_value_bytes16(argv[0])];  y = sqlite3_value_int(argv[1]);  z = sqlite3_value_int(argv[2]);  if( y>0 ){    y = y-1;    zStart = zStr;    if( SQLITE_UTF16BE==SQLITE_UTF16NATIVE ){      for(i=0; i<y && zStart<zStrEnd; i++) SKIP_UTF16BE(zStart);    }else{      for(i=0; i<y && zStart<zStrEnd; i++) SKIP_UTF16LE(zStart);    }  }else{    zStart = zStrEnd;    if( 
SQLITE_UTF16BE==SQLITE_UTF16NATIVE ){      for(i=y; i<0 && zStart>zStr; i++) RSKIP_UTF16BE(zStart);    }else{      for(i=y; i<0 && zStart>zStr; i++) RSKIP_UTF16LE(zStart);    }    for(; i<0; i++) z -= 1;  }  zEnd = zStart;  if( SQLITE_UTF16BE==SQLITE_UTF16NATIVE ){    for(i=0; i<z && zEnd<zStrEnd; i++) SKIP_UTF16BE(zEnd);  }else{    for(i=0; i<z && zEnd<zStrEnd; i++) SKIP_UTF16LE(zEnd);  }  sqlite3_result_text16(context, zStart, zEnd-zStart, SQLITE_TRANSIENT);}#if defined(SQLITE_TEST)/*** This routine is called from the TCL test function "translate_selftest".** It checks that the primitives for serializing and deserializing** characters in each encoding are inverses of each other.*/void sqlite3utfSelfTest(){  int i;  unsigned char zBuf[20];  unsigned char *z;  int n;  int c;  for(i=0; i<0x00110000; i++){    z = zBuf;    WRITE_UTF8(z, i);    n = z-zBuf;    z = zBuf;    READ_UTF8(z, c);    assert( c==i );    assert( (z-zBuf)==n );  }  for(i=0; i<0x00110000; i++){    if( i>=0xD800 && i<=0xE000 ) continue;    z = zBuf;    WRITE_UTF16LE(z, i);    n = z-zBuf;    z = zBuf;    READ_UTF16LE(z, c);    assert( c==i );    assert( (z-zBuf)==n );  }  for(i=0; i<0x00110000; i++){    if( i>=0xD800 && i<=0xE000 ) continue;    z = zBuf;    WRITE_UTF16BE(z, i);    n = z-zBuf;    z = zBuf;    READ_UTF16BE(z, c);    assert( c==i );    assert( (z-zBuf)==n );  }}#endif /* SQLITE_TEST */#endif /* SQLITE_OMIT_UTF16 */
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -