⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fts2_icu.c

📁 最新的sqlite3.6.2源代码
💻 C
字号:
/*
** 2007 June 22
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file implements a tokenizer for fts2 based on the ICU library.
**
** $Id: fts2_icu.c,v 1.2 2008/07/22 22:20:50 shess Exp $
*/

#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
#ifdef SQLITE_ENABLE_ICU

#include <assert.h>
#include <string.h>
#include "fts2_tokenizer.h"

#include <unicode/ubrk.h>
#include <unicode/ucol.h>
#include <unicode/ustring.h>
#include <unicode/utf16.h>

typedef struct IcuTokenizer IcuTokenizer;
typedef struct IcuCursor IcuCursor;

/*
** Tokenizer object.  When a locale name is supplied to icuCreate() it is
** stored in the same allocation, immediately after this structure, and
** zLocale points at it.  Otherwise zLocale is NULL.
*/
struct IcuTokenizer {
  sqlite3_tokenizer base;
  char *zLocale;
};

/*
** Cursor object.  aChar[] and aOffset[] live in the same allocation as the
** structure itself (see icuOpen()); zBuffer is a separate allocation.
*/
struct IcuCursor {
  sqlite3_tokenizer_cursor base;

  UBreakIterator *pIter;      /* ICU break-iterator object */
  int nChar;                  /* Number of UChar elements in pInput */
  UChar *aChar;               /* Copy of input using utf-16 encoding */
  int *aOffset;               /* Offsets of each character in utf-8 input */

  int nBuffer;                /* Allocated size of zBuffer in bytes */
  char *zBuffer;              /* Buffer holding the current token as utf-8 */

  int iToken;                 /* Position value for the next token returned */
};

/*
** Create a new tokenizer instance.
**
** If argc>0, argv[0] is copied and later used as the locale passed to
** ubrk_open().  Returns SQLITE_OK on success or SQLITE_NOMEM if the
** allocation fails.
*/
static int icuCreate(
  int argc,                            /* Number of entries in argv[] */
  const char * const *argv,            /* Tokenizer creation arguments */
  sqlite3_tokenizer **ppTokenizer      /* OUT: Created tokenizer */
){
  IcuTokenizer *p;
  int n = 0;

  if( argc>0 ){
    n = (int)strlen(argv[0])+1;        /* +1 for the nul-terminator */
  }
  p = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+n);
  if( !p ){
    return SQLITE_NOMEM;
  }
  memset(p, 0, sizeof(IcuTokenizer));

  if( n ){
    /* The locale string occupies the n bytes following the structure. */
    p->zLocale = (char *)&p[1];
    memcpy(p->zLocale, argv[0], n);
  }

  *ppTokenizer = (sqlite3_tokenizer *)p;

  return SQLITE_OK;
}

/*
** Destroy a tokenizer.  The locale string shares the tokenizer's
** allocation, so a single free releases everything.
*/
static int icuDestroy(sqlite3_tokenizer *pTokenizer){
  IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
  sqlite3_free(p);
  return SQLITE_OK;
}

/*
** Prepare to begin tokenizing a particular string.  The input
** string to be tokenized is zInput[0..nInput-1].  If nInput is negative
** the length is taken from strlen(zInput).  A NULL zInput is treated as
** an empty string.  A cursor used to incrementally tokenize this string
** is returned in *ppCursor.
**
** The input is decoded from utf-8, case-folded and stored as utf-16 in
** IcuCursor.aChar[].  IcuCursor.aOffset[] records, for each utf-16 index
** at which a character starts (and for the end of the text), the
** corresponding byte offset in zInput.
**
** Returns SQLITE_OK, SQLITE_NOMEM, or SQLITE_ERROR if the utf-16 buffer
** overflows or the break iterator cannot be opened.  *ppCursor is set to
** NULL unless SQLITE_OK is returned.
*/
static int icuOpen(
  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
  const char *zInput,                    /* Input string */
  int nInput,                            /* Length of zInput in bytes */
  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
){
  IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
  IcuCursor *pCsr;

  const int32_t opt = U_FOLD_CASE_DEFAULT;
  UErrorCode status = U_ZERO_ERROR;
  int nChar;                      /* Capacity of aChar[] used for appends */
  int nCharAligned;               /* nChar rounded up to a multiple of 4 */

  UChar32 c;
  int iInput = 0;
  int iOut = 0;

  *ppCursor = 0;

  if( zInput==0 ){
    nInput = 0;
    zInput = "";
  }else if( nInput<0 ){
    nInput = (int)strlen(zInput);
  }
  nChar = nInput+1;

  /* aOffset[] is an int array placed directly after aChar[], whose
  ** elements are 16-bit.  Round the number of aChar[] slots up to a
  ** multiple of four so that aOffset[] is suitably aligned for int access
  ** regardless of whether nChar is odd or even.
  */
  nCharAligned = (nChar+3)&~3;
  pCsr = (IcuCursor *)sqlite3_malloc(
      sizeof(IcuCursor) +                /* IcuCursor */
      nCharAligned * sizeof(UChar) +     /* IcuCursor.aChar[] */
      (nChar+1) * sizeof(int)            /* IcuCursor.aOffset[] */
  );
  if( !pCsr ){
    return SQLITE_NOMEM;
  }
  memset(pCsr, 0, sizeof(IcuCursor));
  pCsr->aChar = (UChar *)&pCsr[1];
  pCsr->aOffset = (int *)&pCsr->aChar[nCharAligned];

  pCsr->aOffset[iOut] = iInput;
  U8_NEXT(zInput, iInput, nInput, c);
  while( c>0 ){
    int isError = 0;
    c = u_foldCase(c, opt);
    U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
    if( isError ){
      sqlite3_free(pCsr);
      return SQLITE_ERROR;
    }
    /* iOut now indexes the slot for the next character; iInput is the
    ** byte offset at which that character starts in zInput. */
    pCsr->aOffset[iOut] = iInput;

    if( iInput<nInput ){
      U8_NEXT(zInput, iInput, nInput, c);
    }else{
      c = 0;
    }
  }

  pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
  if( !U_SUCCESS(status) ){
    sqlite3_free(pCsr);
    return SQLITE_ERROR;
  }
  pCsr->nChar = iOut;

  ubrk_first(pCsr->pIter);
  *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
  return SQLITE_OK;
}

/*
** Close a tokenization cursor previously opened by a call to icuOpen().
** Releases the break iterator, the token buffer and the cursor itself.
*/
static int icuClose(sqlite3_tokenizer_cursor *pCursor){
  IcuCursor *pCsr = (IcuCursor *)pCursor;
  ubrk_close(pCsr->pIter);
  sqlite3_free(pCsr->zBuffer);
  sqlite3_free(pCsr);
  return SQLITE_OK;
}

/*
** Extract the next token from a tokenization cursor.
**
** Each segment reported by the ICU break iterator becomes a token after
** leading white-space is skipped; segments that consist entirely of
** white-space are skipped altogether.  The token text written to *ppToken
** is the utf-8 encoding of the case-folded text stored by icuOpen(), and
** lives in a buffer owned by the cursor.  The start/end offsets are byte
** offsets into the original utf-8 input.
**
** Returns SQLITE_OK when a token was produced, SQLITE_DONE at the end of
** the input, SQLITE_NOMEM if the token buffer cannot be grown, or
** SQLITE_ERROR if the conversion to utf-8 fails.
*/
static int icuNext(
  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by icuOpen */
  const char **ppToken,               /* OUT: *ppToken is the token text */
  int *pnBytes,                       /* OUT: Number of bytes in token */
  int *piStartOffset,                 /* OUT: Starting offset of token */
  int *piEndOffset,                   /* OUT: Ending offset of token */
  int *piPosition                     /* OUT: Position integer of token */
){
  IcuCursor *pCsr = (IcuCursor *)pCursor;

  int iStart = 0;
  int iEnd = 0;
  int nByte = 0;

  while( iStart==iEnd ){
    UChar32 c;

    iStart = ubrk_current(pCsr->pIter);
    iEnd = ubrk_next(pCsr->pIter);
    if( iEnd==UBRK_DONE ){
      return SQLITE_DONE;
    }

    /* Skip leading white-space.  aChar[] holds utf-16, so it must be
    ** decoded with U16_NEXT (not U8_NEXT). */
    while( iStart<iEnd ){
      int iWhite = iStart;
      U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
      if( u_isspace(c) ){
        iStart = iWhite;
      }else{
        break;
      }
    }
    assert(iStart<=iEnd);
  }

  /* Convert the token to utf-8.  The first pass may find the buffer too
  ** small, in which case u_strToUTF8() reports the required size in nByte
  ** and the loop grows the buffer and tries again.
  */
  do {
    UErrorCode status = U_ZERO_ERROR;
    if( nByte ){
      char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
      if( !zNew ){
        return SQLITE_NOMEM;
      }
      pCsr->zBuffer = zNew;
      pCsr->nBuffer = nByte;
    }

    u_strToUTF8(
        pCsr->zBuffer, pCsr->nBuffer, &nByte,    /* Output vars */
        &pCsr->aChar[iStart], iEnd-iStart,       /* Input vars */
        &status                                  /* Output success/failure */
    );
    /* A buffer overflow is handled by the retry loop; anything else that
    ** counts as a failure means no valid token text was produced. */
    if( U_FAILURE(status) && status!=U_BUFFER_OVERFLOW_ERROR ){
      return SQLITE_ERROR;
    }
  } while( nByte>pCsr->nBuffer );

  *ppToken = pCsr->zBuffer;
  *pnBytes = nByte;
  *piStartOffset = pCsr->aOffset[iStart];
  *piEndOffset = pCsr->aOffset[iEnd];
  *piPosition = pCsr->iToken++;

  return SQLITE_OK;
}

/*
** The set of routines that implement the ICU tokenizer
*/
static const sqlite3_tokenizer_module icuTokenizerModule = {
  0,                           /* iVersion */
  icuCreate,                   /* xCreate  */
  icuDestroy,                  /* xDestroy */
  icuOpen,                     /* xOpen    */
  icuClose,                    /* xClose   */
  icuNext,                     /* xNext    */
};

/*
** Set *ppModule to point at the implementation of the ICU tokenizer.
*/
void sqlite3Fts2IcuTokenizerModule(
  sqlite3_tokenizer_module const**ppModule
){
  *ppModule = &icuTokenizerModule;
}

#endif /* defined(SQLITE_ENABLE_ICU) */
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -