⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 simple_tokenizer.c

📁 最新的sqlite3.6.2源代码
💻 C
字号:
/*** The author disclaims copyright to this source code.***************************************************************************** Implementation of the "simple" full-text-search tokenizer.*/#include <assert.h>#if !defined(__APPLE__)#include <malloc.h>#else#include <stdlib.h>#endif#include <stdio.h>#include <string.h>#include <ctype.h>#include "tokenizer.h"/* Duplicate a string; the caller must free() the returned string. * (We don't use strdup() since it's not part of the standard C library and * may not be available everywhere.) *//* TODO(shess) Copied from fulltext.c, consider util.c for such** things. */static char *string_dup(const char *s){  char *str = malloc(strlen(s) + 1);  strcpy(str, s);  return str;}typedef struct simple_tokenizer {  sqlite3_tokenizer base;  const char *zDelim;          /* token delimiters */} simple_tokenizer;typedef struct simple_tokenizer_cursor {  sqlite3_tokenizer_cursor base;  const char *pInput;          /* input we are tokenizing */  int nBytes;                  /* size of the input */  const char *pCurrent;        /* current position in pInput */  int iToken;                  /* index of next token to be returned */  char *zToken;                /* storage for current token */  int nTokenBytes;             /* actual size of current token */  int nTokenAllocated;         /* space allocated to zToken buffer */} simple_tokenizer_cursor;static sqlite3_tokenizer_module simpleTokenizerModule;/* forward declaration */static int simpleCreate(  int argc, const char **argv,  sqlite3_tokenizer **ppTokenizer){  simple_tokenizer *t;  t = (simple_tokenizer *) malloc(sizeof(simple_tokenizer));  /* TODO(shess) Delimiters need to remain the same from run to run,  ** else we need to reindex.  One solution would be a meta-table to  ** track such information in the database, then we'd only want this  ** information on the initial create.  */  if( argc>1 ){    t->zDelim = string_dup(argv[1]);  } else {    /* Build a string excluding alphanumeric ASCII characters */    char zDelim[0x80];               /* nul-terminated, so nul not a member */    int i, j;    for(i=1, j=0; i<0x80; i++){      if( !isalnum(i) ){        zDelim[j++] = i;      }    }    zDelim[j++] = '\0';    assert( j<=sizeof(zDelim) );    t->zDelim = string_dup(zDelim);  }  *ppTokenizer = &t->base;  return SQLITE_OK;}static int simpleDestroy(sqlite3_tokenizer *pTokenizer){  simple_tokenizer *t = (simple_tokenizer *) pTokenizer;  free((void *) t->zDelim);  free(t);  return SQLITE_OK;}static int simpleOpen(  sqlite3_tokenizer *pTokenizer,  const char *pInput, int nBytes,  sqlite3_tokenizer_cursor **ppCursor){  simple_tokenizer_cursor *c;  c = (simple_tokenizer_cursor *) malloc(sizeof(simple_tokenizer_cursor));  c->pInput = pInput;  c->nBytes = nBytes<0 ? (int) strlen(pInput) : nBytes;  c->pCurrent = c->pInput;        /* start tokenizing at the beginning */  c->iToken = 0;  c->zToken = NULL;               /* no space allocated, yet. */  c->nTokenBytes = 0;  c->nTokenAllocated = 0;  *ppCursor = &c->base;  return SQLITE_OK;}static int simpleClose(sqlite3_tokenizer_cursor *pCursor){  simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;  if( NULL!=c->zToken ){    free(c->zToken);  }  free(c);  return SQLITE_OK;}static int simpleNext(  sqlite3_tokenizer_cursor *pCursor,  const char **ppToken, int *pnBytes,  int *piStartOffset, int *piEndOffset, int *piPosition){  simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;  simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;  int ii;  while( c->pCurrent-c->pInput<c->nBytes ){    int n = (int) strcspn(c->pCurrent, t->zDelim);    if( n>0 ){      if( n+1>c->nTokenAllocated ){        c->zToken = realloc(c->zToken, n+1);      }      for(ii=0; ii<n; ii++){        /* TODO(shess) This needs expansion to handle UTF-8        ** case-insensitivity.        */        char ch = c->pCurrent[ii];        c->zToken[ii] = (unsigned char)ch<0x80 ? tolower(ch) : ch;      }      c->zToken[n] = '\0';      *ppToken = c->zToken;      *pnBytes = n;      *piStartOffset = (int) (c->pCurrent-c->pInput);      *piEndOffset = *piStartOffset+n;      *piPosition = c->iToken++;      c->pCurrent += n + 1;      return SQLITE_OK;    }    c->pCurrent += n + 1;    /* TODO(shess) could strspn() to skip delimiters en masse.  Needs    ** to happen in two places, though, which is annoying.    */  }  return SQLITE_DONE;}static sqlite3_tokenizer_module simpleTokenizerModule = {  0,  simpleCreate,  simpleDestroy,  simpleOpen,  simpleClose,  simpleNext,};void get_simple_tokenizer_module(  sqlite3_tokenizer_module **ppModule){  *ppModule = &simpleTokenizerModule;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -