tokenize.c

来自「sqlite 嵌入式数据库的源码」· C语言代码 · 共 671 行 · 第 1/2 页
671 行
/*
** 2001 September 15
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** A tokenizer for SQL
**
** This file contains C code that splits an SQL input string up into
** individual tokens and sends those tokens one-by-one over to the
** parser for analysis.
**
** $Id: tokenize.c,v 1.103 2005/06/06 14:45:43 drh Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>
#include <stdlib.h>

/*
** The sqlite3KeywordCode function looks up an identifier to determine if
** it is a keyword.  If it is a keyword, the token code of that keyword is 
** returned.  If the input is not a keyword, TK_ID is returned.
**
** The implementation of this routine was generated by a program,
** mkkeywordhash.c, located in the tool subdirectory of the distribution.
** The output of the mkkeywordhash.c program is written into a file
** named keywordhash.h and then included into this source file by
** the #include below.
*/
#include "keywordhash.h"


/*
** If X is a character that can be used in an identifier and
** X&0x80==0 then isIdChar[X] will be 1.  If X&0x80==0x80 then
** X is always an identifier character.  (Hence all UTF-8
** characters can be part of an identifier).  isIdChar[X] will
** be 0 for every character in the lower 128 ASCII characters
** that cannot be used as part of an identifier.
**
** In this implementation, an identifier can be a string of
** alphabetic characters, digits, and "_" plus any character
** with the high-order bit set.  The latter rule means that
** any sequence of UTF-8 characters or characters taken from
** an extended ISO8859 character set can form an identifier.
**
** Ticket #1066.  The SQL standard does not allow '$' in the
** middle of identifiers.  But many SQL implementations do. 
** SQLite will allow '$' in identifiers for compatibility.
** But the feature is undocumented.
**
** The table is indexed by (character - 0x20) and therefore only covers
** the range 0x20..0x7f.
*/
static const char isIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
};

/*
** True if C may appear in an identifier.  NOTE: this macro assigns its
** argument to a variable named "c" that must be in scope (and writable)
** at the point of use.
*/
#define IdChar(C)  (((c=(C))&0x80)!=0 || (c>0x1f && isIdChar[c-0x20]))

/*
** Return the length of the token that begins at z[0]. 
** Store the token type in *tokenType before returning.
**
** z must point at a NUL-terminated string; the scanning loops below stop
** at the terminator.  The returned length is always at least 1.  If no
** rule matches, *tokenType is set to TK_ILLEGAL and 1 is returned.
*/
static int getToken(const unsigned char *z, int *tokenType){
  int i, c;
  switch( *z ){
    case ' ': case '\t': case '\n': case '\f': case '\r': {
      /* A run of whitespace is a single TK_SPACE token */
      for(i=1; isspace(z[i]); i++){}
      *tokenType = TK_SPACE;
      return i;
    }
    case '-': {
      if( z[1]=='-' ){
        /* "--" comment: runs up to, but not including, the newline */
        for(i=2; (c=z[i])!=0 && c!='\n'; i++){}
        *tokenType = TK_COMMENT;
        return i;
      }
      *tokenType = TK_MINUS;
      return 1;
    }
    case '(': {
      *tokenType = TK_LP;
      return 1;
    }
    case ')': {
      *tokenType = TK_RP;
      return 1;
    }
    case ';': {
      *tokenType = TK_SEMI;
      return 1;
    }
    case '+': {
      *tokenType = TK_PLUS;
      return 1;
    }
    case '*': {
      *tokenType = TK_STAR;
      return 1;
    }
    case '/': {
      if( z[1]!='*' || z[2]==0 ){
        *tokenType = TK_SLASH;
        return 1;
      }
      /* C-style comment: scan for the closing star-slash pair.  An
      ** unterminated comment extends to the end of the input. */
      for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){}
      if( c ) i++;   /* include the closing '/' */
      *tokenType = TK_COMMENT;
      return i;
    }
    case '%': {
      *tokenType = TK_REM;
      return 1;
    }
    case '=': {
      /* Both "=" and "==" are TK_EQ */
      *tokenType = TK_EQ;
      return 1 + (z[1]=='=');
    }
    case '<': {
      if( (c=z[1])=='=' ){
        *tokenType = TK_LE;
        return 2;
      }else if( c=='>' ){
        *tokenType = TK_NE;
        return 2;
      }else if( c=='<' ){
        *tokenType = TK_LSHIFT;
        return 2;
      }else{
        *tokenType = TK_LT;
        return 1;
      }
    }
    case '>': {
      if( (c=z[1])=='=' ){
        *tokenType = TK_GE;
        return 2;
      }else if( c=='>' ){
        *tokenType = TK_RSHIFT;
        return 2;
      }else{
        *tokenType = TK_GT;
        return 1;
      }
    }
    case '!': {
      if( z[1]!='=' ){
        /* A lone '!' is illegal.  Consume only the '!' itself: z[1] may
        ** be the NUL terminator, and a length of 2 would make callers
        ** that advance by the returned length step past the end of the
        ** input string. */
        *tokenType = TK_ILLEGAL;
        return 1;
      }else{
        *tokenType = TK_NE;
        return 2;
      }
    }
    case '|': {
      if( z[1]!='|' ){
        *tokenType = TK_BITOR;
        return 1;
      }else{
        *tokenType = TK_CONCAT;
        return 2;
      }
    }
    case ',': {
      *tokenType = TK_COMMA;
      return 1;
    }
    case '&': {
      *tokenType = TK_BITAND;
      return 1;
    }
    case '~': {
      *tokenType = TK_BITNOT;
      return 1;
    }
    case '#': {
      /* '#' followed by digits; a '-' is accepted only directly after
      ** the '#' */
      for(i=1; isdigit(z[i]) || (i==1 && z[1]=='-'); i++){}
      *tokenType = TK_REGISTER;
      return i;
    }
    case '\'': case '"': {
      /* Quoted string.  A doubled delimiter inside the string is an
      ** escaped delimiter and does not end the token. */
      int delim = z[0];
      for(i=1; (c=z[i])!=0; i++){
        if( c==delim ){
          if( z[i+1]==delim ){
            i++;
          }else{
            break;
          }
        }
      }
      if( c ) i++;   /* include the closing delimiter */
      *tokenType = TK_STRING;
      return i;
    }
    case '.': {
      *tokenType = TK_DOT;
      return 1;
    }
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9': {
      *tokenType = TK_INTEGER;
      for(i=1; isdigit(z[i]); i++){}
#ifndef SQLITE_OMIT_FLOATING_POINT
      /* Optional fraction: '.' followed by at least one digit */
      if( z[i]=='.' && isdigit(z[i+1]) ){
        i += 2;
        while( isdigit(z[i]) ){ i++; }
        *tokenType = TK_FLOAT;
      }
      /* Optional exponent: 'e' or 'E', optional sign, at least one digit */
      if( (z[i]=='e' || z[i]=='E') &&
           ( isdigit(z[i+1]) 
            || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2]))
           )
      ){
        i += 2;
        while( isdigit(z[i]) ){ i++; }
        *tokenType = TK_FLOAT;
      }
#endif
      return i;
    }
    case '[': {
      /* Bracketed identifier: everything through the closing ']' (or to
      ** the end of input if there is none) */
      for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
      *tokenType = TK_ID;
      return i;
    }
    case '?': {
      /* '?' optionally followed by digits */
      *tokenType = TK_VARIABLE;
      for(i=1; isdigit(z[i]); i++){}
      return i;
    }
    case ':': {
      /* ':' followed by at least one identifier character */
      for(i=1; IdChar(z[i]); i++){}
      *tokenType = i>1 ? TK_VARIABLE : TK_ILLEGAL;
      return i;
    }
#ifndef SQLITE_OMIT_TCL_VARIABLE
    case '$': {
      *tokenType = TK_VARIABLE;
      if( z[1]=='{' ){
        /* ${...} form with nested braces */
        int nBrace = 1;
        for(i=2; (c=z[i])!=0 && nBrace; i++){
          if( c=='{' ){
            nBrace++;
          }else if( c=='}' ){
            nBrace--;
          }
        }
        /* Illegal only if a brace is still open.  Testing nBrace rather
        ** than c matters because the loop condition reads z[i] before it
        ** checks nBrace: when the closing '}' is the last character of
        ** the input, c is 0 even though the braces are balanced. */
        if( nBrace ) *tokenType = TK_ILLEGAL;
      }else{
        /* $name form: alphanumerics and '_', with "::" separators and an
        ** optional trailing "(...)" that must not contain whitespace */
        int n = 0;
        for(i=1; (c=z[i])!=0; i++){
          if( isalnum(c) || c=='_' ){
            n++;
          }else if( c=='(' && n>0 ){
            do{
              i++;
            }while( (c=z[i])!=0 && !isspace(c) && c!=')' );
            if( c==')' ){
              i++;
            }else{
              *tokenType = TK_ILLEGAL;
            }
            break;
          }else if( c==':' && z[i+1]==':' ){
            i++;
          }else{
            break;
          }
        }
        if( n==0 ) *tokenType = TK_ILLEGAL;
      }
      return i;
    }
#endif
#ifndef SQLITE_OMIT_BLOB_LITERAL
    case 'x': case 'X': {
      if( (c=z[1])=='\'' || c=='"' ){
        /* Blob literal: x'...' containing only hex digits.  The closing
        ** delimiter must sit at an even index, which means an even number
        ** of hex digits. */
        int delim = c;
        *tokenType = TK_BLOB;
        for(i=2; (c=z[i])!=0; i++){
          if( c==delim ){
            if( i%2 ) *tokenType = TK_ILLEGAL;
            break;
          }
          if( !isxdigit(c) ){
            *tokenType = TK_ILLEGAL;
            return i;
          }
        }
        if( c ) i++;   /* include the closing delimiter */
        return i;
      }
      /* Otherwise fall through to the next case */
    }
#endif
    default: {
      if( !IdChar(*z) ){
        break;
      }
      /* Identifier or keyword; keywordCode() picks the token code */
      for(i=1; IdChar(z[i]); i++){}
      *tokenType = keywordCode((char*)z, i);
      return i;
    }
  }
  *tokenType = TK_ILLEGAL;
  return 1;
}

/*
** Externally visible wrapper around getToken().  Same arguments and
** return value.
*/
int sqlite3GetToken(const unsigned char *z, int *tokenType){
  return getToken(z, tokenType);
}

/*
** Run the parser on the given SQL string.  The parser structure is
** passed in.  An SQLITE_ status code is returned.  If an error occurs
** and pzErrMsg!=NULL then an error message might be written into 
** memory obtained from malloc() and *pzErrMsg made to point to that
** error message.  Or maybe not.
*/
int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){
  int nErr = 0;
  int i;
  void *pEngine;
  int tokenType;
  int lastTokenParsed = -1;
  sqlite3 *db = pParse->db;
tokenize.c - 源码说明

本页面展示了「sqlite 嵌入式数据库的源码」中的 tokenize.c 源码文件，采用 C语言编程语言编写，共 671 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与sqlite相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?