📄 tokenize.c
字号:
/*
** 2001 September 15
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** A tokenizer for SQL
**
** This file contains C code that splits an SQL input string up into
** individual tokens and sends those tokens one-by-one over to the
** parser for analysis.
**
** $Id: tokenize.c,v 1.68 2004/02/14 23:59:58 drh Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>
#include <stdlib.h>

/*
** All the keywords of the SQL language are stored in a hash table
** composed of instances of the following structure.
**
** Only zName and tokenType are given in the static initializers below;
** len and iNext start out as zero and are filled in at run time by
** sqliteKeywordCode() the first time it is called.
*/
typedef struct Keyword Keyword;
struct Keyword {
  char *zName;     /* The keyword name */
  u8 tokenType;    /* Token value for this keyword */
  u8 len;          /* Length of this keyword */
  u8 iNext;        /* 1 + index in aKeywordTable[] of the next keyword
                   ** with the same hash, or 0 at the end of the chain */
};

/*
** These are the keywords.  Several spellings may share one token code
** (the join modifiers all map to TK_JOIN_KW, and TEMP/TEMPORARY both
** map to TK_TEMP).
*/
static Keyword aKeywordTable[] = {
  { "ABORT",        TK_ABORT        },
  { "AFTER",        TK_AFTER        },
  { "ALL",          TK_ALL          },
  { "AND",          TK_AND          },
  { "AS",           TK_AS           },
  { "ASC",          TK_ASC          },
  { "ATTACH",       TK_ATTACH       },
  { "BEFORE",       TK_BEFORE       },
  { "BEGIN",        TK_BEGIN        },
  { "BETWEEN",      TK_BETWEEN      },
  { "BY",           TK_BY           },
  { "CASCADE",      TK_CASCADE      },
  { "CASE",         TK_CASE         },
  { "CHECK",        TK_CHECK        },
  { "CLUSTER",      TK_CLUSTER      },
  { "COLLATE",      TK_COLLATE      },
  { "COMMIT",       TK_COMMIT       },
  { "CONFLICT",     TK_CONFLICT     },
  { "CONSTRAINT",   TK_CONSTRAINT   },
  { "COPY",         TK_COPY         },
  { "CREATE",       TK_CREATE       },
  { "CROSS",        TK_JOIN_KW      },
  { "DATABASE",     TK_DATABASE     },
  { "DEFAULT",      TK_DEFAULT      },
  { "DEFERRED",     TK_DEFERRED     },
  { "DEFERRABLE",   TK_DEFERRABLE   },
  { "DELETE",       TK_DELETE       },
  { "DELIMITERS",   TK_DELIMITERS   },
  { "DESC",         TK_DESC         },
  { "DETACH",       TK_DETACH       },
  { "DISTINCT",     TK_DISTINCT     },
  { "DROP",         TK_DROP         },
  { "END",          TK_END          },
  { "EACH",         TK_EACH         },
  { "ELSE",         TK_ELSE         },
  { "EXCEPT",       TK_EXCEPT       },
  { "EXPLAIN",      TK_EXPLAIN      },
  { "FAIL",         TK_FAIL         },
  { "FOR",          TK_FOR          },
  { "FOREIGN",      TK_FOREIGN      },
  { "FROM",         TK_FROM         },
  { "FULL",         TK_JOIN_KW      },
  { "GLOB",         TK_GLOB         },
  { "GROUP",        TK_GROUP        },
  { "HAVING",       TK_HAVING       },
  { "IGNORE",       TK_IGNORE       },
  { "IMMEDIATE",    TK_IMMEDIATE    },
  { "IN",           TK_IN           },
  { "INDEX",        TK_INDEX        },
  { "INITIALLY",    TK_INITIALLY    },
  { "INNER",        TK_JOIN_KW      },
  { "INSERT",       TK_INSERT       },
  { "INSTEAD",      TK_INSTEAD      },
  { "INTERSECT",    TK_INTERSECT    },
  { "INTO",         TK_INTO         },
  { "IS",           TK_IS           },
  { "ISNULL",       TK_ISNULL       },
  { "JOIN",         TK_JOIN         },
  { "KEY",          TK_KEY          },
  { "LEFT",         TK_JOIN_KW      },
  { "LIKE",         TK_LIKE         },
  { "LIMIT",        TK_LIMIT        },
  { "MATCH",        TK_MATCH        },
  { "NATURAL",      TK_JOIN_KW      },
  { "NOT",          TK_NOT          },
  { "NOTNULL",      TK_NOTNULL      },
  { "NULL",         TK_NULL         },
  { "OF",           TK_OF           },
  { "OFFSET",       TK_OFFSET       },
  { "ON",           TK_ON           },
  { "OR",           TK_OR           },
  { "ORDER",        TK_ORDER        },
  { "OUTER",        TK_JOIN_KW      },
  { "PRAGMA",       TK_PRAGMA       },
  { "PRIMARY",      TK_PRIMARY      },
  { "RAISE",        TK_RAISE        },
  { "REFERENCES",   TK_REFERENCES   },
  { "REPLACE",      TK_REPLACE      },
  { "RESTRICT",     TK_RESTRICT     },
  { "RIGHT",        TK_JOIN_KW      },
  { "ROLLBACK",     TK_ROLLBACK     },
  { "ROW",          TK_ROW          },
  { "SELECT",       TK_SELECT       },
  { "SET",          TK_SET          },
  { "STATEMENT",    TK_STATEMENT    },
  { "TABLE",        TK_TABLE        },
  { "TEMP",         TK_TEMP         },
  { "TEMPORARY",    TK_TEMP         },
  { "THEN",         TK_THEN         },
  { "TRANSACTION",  TK_TRANSACTION  },
  { "TRIGGER",      TK_TRIGGER      },
  { "UNION",        TK_UNION        },
  { "UNIQUE",       TK_UNIQUE       },
  { "UPDATE",       TK_UPDATE       },
  { "USING",        TK_USING        },
  { "VACUUM",       TK_VACUUM       },
  { "VALUES",       TK_VALUES       },
  { "VIEW",         TK_VIEW         },
  { "WHEN",         TK_WHEN         },
  { "WHERE",        TK_WHERE        },
};

/*
** This is the hash table.  Each slot holds 1 + the index in
** aKeywordTable[] of the first keyword in that bucket, or 0 if the
** bucket is empty.
*/
#define KEY_HASH_SIZE 101
static u8 aiHashTable[KEY_HASH_SIZE];

/*
** This function looks up an identifier to determine if it is a
** keyword.  If it is a keyword, the token code of that keyword is
** returned.
If the input is not a keyword, TK_ID is returned.*/int sqliteKeywordCode(const char *z, int n){ int h, i; Keyword *p; static char needInit = 1; if( needInit ){ /* Initialize the keyword hash table */ sqliteOsEnterMutex(); if( needInit ){ int nk; nk = sizeof(aKeywordTable)/sizeof(aKeywordTable[0]); for(i=0; i<nk; i++){ aKeywordTable[i].len = strlen(aKeywordTable[i].zName); h = sqliteHashNoCase(aKeywordTable[i].zName, aKeywordTable[i].len); h %= KEY_HASH_SIZE; aKeywordTable[i].iNext = aiHashTable[h]; aiHashTable[h] = i+1; } needInit = 0; } sqliteOsLeaveMutex(); } h = sqliteHashNoCase(z, n) % KEY_HASH_SIZE; for(i=aiHashTable[h]; i; i=p->iNext){ p = &aKeywordTable[i-1]; if( p->len==n && sqliteStrNICmp(p->zName, z, n)==0 ){ return p->tokenType; } } return TK_ID;}/*** If X is a character that can be used in an identifier and** X&0x80==0 then isIdChar[X] will be 1. If X&0x80==0x80 then** X is always an identifier character. (Hence all UTF-8** characters can be part of an identifier). isIdChar[X] will** be 0 for every character in the lower 128 ASCII characters** that cannot be used as part of an identifier.**** In this implementation, an identifier can be a string of** alphabetic characters, digits, and "_" plus any character** with the high-order bit set. 
The latter rule means that** any sequence of UTF-8 characters or characters taken from** an extended ISO8859 character set can form an identifier.*/static const char isIdChar[] = {/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */};/*** Return the length of the token that begins at z[0]. ** Store the token type in *tokenType before returning.*/static int sqliteGetToken(const unsigned char *z, int *tokenType){ int i; switch( *z ){ case ' ': case '\t': case '\n': case '\f': case '\r': { for(i=1; isspace(z[i]); i++){} *tokenType = TK_SPACE; return i; } case '-': { if( z[1]=='-' ){ for(i=2; z[i] && z[i]!='\n'; i++){} *tokenType = TK_COMMENT; return i; } *tokenType = TK_MINUS; return 1; } case '(': { *tokenType = TK_LP; return 1; } case ')': { *tokenType = TK_RP; return 1; } case ';': { *tokenType = TK_SEMI; return 1; } case '+': { *tokenType = TK_PLUS; return 1; } case '*': { *tokenType = TK_STAR; return 1; } case '/': { if( z[1]!='*' || z[2]==0 ){ *tokenType = TK_SLASH; return 1; } for(i=3; z[i] && (z[i]!='/' || z[i-1]!='*'); i++){} if( z[i] ) i++; *tokenType = TK_COMMENT; return i; } case '%': { *tokenType = TK_REM; return 1; } case '=': { *tokenType = TK_EQ; return 1 + (z[1]=='='); } case '<': { if( z[1]=='=' ){ *tokenType = TK_LE; return 2; }else if( z[1]=='>' ){ *tokenType = TK_NE; return 2; }else if( z[1]=='<' ){ *tokenType = TK_LSHIFT; return 2; }else{ *tokenType = TK_LT; return 1; } } case '>': { if( z[1]=='=' ){ *tokenType = TK_GE; return 2; }else if( z[1]=='>' ){ *tokenType = TK_RSHIFT; return 2; }else{ 
*tokenType = TK_GT; return 1; } } case '!': { if( z[1]!='=' ){ *tokenType = TK_ILLEGAL; return 2; }else{ *tokenType = TK_NE; return 2; } } case '|': { if( z[1]!='|' ){ *tokenType = TK_BITOR; return 1; }else{ *tokenType = TK_CONCAT; return 2; } } case ',': { *tokenType = TK_COMMA; return 1; } case '&': { *tokenType = TK_BITAND; return 1; } case '~': { *tokenType = TK_BITNOT; return 1; } case '\'': case '"': { int delim = z[0]; for(i=1; z[i]; i++){ if( z[i]==delim ){ if( z[i+1]==delim ){ i++; }else{ break; } } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -