📄 pgc.l
字号:
%{
/*-------------------------------------------------------------------------
 *
 * pgc.l
 *	  lexical scanner for ecpg
 *
 * This is a modified version of src/backend/parser/scan.l
 *
 *
 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.122.2.3 2004/07/20 18:22:53 meskes Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres_fe.h"

#include <ctype.h>
#include <sys/types.h>
#include <limits.h>
#include <errno.h>

#include "extern.h"

extern YYSTYPE yylval;

static int		xcdepth = 0;	/* depth of nesting in slash-star comments */

/*
 * literalbuf is used to accumulate literal values when multiple rules
 * are needed to parse a single literal.  Call startlit to reset buffer
 * to empty, addlit to add text.  Note that the buffer is permanently
 * malloc'd to the largest size needed so far in the current run.
 */
static char	   *literalbuf = NULL;		/* expandable buffer */
static int		literallen;				/* actual current length */
static int		literalalloc;			/* current allocated buffer size */

#define startlit()	(literalbuf[0] = '\0', literallen = 0)
static void addlit(char *ytext, int yleng);
static void addlitchar (unsigned char);
static void parse_include (void);

char *token_start;
int state_before;

struct _yy_buffer
{
	YY_BUFFER_STATE		buffer;
	long				lineno;
	char			   *filename;
	struct _yy_buffer  *next;
} *yy_buffer = NULL;

static char *old;

#define MAX_NESTED_IF 128
static short preproc_tos;
static short ifcond;
static struct _if_value
{
	short condition;
	short else_branch;
} stacked_if_value[MAX_NESTED_IF];

%}

%option 8bit
%option never-interactive
%option noyywrap

%option yylineno

%s C SQL incl def def_ident

/*
 * OK, here is a short description of lex/flex rules behavior.
 * The longest pattern which matches an input string is always chosen.
 * For equal-length patterns, the first occurring in the rules list is chosen.
 * INITIAL is the starting state, to which all non-conditional rules apply.
 * Exclusive states change parsing rules while the state is active.  When in
 * an exclusive state, only those rules defined for that state apply.
 *
 * We use exclusive states for quoted strings, extended comments,
 * and to eliminate parsing troubles for numeric strings.
 * Exclusive states:
 *  <xb> bit string literal
 *  <xc> extended C-style comments - thomas 1997-07-12
 *  <xd> delimited identifiers (double-quoted identifiers) - thomas 1997-10-27
 *  <xdc> double-quoted C strings (see xdcinside below)
 *  <xh> hexadecimal numeric string - thomas 1997-11-16
 *  <xq> quoted strings - thomas 1997-07-30
 */

%x xb
%x xc
%x xd
%x xdc
%x xh
%x xq
%x xpre
%x xcond
%x xskip

/* Bit string */
xbstart			[bB]{quote}
xbstop			{quote}
xbinside		[^']*
xbcat			{quote}{whitespace_with_newline}{quote}

/* Hexadecimal number */
xhstart			[xX]{quote}
xhstop			{quote}
xhinside		[^']*
xhcat			{quote}{whitespace_with_newline}{quote}

/* National character */
xnstart			[nN]{quote}

/* C version of hex number */
xch				0[xX][0-9A-Fa-f]*

/* Extended quote
 * xqdouble implements SQL92 embedded quote
 * xqcat allows strings to cross input lines
 */
quote			'
xqstart			{quote}
xqstop			{quote}
xqdouble		{quote}{quote}
xqinside		[^\\']+
xqescape		[\\][^0-7]
xqoctesc		[\\][0-7]{1,3}
xqcat			{quote}{whitespace_with_newline}{quote}

/* Double quote
 * Allows embedded spaces and other special characters into identifiers.
 */
dquote			\"
xdstart			{dquote}
xdstop			{dquote}
xddouble		{dquote}{dquote}
xdinside		[^"]+

/* special stuff for C strings */
xdcqq			\\\\
xdcqdq			\\\"
xdcother		[^"]
xdcinside		({xdcqq}|{xdcqdq}|{xdcother})

/* C-style comments
 *
 * The "extended comment" syntax closely resembles allowable operator syntax.
 * The tricky part here is to get lex to recognize a string starting with
 * slash-star as a comment, when interpreting it as an operator would produce
 * a longer match --- remember lex will prefer a longer match!  Also, if we
 * have something like plus-slash-star, lex will think this is a 3-character
 * operator whereas we want to see it as a + operator and a comment start.
 * The solution is two-fold:
 * 1. append {op_chars}* to xcstart so that it matches as much text as
 *    {operator} would. Then the tie-breaker (first matching rule of same
 *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
 *    in case it contains a star-slash that should terminate the comment.
 * 2. In the operator rule, check for slash-star within the operator, and
 *    if found throw it back with yyless().  This handles the plus-slash-star
 *    problem.
 * SQL92-style comments, which start with dash-dash, have similar interactions
 * with the operator rule.
 */
xcstart			\/\*{op_chars}*
xcstop			\*+\/
xcinside		[^*/]+

digit			[0-9]
ident_start		[A-Za-z\200-\377_]
ident_cont		[A-Za-z\200-\377_0-9\$]

identifier		{ident_start}{ident_cont}*

array			({ident_cont}|{whitespace}|[\[\]\+\-\*\%\/\(\)])*
typecast		"::"

/*
 * "self" is the set of chars that should be returned as single-character
 * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
 * which can be one or more characters long (but if a single-char token
 * appears in the "self" set, it is not to be returned as an Op).  Note
 * that the sets overlap, but each has some chars that are not in the other.
 *
 * If you change either set, adjust the character lists appearing in the
 * rule for "operator"!
 */
self			[,()\[\].;\:\+\-\*\/\%\^\<\>\=]
op_chars		[\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
operator		{op_chars}+

/* we no longer allow unary minus in numbers.
 * instead we pass it separately to parser. there it gets
 * coerced via doNegate() -- Leon aug 20 1999
 */

integer			{digit}+
decimal			(({digit}*\.{digit}+)|({digit}+\.{digit}*))
real			((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+))

param			\${integer}

/*
 * In order to make the world safe for Windows and Mac clients as well as
 * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
 * sequence will be seen as two successive newlines, but that doesn't cause
 * any problems.  SQL92-style comments, which start with -- and extend to the
 * next newline, are treated as equivalent to a single whitespace character.
 *
 * NOTE a fine point: if there is no newline following --, we will absorb
 * everything to the end of the input as a comment.  This is correct.  Older
 * versions of Postgres failed to recognize -- as a comment if the input
 * did not end with a newline.
 *
 * XXX perhaps \f (formfeed) should be treated as a newline as well?
 */

ccomment		"//".*\n

space			[ \t\n\r\f]
horiz_space		[ \t\f]
newline			[\n\r]
non_newline		[^\n\r]

comment			("--"{non_newline}*)

whitespace		({space}+|{comment})

/*
 * SQL92 requires at least one newline in the whitespace separating
 * string literals that are to be concatenated.  Silly, but who are we
 * to argue?  Note that {whitespace_with_newline} should not have * after
 * it, whereas {whitespace} should generally have a * after it...
 */

horiz_whitespace		({horiz_space}|{comment})
whitespace_with_newline	({horiz_whitespace}*{newline}{whitespace}*)

/* special characters for other dbms */
/* we have to react differently in compat mode */
informix_special	[\$]

other			.

/* some stuff needed for ecpg */
exec			[eE][xX][eE][cC]
sql				[sS][qQ][lL]
define			[dD][eE][fF][iI][nN][eE]
include			[iI][nN][cC][lL][uU][dD][eE]

ifdef			[iI][fF][dD][eE][fF]
ifndef			[iI][fF][nN][dD][eE][fF]
else			[eE][lL][sS][eE]
elif			[eE][lL][iI][fF]
endif			[eE][nN][dD][iI][fF]

struct			[sS][tT][rR][uU][cC][tT]

exec_sql		{exec}{space}*{sql}{space}*
ipdigit			({digit}|{digit}{digit}|{digit}{digit}{digit})
ip				{ipdigit}\.{ipdigit}\.{ipdigit}\.{ipdigit}

/* we might want to parse all cpp include files */
cppinclude		{space}*#{include}{space}*

/* Take care of cpp continuation lines */
cppline			{space}*#(.*\\{space})+.*

/*
 * Quoted strings must allow some special characters such as single-quote
 *  and newline.
 * Embedded single-quotes are implemented both in the SQL92-standard
 *  style of two adjacent single quotes "''" and in the Postgres/Java style
 *  of escaped-quote "\'".
 * Other embedded escaped characters are matched explicitly and the leading
 *  backslash is dropped from the string. - thomas 1997-09-24
 * Note that xcstart must appear before operator, as explained above!
 *  Also whitespace (comment) must appear before operator.
 */

%%

%{
		/* code to execute during start of each call of yylex() */
		token_start = NULL;
%}

<SQL>{whitespace} { /* ignore */ }

{xcstart} {
		token_start = yytext;
		state_before = YYSTATE;
		xcdepth = 0;
		BEGIN(xc);
		/* Put back any characters past slash-star; see above */
		yyless(2);
		fputs("/*", yyout);
	}
<xc>{xcstart} {
		xcdepth++;
		/* Put back any characters past slash-star; see above */
		yyless(2);
		fputs("/*", yyout);
	}

<xc>{xcstop} {
		ECHO;
		if (xcdepth <= 0)
		{
			BEGIN(state_before);
			token_start = NULL;
		}
		else
			xcdepth--;
	}

<xc>{xcinside} { ECHO; }
<xc>{op_chars} { ECHO; }

<xc><<EOF>> { mmerror(PARSE_ERROR, ET_FATAL, "Unterminated /* comment"); }

<SQL>{xbstart} {
		token_start = yytext;
		BEGIN(xb);
		startlit();
		addlitchar('b');
	}
<xb>{xbstop} {
		BEGIN(SQL);
		/*
		 * literalbuf[0] is the 'b' prefix stored by the xbstart rule,
		 * so the digits begin at offset 1.  Skip the leading run of
		 * '0'/'1' from there; if that run does not reach the
		 * terminator, the literal contains a character that is not
		 * a bit.
		 */
		if (literalbuf[strspn(literalbuf + 1, "01") + 1] != '\0')
			mmerror(PARSE_ERROR, ET_ERROR, "invalid bit string input.");
		/* return a copy: the next startlit() overwrites literalbuf */
		yylval.str = mm_strdup(literalbuf);
		return BCONST;
	}

<xh>{xhinside} |
<xb>{xbinside} { addlit(yytext, yyleng); }
<xh>{xhcat} |
<xb>{xbcat} { /* ignore */ }
<xb><<EOF>> { mmerror(PARSE_ERROR, ET_FATAL, "Unterminated bit string"); }

<SQL>{xhstart} {
		token_start = yytext;
		BEGIN(xh);
		startlit();
		addlitchar('x');
	}
<xh>{xhstop} {
		/*
		 * xh is an exclusive state entered only from SQL; leave it
		 * here (as the xb stop rule does), otherwise the text after
		 * the closing quote would still be scanned as hex content.
		 */
		BEGIN(SQL);
		/* return a copy: the next startlit() overwrites literalbuf */
		yylval.str = mm_strdup(literalbuf);
		return XCONST;
	}

<xh><<EOF>> { mmerror(PARSE_ERROR, ET_FATAL, "Unterminated hexadecimal integer"); }

<SQL>{xnstart} {
		/* National character.
		 * Need to remember type info to flow it forward into the parser.
		 * Not yet implemented. - thomas 2002-06-17
		 */
		token_start = yytext;
		/* the xq stop rule returns to state_before, so record it here */
		state_before = YYSTATE;
		BEGIN(xq);
		startlit();
	}
<C,SQL>{xqstart} {
		token_start = yytext;
		state_before = YYSTATE;
		BEGIN(xq);
		startlit();
	}
<xq>{xqstop} {
		BEGIN(state_before);
		yylval.str = mm_strdup(literalbuf);
		return SCONST;
	}
<xq>{xqdouble} { addlitchar('\''); }
<xq>{xqinside} { addlit(yytext, yyleng); }
<xq>{xqescape} { addlit(yytext, yyleng); }
<xq>{xqoctesc} { addlit(yytext, yyleng); }
<xq>{xqcat} { /* ignore */ }

<xq><<EOF>> { mmerror(PARSE_ERROR, ET_FATAL, "Unterminated quoted string"); }

<SQL>{xdstart} {
		state_before = YYSTATE;
		BEGIN(xd);
		startlit();
	}
<xd>{xdstop} {
		BEGIN(state_before);
		if (literallen == 0)
			mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier");
		yylval.str = mm_strdup(literalbuf);
		return CSTRING;
	}
<xdc>{xdstop} {
		BEGIN(state_before);
		yylval.str = mm_strdup(literalbuf);
		return CSTRING;
	}
<xd>{xddouble} { addlitchar('"'); }
<xd>{xdinside} { addlit(yytext, yyleng); }
<xd,xdc><<EOF>> { mmerror(PARSE_ERROR, ET_FATAL, "Unterminated quoted identifier"); }
<C,SQL>{xdstart} {
		state_before = YYSTATE;
		BEGIN(xdc);
		startlit();
	}
<xdc>{xdcinside} { addlit(yytext, yyleng); }
<SQL>{typecast} { return TYPECAST; }
<SQL>{informix_special} {
		/* are we simulating Informix? */
		if (INFORMIX_MODE)
		{
			unput(':');
		}
		else
			return yytext[0];
	}
<SQL>{self} {
		/*
		 * We may find a ';' inside a structure
		 * definition in a TYPE or VAR statement.
		 * This is not an EOL marker.
		 */
		if (yytext[0] == ';' && struct_level == 0)
			BEGIN C;
		return yytext[0];
	}
<SQL>{operator} {
		/*
		 * Check for embedded slash-star or dash-dash; those
		 * are comment starts, so operator must stop there.
		 * Note that slash-star or dash-dash at the first
		 * character will match a prior rule, not this one.
		 */
		int		nchars = yyleng;
		char   *slashstar = strstr(yytext, "/*");
		char   *dashdash = strstr(yytext, "--");

		if (slashstar && dashdash)
		{
			/* if both appear, take the first one */
			if (slashstar > dashdash)
				slashstar = dashdash;
		}
		else if (!slashstar)
			slashstar = dashdash;
		if (slashstar)
			nchars = slashstar - yytext;

		/*
		 * For SQL92 compatibility, '+' and '-' cannot be the
		 * last char of a multi-char operator unless the operator
		 * contains chars that are not in SQL92 operators.
		 * The idea is to lex '=-' as two operators, but not
		 * to forbid operator names like '?-' that could not be
		 * sequences of SQL92 operators.
		 */
		while (nchars > 1 &&
			   (yytext[nchars-1] == '+' ||
				yytext[nchars-1] == '-'))
		{
			int		ic;

			for (ic = nchars-2; ic >= 0; ic--)
			{
				if (strchr("~!@#^&|`?%", yytext[ic]))
					break;
			}
			if (ic >= 0)
				break; /* found a char that makes it OK */
			nchars--; /* else remove the +/-, and check again */
		}

		if (nchars < yyleng)
		{
			/* Strip the unwanted chars from the token */
			yyless(nchars);
			/*
			 * If what we have left is only one char, and it's
			 * one of the characters matching "self", then
			 * return it as a character token the same way
			 * that the "self" rule would have.
			 */
			if (nchars == 1 &&
				strchr(",()[].;:+-*/%^<>=", yytext[0]))
				return yytext[0];
		}

		/* Convert "!=" operator to "<>" for compatibility */
		if (strcmp(yytext, "!=") == 0)
			yylval.str = mm_strdup("<>");
		else
			yylval.str = mm_strdup(yytext);
		return Op;
	}
<SQL>{param} {
		yylval.ival = atol(yytext+1);
		return PARAM;
	}
<C,SQL>{integer} {
		long val;
		char* endptr;

		errno = 0;
		val = strtol((char *)yytext, &endptr,10);
		if (*endptr != '\0' || errno == ERANGE
#ifdef HAVE_LONG_INT_64
			/* if long > 32 bits, check for overflow of int4 */
			|| val != (long) ((int32) val)
#endif
			)
		{
			errno = 0;
			yylval.str = mm_strdup(yytext);
			return FCONST;
		}
		yylval.ival = val;
		return ICONST;
	}
<SQL>{ip} {
		yylval.str = mm_strdup(yytext);
		return IP;
	}
{decimal} {
		yylval.str = mm_strdup(yytext);
		return FCONST;
	}
<C,SQL>{real} {
		yylval.str = mm_strdup(yytext);
		return FCONST;
	}
<SQL>:{identifier}((("->"|\.){identifier})|(\[{array}\]))* {
		yylval.str = mm_strdup(yytext+1);
		return(CVARIABLE);
	}
<SQL>{identifier} {
		ScanKeyword *keyword;
		struct _defines *ptr;

		/* How about a DEFINE? */
		for (ptr = defines; ptr; ptr = ptr->next)
		{
			if (strcmp(yytext, ptr->old) == 0 && ptr->used == NULL)
			{
				struct _yy_buffer *yb;

				yb = mm_alloc(sizeof(struct _yy_buffer));

				yb->buffer = YY_CURRENT_BUFFER;
				yb->lineno = yylineno;
				yb->filename = mm_strdup(input_filename);
				ptr->used = yb->next = yy_buffer;

				yy_buffer = yb;

				yy_scan_string(ptr->new);
				break;
			}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -