📄 pgc.l

📁 PostgreSQL7.4.6 for Linux
💻 L
📖 第 1 页 / 共 2 页
字号:
12 下一页
%{/*------------------------------------------------------------------------- * * pgc.l *	  lexical scanner for ecpg * * This is a modified version of src/backend/parser/scan.l * * * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION *	  $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.122.2.3 2004/07/20 18:22:53 meskes Exp $ * *------------------------------------------------------------------------- */#include "postgres_fe.h"#include <ctype.h>#include <sys/types.h>#include <limits.h>#include <errno.h>#include "extern.h"extern YYSTYPE yylval;static int		xcdepth = 0;	/* depth of nesting in slash-star comments *//* * literalbuf is used to accumulate literal values when multiple rules * are needed to parse a single literal.  Call startlit to reset buffer * to empty, addlit to add text.  Note that the buffer is permanently * malloc'd to the largest size needed so far in the current run. */static char    *literalbuf = NULL;		/* expandable buffer */static int		literallen;				/* actual current length */static int		literalalloc;			/* current allocated buffer size */#define startlit()	(literalbuf[0] = '\0', literallen = 0)static void addlit(char *ytext, int yleng);static void addlitchar (unsigned char);static void parse_include (void);char *token_start;int state_before;struct _yy_buffer { 	YY_BUFFER_STATE		buffer;	long				lineno;	char		  		*filename;	struct _yy_buffer 	*next;} *yy_buffer = NULL;static char *old;#define MAX_NESTED_IF 128static short preproc_tos;static short ifcond;static struct _if_value {	short condition;	short else_branch;} stacked_if_value[MAX_NESTED_IF];%}%option 8bit%option never-interactive%option noyywrap%option yylineno%s C SQL incl def def_ident/* * OK, here is a short description of lex/flex rules behavior. * The longest pattern which matches an input string is always chosen. * For equal-length patterns, the first occurring in the rules list is chosen. * INITIAL is the starting state, to which all non-conditional rules apply. * Exclusive states change parsing rules while the state is active.  When in * an exclusive state, only those rules defined for that state apply. * * We use exclusive states for quoted strings, extended comments, * and to eliminate parsing troubles for numeric strings. * Exclusive states: *	<xb> bit string literal *	<xc> extended C-style comments - thomas 1997-07-12 *	<xd> delimited identifiers (double-quoted identifiers) - thomas 1997-10-27 *	<xh> hexadecimal numeric string - thomas 1997-11-16 *	<xq> quoted strings - thomas 1997-07-30 */%x xb%x xc%x xd%x xdc%x xh%x xq%x xpre%x xcond%x xskip/* Bit string */xbstart			[bB]{quote}xbstop			{quote}xbinside		[^']*xbcat			{quote}{whitespace_with_newline}{quote}/* Hexadecimal number */xhstart			[xX]{quote}xhstop			{quote}xhinside		[^']*xhcat			{quote}{whitespace_with_newline}{quote}/* National character */xnstart                        [nN]{quote}/* C version of hex number */xch			0[xX][0-9A-Fa-f]*/* Extended quote * xqdouble implements SQL92 embedded quote * xqcat allows strings to cross input lines */quote			'xqstart			{quote}xqstop			{quote}xqdouble		{quote}{quote}xqinside		[^\\']+xqescape                [\\][^0-7]xqoctesc                [\\][0-7]{1,3}xqcat			{quote}{whitespace_with_newline}{quote}/* Double quote * Allows embedded spaces and other special characters into identifiers. */dquote			\"xdstart			{dquote}xdstop			{dquote}xddouble				{dquote}{dquote}xdinside		[^"]+/* special stuff for C strings */xdcqq			\\\\xdcqdq			\\\"xdcother		[^"]xdcinside		({xdcqq}|{xdcqdq}|{xdcother})/* C-style comments * * The "extended comment" syntax closely resembles allowable operator syntax. * The tricky part here is to get lex to recognize a string starting with * slash-star as a comment, when interpreting it as an operator would produce * a longer match --- remember lex will prefer a longer match!	Also, if we * have something like plus-slash-star, lex will think this is a 3-character * operator whereas we want to see it as a + operator and a comment start. * The solution is two-fold: * 1. append {op_chars}* to xcstart so that it matches as much text as *	  {operator} would. Then the tie-breaker (first matching rule of same *	  length) ensures xcstart wins.  We put back the extra stuff with yyless() *	  in case it contains a star-slash that should terminate the comment. * 2. In the operator rule, check for slash-star within the operator, and *	  if found throw it back with yyless().  This handles the plus-slash-star *	  problem. * SQL92-style comments, which start with dash-dash, have similar interactions * with the operator rule. */xcstart			\/\*{op_chars}*xcstop			\*+\/xcinside		[^*/]+digit			[0-9]ident_start		[A-Za-z\200-\377_]ident_cont		[A-Za-z\200-\377_0-9\$]identifier		{ident_start}{ident_cont}*array			({ident_cont}|{whitespace}|[\[\]\+\-\*\%\/\(\)])*typecast		"::"/* * "self" is the set of chars that should be returned as single-character * tokens.	"op_chars" is the set of chars that can make up "Op" tokens, * which can be one or more characters long (but if a single-char token * appears in the "self" set, it is not to be returned as an Op).  Note * that the sets overlap, but each has some chars that are not in the other. * * If you change either set, adjust the character lists appearing in the * rule for "operator"! */self			[,()\[\].;\:\+\-\*\/\%\^\<\>\=]op_chars		[\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]operator		{op_chars}+/* we no longer allow unary minus in numbers. * instead we pass it separately to parser. there it gets * coerced via doNegate() -- Leon aug 20 1999 */integer			{digit}+decimal			(({digit}*\.{digit}+)|({digit}+\.{digit}*))real			((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+))param			\${integer}/* * In order to make the world safe for Windows and Mac clients as well as * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n * sequence will be seen as two successive newlines, but that doesn't cause * any problems.  SQL92-style comments, which start with -- and extend to the * next newline, are treated as equivalent to a single whitespace character. * * NOTE a fine point: if there is no newline following --, we will absorb * everything to the end of the input as a comment.  This is correct.  Older * versions of Postgres failed to recognize -- as a comment if the input * did not end with a newline. * * XXX perhaps \f (formfeed) should be treated as a newline as well? */ccomment		"//".*\nspace			[ \t\n\r\f]horiz_space		[ \t\f]newline			[\n\r]non_newline		[^\n\r]comment			("--"{non_newline}*)whitespace		({space}+|{comment})/* * SQL92 requires at least one newline in the whitespace separating * string literals that are to be concatenated.  Silly, but who are we * to argue?  Note that {whitespace_with_newline} should not have * after * it, whereas {whitespace} should generally have a * after it... */horiz_whitespace	({horiz_space}|{comment})whitespace_with_newline ({horiz_whitespace}*{newline}{whitespace}*)/* special characters for other dbms *//* we have to react differently in compat mode */informix_special	[\$]other			./* some stuff needed for ecpg */exec	[eE][xX][eE][cC]sql		[sS][qQ][lL]define	[dD][eE][fF][iI][nN][eE]include [iI][nN][cC][lL][uU][dD][eE]ifdef	[iI][fF][dD][eE][fF]ifndef	[iI][fF][nN][dD][eE][fF]else	[eE][lL][sS][eE]elif	[eE][lL][iI][fF]endif	[eE][nN][dD][iI][fF]struct	[sS][tT][rR][uU][cC][tT]exec_sql		{exec}{space}*{sql}{space}*ipdigit			({digit}|{digit}{digit}|{digit}{digit}{digit})ip			{ipdigit}\.{ipdigit}\.{ipdigit}\.{ipdigit}/* we might want to parse all cpp include files */cppinclude 		{space}*#{include}{space}*/* Take care of cpp continuation lines */cppline			{space}*#(.*\\{space})+.*/* * Quoted strings must allow some special characters such as single-quote *	and newline. * Embedded single-quotes are implemented both in the SQL92-standard *	style of two adjacent single quotes "''" and in the Postgres/Java style *	of escaped-quote "\'". * Other embedded escaped characters are matched explicitly and the leading *	backslash is dropped from the string. - thomas 1997-09-24 * Note that xcstart must appear before operator, as explained above! *	Also whitespace (comment) must appear before operator. */%%%{                                       /* code to execute during start of each call of yylex() */                                       token_start = NULL;%}<SQL>{whitespace}	{ /* ignore */ }{xcstart}			{						token_start = yytext;						state_before = YYSTATE;						xcdepth = 0;						BEGIN(xc);						/* Put back any characters past slash-star; see above */						yyless(2);						fputs("/*", yyout);					}<xc>{xcstart}		{						xcdepth++;				 		/* Put back any characters past slash-star; see above */				 		yyless(2);						fputs("/*", yyout);					}<xc>{xcstop}		{						ECHO;						if (xcdepth <= 0)						{							BEGIN(state_before);							token_start = NULL;						}						else							xcdepth--;					}<xc>{xcinside}		{ ECHO; }<xc>{op_chars}		{ ECHO; }<xc><<EOF>>		{ mmerror(PARSE_ERROR, ET_FATAL, "Unterminated /* comment"); }<SQL>{xbstart}	{						token_start = yytext;						BEGIN(xb);						startlit();						addlitchar('b');					}<xb>{xbstop}	{						BEGIN(SQL);						if (literalbuf[strspn(literalbuf, "01") + 1] != '\0')							mmerror(PARSE_ERROR, ET_ERROR, "invalid bit string input.");						yylval.str = literalbuf;						return BCONST;					}<xh>{xhinside}	|<xb>{xbinside}	{ addlit(yytext, yyleng); }<xh>{xhcat}		|<xb>{xbcat}		{ /* ignore */ }<xb><<EOF>>		{ mmerror(PARSE_ERROR, ET_FATAL, "Unterminated bit string"); }<SQL>{xhstart}		{						token_start = yytext;						BEGIN(xh);						startlit();						addlitchar('x');					}<xh>{xhstop}		{						yylval.str = literalbuf;						return XCONST;					}<xh><<EOF>>		{ mmerror(PARSE_ERROR, ET_FATAL, "Unterminated hexadecimal integer"); }<SQL>{xnstart}              {				/* National character.				 * Need to remember type info to flow it forward into the parser.		                 * Not yet implemented. - thomas 2002-06-17		                 */			        token_start = yytext;				BEGIN(xq);				startlit();			}<C,SQL>{xqstart}	{						token_start = yytext;						state_before = YYSTATE;						BEGIN(xq);						startlit();					}<xq>{xqstop}		{						BEGIN(state_before);						yylval.str = mm_strdup(literalbuf);						return SCONST;					}<xq>{xqdouble}		{ addlitchar('\''); }<xq>{xqinside}		{ addlit(yytext, yyleng); }<xq>{xqescape}  	{ addlit(yytext, yyleng); }<xq>{xqoctesc}          { addlit(yytext, yyleng); }<xq>{xqcat}		{ /* ignore */ }<xq><<EOF>>		{ mmerror(PARSE_ERROR, ET_FATAL, "Unterminated quoted string"); }<SQL>{xdstart}		{						state_before = YYSTATE;						BEGIN(xd);						startlit();					}<xd>{xdstop}		{						BEGIN(state_before);						if (literallen == 0)							mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier");						yylval.str = mm_strdup(literalbuf);						return CSTRING;					}<xdc>{xdstop}		{						BEGIN(state_before);						yylval.str = mm_strdup(literalbuf);						return CSTRING;					}<xd>{xddouble}		{ addlitchar('"'); }<xd>{xdinside}		{ addlit(yytext, yyleng); }<xd,xdc><<EOF>>		{ mmerror(PARSE_ERROR, ET_FATAL, "Unterminated quoted identifier"); }<C,SQL>{xdstart}	{						state_before = YYSTATE;						BEGIN(xdc);						startlit();					}<xdc>{xdcinside}	{ addlit(yytext, yyleng); }<SQL>{typecast}		{ return TYPECAST; }<SQL>{informix_special}	{			  /* are we simulating Informix? */                          if (INFORMIX_MODE)			  {			  	unput(':');			  }			  else				return yytext[0];			}<SQL>{self}			{ /*					   * We may find a ';' inside a structure					   * definition in a TYPE or VAR statement.					   * This is not an EOL marker.					   */					  if (yytext[0] == ';' && struct_level == 0)						 BEGIN C;					  return yytext[0];					}<SQL>{operator}		{						/*						 * Check for embedded slash-star or dash-dash; those						 * are comment starts, so operator must stop there.						 * Note that slash-star or dash-dash at the first						 * character will match a prior rule, not this one.						 */						int		nchars = yyleng;						char   *slashstar = strstr(yytext, "/*");						char   *dashdash = strstr(yytext, "--");						if (slashstar && dashdash)						{							/* if both appear, take the first one */							if (slashstar > dashdash)								slashstar = dashdash;						}						else if (!slashstar)							slashstar = dashdash;						if (slashstar)							nchars = slashstar - yytext;						/*						 * For SQL92 compatibility, '+' and '-' cannot be the						 * last char of a multi-char operator unless the operator						 * contains chars that are not in SQL92 operators.						 * The idea is to lex '=-' as two operators, but not						 * to forbid operator names like '?-' that could not be						 * sequences of SQL92 operators.						 */						while (nchars > 1 &&							   (yytext[nchars-1] == '+' ||								yytext[nchars-1] == '-'))						{							int		ic;							for (ic = nchars-2; ic >= 0; ic--)							{								if (strchr("~!@#^&|`?%", yytext[ic]))									break;							}							if (ic >= 0)								break; /* found a char that makes it OK */							nchars--; /* else remove the +/-, and check again */						}						if (nchars < yyleng)						{							/* Strip the unwanted chars from the token */							yyless(nchars);							/*							 * If what we have left is only one char, and it's							 * one of the characters matching "self", then							 * return it as a character token the same way							 * that the "self" rule would have.							 */							if (nchars == 1 &&								strchr(",()[].;:+-*/%^<>=", yytext[0]))								return yytext[0];						}						/* Convert "!=" operator to "<>" for compatibility */						if (strcmp(yytext, "!=") == 0)							yylval.str = mm_strdup("<>");						else							yylval.str = mm_strdup(yytext);						return Op;					}<SQL>{param}		{						yylval.ival = atol(yytext+1);						return PARAM;					}<C,SQL>{integer}	{						long val;						char* endptr;						errno = 0;						val = strtol((char *)yytext, &endptr,10);						if (*endptr != '\0' || errno == ERANGE#ifdef HAVE_LONG_INT_64							/* if long > 32 bits, check for overflow of int4 */							|| val != (long) ((int32) val)#endif							)						{							errno = 0;							yylval.str = mm_strdup(yytext);							return FCONST;						}						yylval.ival = val;						return ICONST;					}<SQL>{ip}			{						yylval.str = mm_strdup(yytext);						return IP;					}{decimal}			{						yylval.str = mm_strdup(yytext);						return FCONST;					}<C,SQL>{real}		{						yylval.str = mm_strdup(yytext);						return FCONST;					}<SQL>:{identifier}((("->"|\.){identifier})|(\[{array}\]))*	{						yylval.str = mm_strdup(yytext+1);						return(CVARIABLE);					}<SQL>{identifier}	{						ScanKeyword    *keyword;						struct _defines *ptr;						/* How about a DEFINE? */						for (ptr = defines; ptr; ptr = ptr->next)						{							if (strcmp(yytext, ptr->old) == 0 && ptr->used == NULL)							{								struct _yy_buffer *yb;								yb = mm_alloc(sizeof(struct _yy_buffer));								yb->buffer =  YY_CURRENT_BUFFER;								yb->lineno = yylineno;								yb->filename = mm_strdup(input_filename);								ptr->used = yb->next = yy_buffer;								yy_buffer = yb;								yy_scan_string(ptr->new);								break;							}
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -