📄 scan.l

📁 postgresql8.3.4源码,开源数据库
💻 L
字号:
%{
/*-------------------------------------------------------------------------
 *
 * scan.l		- Scanner for the PL/pgSQL
 *			  procedural language
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.60 2008/01/01 19:46:00 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */

#include "plpgsql.h"

#include "mb/pg_wchar.h"


/* No reason to constrain amount of data slurped */
#define YY_READ_BUF_SIZE 16777216

/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
#undef fprintf
#define fprintf(file, fmt, msg)  ereport(ERROR, (errmsg_internal("%s", msg)))

/* Handles to the buffer that the lexer uses internally */
static YY_BUFFER_STATE scanbufhandle;
static char *scanbuf;

static const char *scanstr;		/* original input string */

static int	scanner_functype;		/* token returned on the first call */
static bool	scanner_typereported;	/* has scanner_functype been returned? */
static int	pushback_token;			/* token saved by plpgsql_push_back_token */
static bool have_pushback_token;
static const char *cur_line_start;	/* line-counting position in scanbuf */
static int	cur_line_num;
static char    *dolqstart;      /* current $foo$ quote start string */
static int	dolqlen;			/* signal to plpgsql_get_string_value */

bool plpgsql_SpaceScanned = false;
%}

%option 8bit
%option never-interactive
%option nodefault
%option nounput
%option noyywrap
%option prefix="plpgsql_base_yy"

%option case-insensitive


%x	IN_STRING
%x	IN_COMMENT
%x	IN_DOLLARQUOTE

digit			[0-9]
ident_start		[A-Za-z\200-\377_]
ident_cont		[A-Za-z\200-\377_0-9\$]

quoted_ident	(\"[^\"]*\")+

identifier		({ident_start}{ident_cont}*|{quoted_ident})

param			\${digit}+

space			[ \t\n\r\f]

/* $foo$ style quotes ("dollar quoting")
 * copied straight from the backend SQL parser
 */
dolq_start		[A-Za-z\200-\377_]
dolq_cont		[A-Za-z\200-\377_0-9]
dolqdelim		\$({dolq_start}{dolq_cont}*)?\$
dolqinside		[^$]+

%%
    /* ----------
     * Local variables in scanner to remember where
     * a string or comment started
     * ----------
     */
    int	start_lineno = 0;
	char *start_charpos = NULL;

    /* ----------
     * Reset the state when entering the scanner
     * ----------
     */
    BEGIN(INITIAL);
    plpgsql_SpaceScanned = false;

    /* ----------
     * On the first call to a new source report the
     * function's type (T_FUNCTION or T_TRIGGER)
     * ----------
     */
	if (!scanner_typereported)
	{
		scanner_typereported = true;
		return scanner_functype;
	}

    /* ----------
     * The keyword rules
     *
     * Note that both ":=" and "=" are returned as K_ASSIGN.
     * ----------
     */
:=				{ return K_ASSIGN;			}
=				{ return K_ASSIGN;			}
\.\.			{ return K_DOTDOT;			}
alias			{ return K_ALIAS;			}
begin			{ return K_BEGIN;			}
by				{ return K_BY;   			}
close			{ return K_CLOSE;			}
constant		{ return K_CONSTANT;		}
continue		{ return K_CONTINUE;		}
cursor			{ return K_CURSOR;			}
debug			{ return K_DEBUG;			}
declare			{ return K_DECLARE;			}
default			{ return K_DEFAULT;			}
diagnostics		{ return K_DIAGNOSTICS;		}
else			{ return K_ELSE;			}
elseif			{ return K_ELSIF;			}
elsif			{ return K_ELSIF;			}
end				{ return K_END;				}
exception		{ return K_EXCEPTION;		}
execute			{ return K_EXECUTE;			}
exit			{ return K_EXIT;			}
fetch			{ return K_FETCH;			}
for				{ return K_FOR;				}
from			{ return K_FROM;			}
get				{ return K_GET;				}
if				{ return K_IF;				}
in				{ return K_IN;				}
info			{ return K_INFO;			}
insert			{ return K_INSERT;			}
into			{ return K_INTO;			}
is				{ return K_IS;				}
log				{ return K_LOG;				}
loop			{ return K_LOOP;			}
move			{ return K_MOVE;			}
no{space}+scroll { return K_NOSCROLL;		}
not				{ return K_NOT;				}
notice			{ return K_NOTICE;			}
null			{ return K_NULL;			}
open			{ return K_OPEN;			}
or				{ return K_OR;				}
perform			{ return K_PERFORM;			}
raise			{ return K_RAISE;			}
rename			{ return K_RENAME;			}
result_oid		{ return K_RESULT_OID;		}
return			{ return K_RETURN;			}
reverse			{ return K_REVERSE;			}
row_count		{ return K_ROW_COUNT;		}
scroll			{ return K_SCROLL;			}
strict			{ return K_STRICT;	    	}
then			{ return K_THEN;			}
to				{ return K_TO;				}
type			{ return K_TYPE;			}
warning			{ return K_WARNING;			}
when			{ return K_WHEN;			}
while			{ return K_WHILE;			}

^#option		{ return O_OPTION;			}
dump			{ return O_DUMP;			}


    /* ----------
     * Special word rules
	 *
	 * We set plpgsql_error_lineno in each rule so that errors reported
	 * in the pl_comp.c subroutines will point to the right place.
     * ----------
     */
{identifier}					{
	plpgsql_error_lineno = plpgsql_scanner_lineno();
	return plpgsql_parse_word(yytext); }
{identifier}{space}*\.{space}*{identifier}	{
	plpgsql_error_lineno = plpgsql_scanner_lineno();
	return plpgsql_parse_dblword(yytext); }
{identifier}{space}*\.{space}*{identifier}{space}*\.{space}*{identifier}	{
	plpgsql_error_lineno = plpgsql_scanner_lineno();
	return plpgsql_parse_tripword(yytext); }
{identifier}{space}*%TYPE		{
	plpgsql_error_lineno = plpgsql_scanner_lineno();
	return plpgsql_parse_wordtype(yytext); }
{identifier}{space}*\.{space}*{identifier}{space}*%TYPE	{
	plpgsql_error_lineno = plpgsql_scanner_lineno();
	return plpgsql_parse_dblwordtype(yytext); }
{identifier}{space}*\.{space}*{identifier}{space}*\.{space}*{identifier}{space}*%TYPE	{
	plpgsql_error_lineno = plpgsql_scanner_lineno();
	return plpgsql_parse_tripwordtype(yytext); }
{identifier}{space}*%ROWTYPE	{
	plpgsql_error_lineno = plpgsql_scanner_lineno();
	return plpgsql_parse_wordrowtype(yytext); }
{identifier}{space}*\.{space}*{identifier}{space}*%ROWTYPE	{
	plpgsql_error_lineno = plpgsql_scanner_lineno();
	return plpgsql_parse_dblwordrowtype(yytext); }
{param}							{
	plpgsql_error_lineno = plpgsql_scanner_lineno();
	return plpgsql_parse_word(yytext); }
{param}{space}*\.{space}*{identifier}	{
	plpgsql_error_lineno = plpgsql_scanner_lineno();
	return plpgsql_parse_dblword(yytext); }
{param}{space}*\.{space}*{identifier}{space}*\.{space}*{identifier}	{
	plpgsql_error_lineno = plpgsql_scanner_lineno();
	return plpgsql_parse_tripword(yytext); }
{param}{space}*%TYPE			{
	plpgsql_error_lineno = plpgsql_scanner_lineno();
	return plpgsql_parse_wordtype(yytext); }
{param}{space}*\.{space}*{identifier}{space}*%TYPE	{
	plpgsql_error_lineno = plpgsql_scanner_lineno();
	return plpgsql_parse_dblwordtype(yytext); }
{param}{space}*\.{space}*{identifier}{space}*\.{space}*{identifier}{space}*%TYPE	{
	plpgsql_error_lineno = plpgsql_scanner_lineno();
	return plpgsql_parse_tripwordtype(yytext); }
{param}{space}*%ROWTYPE		{
	plpgsql_error_lineno = plpgsql_scanner_lineno();
	return plpgsql_parse_wordrowtype(yytext); }
{param}{space}*\.{space}*{identifier}{space}*%ROWTYPE	{
	plpgsql_error_lineno = plpgsql_scanner_lineno();
	return plpgsql_parse_dblwordrowtype(yytext); }

{digit}+		{ return T_NUMBER;			}

\".				{
				plpgsql_error_lineno = plpgsql_scanner_lineno();
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("unterminated quoted identifier")));
			}

    /* ----------
     * Ignore whitespaces but remember this happened
     * ----------
     */
{space}+		{ plpgsql_SpaceScanned = true;		}

    /* ----------
     * Eat up comments
     * ----------
     */
--[^\r\n]*		;

\/\*			{ start_lineno = plpgsql_scanner_lineno();
			  BEGIN(IN_COMMENT);
			}
<IN_COMMENT>\*\/	{ BEGIN(INITIAL); plpgsql_SpaceScanned = true; }
<IN_COMMENT>\n		;
<IN_COMMENT>.		;
<IN_COMMENT><<EOF>>	{
				plpgsql_error_lineno = start_lineno;
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("unterminated comment")));
			}

    /* ----------
     * Collect anything inside of ''s and return one STRING token
	 *
	 * Hacking yytext/yyleng here lets us avoid using yymore(), which is
	 * a win for performance.  It's safe because we know the underlying
	 * input buffer is not changing.
     * ----------
     */
'			{
			  start_lineno = plpgsql_scanner_lineno();
			  start_charpos = yytext;
			  BEGIN(IN_STRING);
			}
[eE]'		{
			  /* for now, treat the same as a regular literal */
			  start_lineno = plpgsql_scanner_lineno();
			  start_charpos = yytext;
			  BEGIN(IN_STRING);
			}
<IN_STRING>\\.		{ }
<IN_STRING>\\		{ /* can only happen with \ at EOF */ }
<IN_STRING>''		{ }
<IN_STRING>'		{
			  /* tell plpgsql_get_string_value it's not a dollar quote */
			  dolqlen = 0;
			  /* adjust yytext/yyleng to describe whole string token */
			  yyleng += (yytext - start_charpos);
			  yytext = start_charpos;
			  BEGIN(INITIAL);
			  return T_STRING;
			}
<IN_STRING>[^'\\]+	{ }
<IN_STRING><<EOF>>	{
				plpgsql_error_lineno = start_lineno;
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("unterminated string")));
			}

{dolqdelim}		{
			  start_lineno = plpgsql_scanner_lineno();
			  start_charpos = yytext;
			  dolqstart = pstrdup(yytext);
			  BEGIN(IN_DOLLARQUOTE);
			}
<IN_DOLLARQUOTE>{dolqdelim} {
			  if (strcmp(yytext, dolqstart) == 0)
			  {
					pfree(dolqstart);
					/* tell plpgsql_get_string_value it is a dollar quote */
					dolqlen = yyleng;
					/* adjust yytext/yyleng to describe whole string token */
					yyleng += (yytext - start_charpos);
					yytext = start_charpos;
					BEGIN(INITIAL);
					return T_STRING;
			  }
			  else
			  {
					/*
					 * When we fail to match $...$ to dolqstart, transfer
					 * the $... part to the output, but put back the final
					 * $ for rescanning.  Consider $delim$...$junk$delim$
					 */
					yyless(yyleng-1);
			  }
			}
<IN_DOLLARQUOTE>{dolqinside} { }
<IN_DOLLARQUOTE>.	{ /* needed for $ inside the quoted text */ }
<IN_DOLLARQUOTE><<EOF>>	{
				plpgsql_error_lineno = start_lineno;
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("unterminated dollar-quoted string")));
			}

    /* ----------
     * Any unmatched character is returned as is
     * ----------
     */
.			{ return yytext[0];			}

%%


/*
 * This is the yylex routine called from outside. It exists to provide
 * a one-token pushback facility.  Beware of trying to make it do more:
 * for the most part, plpgsql's gram.y assumes that yytext is in step
 * with the "current token".
*/intplpgsql_yylex(void){	if (have_pushback_token)	{		have_pushback_token = false;		return pushback_token;	}	return yylex();}/* * Push back a single token to be re-read by next plpgsql_yylex() call. * * NOTE: this does not cause yytext to "back up". */voidplpgsql_push_back_token(int token){	if (have_pushback_token)		elog(ERROR, "cannot push back multiple tokens");	pushback_token = token;	have_pushback_token = true;}/* * Report a syntax error. */voidplpgsql_yyerror(const char *message){	const char *loc = yytext;	int			cursorpos;	plpgsql_error_lineno = plpgsql_scanner_lineno();	/* in multibyte encodings, return index in characters not bytes */	cursorpos = pg_mbstrlen_with_len(scanbuf, loc - scanbuf) + 1;	if (*loc == YY_END_OF_BUFFER_CHAR)	{		ereport(ERROR,				(errcode(ERRCODE_SYNTAX_ERROR),				 /* translator: %s is typically "syntax error" */				 errmsg("%s at end of input", message),				 internalerrposition(cursorpos),				 internalerrquery(scanstr)));	}	else	{		ereport(ERROR,				(errcode(ERRCODE_SYNTAX_ERROR),				 /* translator: first %s is typically "syntax error" */				 errmsg("%s at or near \"%s\"", message, loc),				 internalerrposition(cursorpos),				 internalerrquery(scanstr)));	}}/* * Get the line number at which the current token ends.  This substitutes * for flex's very poorly implemented yylineno facility. * * We assume that flex has written a '\0' over the character following the * current token in scanbuf.  So, we just have to count the '\n' characters * before that.  We optimize this a little by keeping track of the last * '\n' seen so far. */intplpgsql_scanner_lineno(void){	const char *c;	while ((c = strchr(cur_line_start, '\n')) != NULL)	{		cur_line_start = c + 1;		cur_line_num++;	}	return cur_line_num;}/* * Called before any actual parsing is done * * Note: the passed "str" must remain valid until plpgsql_scanner_finish(). * Although it is not fed directly to flex, we need the original string * to cite in error messages. 
*/voidplpgsql_scanner_init(const char *str, int functype){	Size	slen;	slen = strlen(str);	/*	 * Might be left over after ereport()	 */	if (YY_CURRENT_BUFFER)		yy_delete_buffer(YY_CURRENT_BUFFER);	/*	 * Make a scan buffer with special termination needed by flex.	 */	scanbuf = palloc(slen + 2);	memcpy(scanbuf, str, slen);	scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;	scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);	/* Other setup */	scanstr = str;    scanner_functype = functype;    scanner_typereported = false;	have_pushback_token = false;	cur_line_start = scanbuf;	cur_line_num = 1;	/*----------	 * Hack: skip any initial newline, so that in the common coding layout	 *		CREATE FUNCTION ... AS '	 *			code body	 *		' LANGUAGE plpgsql;	 * we will think "line 1" is what the programmer thinks of as line 1.	 *----------	 */    if (*cur_line_start == '\r')        cur_line_start++;    if (*cur_line_start == '\n')        cur_line_start++;	BEGIN(INITIAL);}/* * Called after parsing is done to clean up after plpgsql_scanner_init() */voidplpgsql_scanner_finish(void){	yy_delete_buffer(scanbufhandle);	pfree(scanbuf);}/* * Called after a T_STRING token is read to get the string literal's value * as a palloc'd string.  (We make this a separate call because in many * scenarios there's no need to get the decoded value.) * * Note: we expect the literal to be the most recently lexed token.  This * would not work well if we supported multiple-token pushback or if  * plpgsql_yylex() wanted to read ahead beyond a T_STRING token. */char *plpgsql_get_string_value(void){	char	   *result;	const char *cp;	int			len;	if (dolqlen > 0)	{		/* Token is a $foo$...$foo$ string */		len = yyleng - 2 * dolqlen;		Assert(len >= 0);		result = (char *) palloc(len + 1);		memcpy(result, yytext + dolqlen, len);		result[len] = '\0';	}	else if (*yytext == 'E' || *yytext == 'e')	{		/* Token is an E'...' 
string */		result = (char *) palloc(yyleng + 1);	/* more than enough room */		len = 0;		for (cp = yytext + 2; *cp; cp++)		{			if (*cp == '\'')			{				if (cp[1] == '\'')					result[len++] = *cp++;				/* else it must be string end quote */			}			else if (*cp == '\\')			{				if (cp[1] != '\0')	/* just a paranoid check */					result[len++] = *(++cp);			}			else				result[len++] = *cp;		}		result[len] = '\0';	}	else	{		/* Token is a '...' string */		result = (char *) palloc(yyleng + 1);	/* more than enough room */		len = 0;		for (cp = yytext + 1; *cp; cp++)		{			if (*cp == '\'')			{				if (cp[1] == '\'')					result[len++] = *cp++;				/* else it must be string end quote */			}			else if (*cp == '\\')			{				if (cp[1] != '\0')	/* just a paranoid check */					result[len++] = *(++cp);			}			else				result[len++] = *cp;		}		result[len] = '\0';	}	return result;}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -