📄 scan.l

📁 PostgreSQL 8.1.4的源码适用于Linux下的开源数据库系统
💻 L
📖 第 1 页 / 共 2 页
字号:
12 下一页
%{
/*-------------------------------------------------------------------------
 *
 * scan.l
 *	  lexical scanner for PostgreSQL
 *
 * NOTE NOTE NOTE:
 *
 * The rules in this file must be kept in sync with psql's lexer!!!
 *
 * The rules are designed so that the scanner never has to backtrack,
 * in the sense that there is always a rule that can match the input
 * consumed so far (the rule action may internally throw back some input
 * with yyless(), however).  As explained in the flex manual, this makes
 * for a useful speed increase --- about a third faster than a plain -CF
 * lexer, in simple testing.  The extra complexity is mostly in the rules
 * for handling float numbers and continued string literals.  If you change
 * the lexical rules, verify that you haven't broken the no-backtrack
 * property by running flex with the "-b" option and checking that the
 * resulting "lex.backup" file says that no backing up is needed.
 *
 *
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.128.2.1 2006/05/21 20:11:02 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <ctype.h>
#include <unistd.h>

#include "parser/gramparse.h"
#include "parser/keywords.h"
/* Not needed now that this file is compiled as part of gram.y */
/* #include "parser/parse.h" */
#include "parser/scansup.h"
#include "mb/pg_wchar.h"

/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
#undef fprintf
#define fprintf(file, fmt, msg)  ereport(ERROR, (errmsg_internal("%s", msg)))

extern YYSTYPE yylval;

static int		xcdepth = 0;	/* depth of nesting in slash-star comments */
static char    *dolqstart;      /* current $foo$ quote start string */

/*
 * GUC variables.  This is a DIRECT violation of the warning given at the
 * head of gram.y, ie flex/bison code must not depend on any GUC variables;
 * as such, changing their values can induce very unintuitive behavior.
 * But we shall have to live with it as a short-term thing until the switch
 * to SQL-standard string syntax is complete.
 */
BackslashQuoteType backslash_quote = BACKSLASH_QUOTE_SAFE_ENCODING;
bool			escape_string_warning = true;

static bool		warn_on_first_escape;

/*
 * literalbuf is used to accumulate literal values when multiple rules
 * are needed to parse a single literal.  Call startlit to reset buffer
 * to empty, addlit to add text.  Note that the buffer is palloc'd and
 * starts life afresh on every parse cycle.
 */
static char	   *literalbuf;		/* expandable buffer */
static int		literallen;		/* actual current length */
static int		literalalloc;	/* current allocated buffer size */

#define startlit()  (literalbuf[0] = '\0', literallen = 0)
static void addlit(char *ytext, int yleng);
static void addlitchar(unsigned char ychar);
static char *litbufdup(void);

static int	pg_err_position(void);
static void check_escape_warning(void);

/*
 * When we parse a token that requires multiple lexer rules to process,
 * we set token_start to point at the true start of the token, for use
 * by yyerror().  yytext will point at just the text consumed by the last
 * rule, so it's not very helpful (e.g., it might contain just the last
 * quote mark of a quoted identifier).  But to avoid cluttering every rule
 * with setting token_start, we allow token_start = NULL to denote that
 * it's okay to use yytext.
 */
static char	   *token_start;

/* Handles to the buffer that the lexer uses internally */
static YY_BUFFER_STATE scanbufhandle;
static char *scanbuf;

unsigned char unescape_single_char(unsigned char c);

%}

%option 8bit
%option never-interactive
%option nodefault
%option nounput
%option noyywrap
%option prefix="base_yy"

/*
 * OK, here is a short description of lex/flex rules behavior.
* The longest pattern which matches an input string is always chosen.
 * For equal-length patterns, the first occurring in the rules list is chosen.
 * INITIAL is the starting state, to which all non-conditional rules apply.
 * Exclusive states change parsing rules while the state is active.  When in
 * an exclusive state, only those rules defined for that state apply.
 *
 * We use exclusive states for quoted strings, extended comments,
 * and to eliminate parsing troubles for numeric strings.
 * Exclusive states:
 *  <xb> bit string literal
 *  <xc> extended C-style comments
 *  <xd> delimited identifiers (double-quoted identifiers)
 *  <xh> hexadecimal numeric string
 *  <xq> quoted strings
 *  <xdolq> $foo$ quoted strings
 */

%x xb
%x xc
%x xd
%x xh
%x xq
%x xdolq

/*
 * In order to make the world safe for Windows and Mac clients as well as
 * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
 * sequence will be seen as two successive newlines, but that doesn't cause
 * any problems.  Comments that start with -- and extend to the next
 * newline are treated as equivalent to a single whitespace character.
 *
 * NOTE a fine point: if there is no newline following --, we will absorb
 * everything to the end of the input as a comment.  This is correct.  Older
 * versions of Postgres failed to recognize -- as a comment if the input
 * did not end with a newline.
 *
 * XXX perhaps \f (formfeed) should be treated as a newline as well?
 */

space			[ \t\n\r\f]
horiz_space		[ \t\f]
newline			[\n\r]
non_newline		[^\n\r]

comment			("--"{non_newline}*)

whitespace		({space}+|{comment})

/*
 * SQL requires at least one newline in the whitespace separating
 * string literals that are to be concatenated.  Silly, but who are we
 * to argue?  Note that {whitespace_with_newline} should not have * after
 * it, whereas {whitespace} should generally have a * after it...
 */

special_whitespace		({space}+|{comment}{newline})
horiz_whitespace		({horiz_space}|{comment})
whitespace_with_newline	({horiz_whitespace}*{newline}{special_whitespace}*)

/*
 * To ensure that {quotecontinue} can be scanned without having to back up
 * if the full pattern isn't matched, we include trailing whitespace in
 * {quotestop}.  This matches all cases where {quotecontinue} fails to match,
 * except for {quote} followed by whitespace and just one "-" (not two,
 * which would start a {comment}).  To cover that we have {quotefail}.
 * The actions for {quotestop} and {quotefail} must throw back characters
 * beyond the quote proper.
 */
quote			'
quotestop		{quote}{whitespace}*
quotecontinue	{quote}{whitespace_with_newline}{quote}
quotefail		{quote}{whitespace}*"-"

/* Bit string
 * It is tempting to scan the string for only those characters
 * which are allowed. However, this leads to silently swallowed
 * characters if illegal characters are included in the string.
 * For example, if xbinside is [01] then B'ABCD' is interpreted
 * as a zero-length string, and the ABCD' is lost!
 * Better to pass the string forward and let the input routines
 * validate the contents.
 */
xbstart			[bB]{quote}
xbinside		[^']*

/* Hexadecimal number */
xhstart			[xX]{quote}
xhinside		[^']*

/* National character */
xnstart			[nN]{quote}

/* Quoted string that allows backslash escapes */
xestart			[eE]{quote}

/* Extended quote
 * xqdouble implements embedded quote, ''''
 */
xqstart			{quote}
xqdouble		{quote}{quote}
xqinside		[^\\']+
xqescape		[\\][^0-7]
xqoctesc		[\\][0-7]{1,3}
xqhexesc		[\\]x[0-9A-Fa-f]{1,2}

/* $foo$ style quotes ("dollar quoting")
 * The quoted string starts with $foo$ where "foo" is an optional string
 * in the form of an identifier, except that it may not contain "$",
 * and extends to the first occurrence of an identical string.
 * There is *no* processing of the quoted text.
 *
 * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
 * fails to match its trailing "$".
 */
dolq_start		[A-Za-z\200-\377_]
dolq_cont		[A-Za-z\200-\377_0-9]
dolqdelim		\$({dolq_start}{dolq_cont}*)?\$
dolqfailed		\${dolq_start}{dolq_cont}*
dolqinside		[^$]+

/* Double quote
 * Allows embedded spaces and other special characters into identifiers.
 */
dquote			\"
xdstart			{dquote}
xdstop			{dquote}
xddouble		{dquote}{dquote}
xdinside		[^"]+

/* C-style comments
 *
 * The "extended comment" syntax closely resembles allowable operator syntax.
 * The tricky part here is to get lex to recognize a string starting with
 * slash-star as a comment, when interpreting it as an operator would produce
 * a longer match --- remember lex will prefer a longer match!  Also, if we
 * have something like plus-slash-star, lex will think this is a 3-character
 * operator whereas we want to see it as a + operator and a comment start.
 * The solution is two-fold:
 * 1. append {op_chars}* to xcstart so that it matches as much text as
 *    {operator} would. Then the tie-breaker (first matching rule of same
 *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
 *    in case it contains a star-slash that should terminate the comment.
 * 2. In the operator rule, check for slash-star within the operator, and
 *    if found throw it back with yyless().  This handles the plus-slash-star
 *    problem.
 * Dash-dash comments have similar interactions with the operator rule.
 */
xcstart			\/\*{op_chars}*
xcstop			\*+\/
xcinside		[^*/]+

digit			[0-9]
ident_start		[A-Za-z\200-\377_]
ident_cont		[A-Za-z\200-\377_0-9\$]

identifier		{ident_start}{ident_cont}*

typecast		"::"

/*
 * "self" is the set of chars that should be returned as single-character
 * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
 * which can be one or more characters long (but if a single-char token
 * appears in the "self" set, it is not to be returned as an Op).  Note
 * that the sets overlap, but each has some chars that are not in the other.
 *
 * If you change either set, adjust the character lists appearing in the
 * rule for "operator"!
 */
self			[,()\[\].;\:\+\-\*\/\%\^\<\>\=]
op_chars		[\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
operator		{op_chars}+

/* we no longer allow unary minus in numbers.
 * instead we pass it separately to parser. there it gets
 * coerced via doNegate() -- Leon aug 20 1999
 *
 * {realfail1} and {realfail2} are added to prevent the need for scanner
 * backup when the {real} rule fails to match completely.
 */

integer			{digit}+
decimal			(({digit}*\.{digit}+)|({digit}+\.{digit}*))
real			({integer}|{decimal})[Ee][-+]?{digit}+
realfail1		({integer}|{decimal})[Ee]
realfail2		({integer}|{decimal})[Ee][-+]

param			\${integer}

other			.

/*
 * Dollar quoted strings are totally opaque, and no escaping is done on them.
 * Other quoted strings must allow some special characters such as single-quote
 *  and newline.
 * Embedded single-quotes are implemented both in the SQL standard
 *  style of two adjacent single quotes "''" and in the Postgres/Java style
 *  of escaped-quote "\'".
 * Other embedded escaped characters are matched explicitly and the leading
 *  backslash is dropped from the string.
 * Note that xcstart must appear before operator, as explained above!
 *  Also whitespace (comment) must appear before operator.
*/

%%

%{
					/* code to execute during start of each call of yylex() */
					token_start = NULL;
%}

{whitespace}	{
					/* ignore */
				}

{xcstart}		{
					token_start = yytext;
					xcdepth = 0;
					BEGIN(xc);
					/* Put back any characters past slash-star; see above */
					yyless(2);
				}

<xc>{xcstart}	{
					xcdepth++;
					/* Put back any characters past slash-star; see above */
					yyless(2);
				}

<xc>{xcstop}	{
					if (xcdepth <= 0)
					{
						BEGIN(INITIAL);
						/* reset token_start for next token */
						token_start = NULL;
					}
					else
						xcdepth--;
				}

<xc>{xcinside}	{
					/* ignore */
				}

<xc>{op_chars}	{
					/* ignore */
				}

<xc>\*+			{
					/* ignore */
				}

<xc><<EOF>>		{ yyerror("unterminated /* comment"); }

{xbstart}		{
					/* Binary bit type.
					 * At some point we should simply pass the string
					 * forward to the parser and label it there.
					 * In the meantime, place a leading "b" on the string
					 * to mark it for the input routine as a binary string.
					 */
					token_start = yytext;
					BEGIN(xb);
					startlit();
					addlitchar('b');
				}
<xb>{quotestop}	|
<xb>{quotefail} {
					yyless(1);
					BEGIN(INITIAL);
					yylval.str = litbufdup();
					return BCONST;
				}
<xh>{xhinside}	|
<xb>{xbinside}	{
					addlit(yytext, yyleng);
				}
<xh>{quotecontinue}	|
<xb>{quotecontinue}	{
					/* ignore */
				}
<xb><<EOF>>		{ yyerror("unterminated bit string literal"); }

{xhstart}		{
					/* Hexadecimal bit type.
					 * At some point we should simply pass the string
					 * forward to the parser and label it there.
					 * In the meantime, place a leading "x" on the string
					 * to mark it for the input routine as a hex string.
					 */
					token_start = yytext;
					BEGIN(xh);
					startlit();
					addlitchar('x');
				}
<xh>{quotestop}	|
<xh>{quotefail} {
					yyless(1);
					BEGIN(INITIAL);
					yylval.str = litbufdup();
					return XCONST;
				}
<xh><<EOF>>		{ yyerror("unterminated hexadecimal string literal"); }

{xnstart}		{
					/* National character.
					 * We will pass this along as a normal character string,
					 * but preceded with an internally-generated "NCHAR".
					 */
					const ScanKeyword *keyword;

					yyless(1);				/* eat only 'n' this time */
					/* nchar had better be a keyword! */
					keyword = ScanKeywordLookup("nchar");
					Assert(keyword != NULL);
					yylval.keyword = keyword->name;
					return keyword->value;
				}

{xqstart}		{
					warn_on_first_escape = true;
					token_start = yytext;
					BEGIN(xq);
					startlit();
				}
{xestart}		{
					warn_on_first_escape = false;
					token_start = yytext;
					BEGIN(xq);
					startlit();
				}
<xq>{quotestop}	|
<xq>{quotefail} {
					yyless(1);
					BEGIN(INITIAL);
					yylval.str = litbufdup();
					return SCONST;
				}
<xq>{xqdouble}  {
					addlitchar('\'');
12 下一页
💿 文件大小 14179 K
👤 上传用户 babydog00
📂 所属分类其他数据库
🏷️ 相关标签

#PostgreSQL #Linux #源码 #开源
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -