scan-gram.l

来自「bison 2.0 主要可以用来做语法分析用的」· Lex/Flex 代码 · 共 1,073 行 · 第 1/2 页
1,073 行
/* Bison Grammar Scanner                             -*- C -*-

   Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307  USA
*/

%option debug nodefault nounput noyywrap never-interactive
%option prefix="gram_" outfile="lex.yy.c"

%{
#include "system.h"

#include <mbswidth.h>
#include <get-errno.h>
#include <quote.h>

#include "complain.h"
#include "files.h"
#include "getargs.h"
#include "gram.h"
#include "reader.h"
#include "uniqstr.h"

/* Flex hook expanded inside the scanning routine before the first
   scan: put the cursor at line 1, column 1 of CURRENT_FILE and record
   that position as the start of the containing code.  */
#define YY_USER_INIT                                    \
  do                                                    \
    {                                                   \
      scanner_cursor.file = current_file;               \
      scanner_cursor.line = 1;                          \
      scanner_cursor.column = 1;                        \
      code_start = scanner_cursor;                      \
    }                                                   \
  while (0)

/* Location of scanner cursor.  */
boundary scanner_cursor;

/* Flex hook expanded before every rule action: hand the text just
   matched to adjust_location.  NOTE(review): adjust_location is
   defined outside this section; presumably it updates *LOC and
   SCANNER_CURSOR -- confirm against its definition.  */
static void adjust_location (location *, char const *, size_t);
#define YY_USER_ACTION  adjust_location (loc, yytext, yyleng);

/* Make Flex refill its buffer through no_cr_read rather than reading
   yyin itself.  NOTE(review): no_cr_read is defined outside this
   section; by its name it presumably drops carriage returns.  */
static size_t no_cr_read (FILE *, char *, size_t);
#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))


/* OBSTACK_FOR_STRING -- Used to store all the characters that we need to
   keep (to construct ID, STRINGS etc.).  Use the following macros to
   use it.

   Use STRING_GROW to append what has just been matched, and
   STRING_FINISH to end the string (it puts the ending 0).
STRING_FINISH also stores this string in LAST_STRING, which can be
   used, and which is used by STRING_FREE to free the last string.  */

static struct obstack obstack_for_string;

/* A string representing the most recently saved token.  */
static char *last_string;


/* Append the text of the current match (yytext, yyleng bytes).  */
#define STRING_GROW   \
  obstack_grow (&obstack_for_string, yytext, yyleng)

/* Terminate the growing string with a NUL byte and publish it in
   LAST_STRING.  */
#define STRING_FINISH                                   \
  do {                                                  \
    obstack_1grow (&obstack_for_string, '\0');          \
    last_string = obstack_finish (&obstack_for_string); \
  } while (0)

/* Release LAST_STRING.  obstack_free also releases every object
   allocated in the obstack after it.  */
#define STRING_FREE \
  obstack_free (&obstack_for_string, last_string)

/* Free the most recently finished string (LAST_STRING).  This is the
   externally visible wrapper around STRING_FREE; it takes no argument
   and returns nothing.  */
void
scanner_last_string_free (void)
{
  STRING_FREE;
}

/* Within well-formed rules, RULE_LENGTH is the number of values in
   the current rule so far, which says where to find `$0' with respect
   to the top of the stack.  It is not the same as the rule->length in
   the case of mid rule actions.

   Outside of well-formed rules, RULE_LENGTH has an undefined value.  */
static int rule_length;

static void handle_dollar (int token_type, char *cp, location loc);
static void handle_at (int token_type, char *cp, location loc);
static void handle_syncline (char *args);
static unsigned long int scan_integer (char const *p, int base, location loc);
static int convert_ucn_to_byte (char const *hex_text);
static void unexpected_eof (boundary, char const *);
static void unexpected_newline (boundary, char const *);

%}
/* Exclusive start conditions: while one of them is active, only rules
   explicitly tagged with it can match.  */
%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
%x SC_STRING SC_CHARACTER
%x SC_AFTER_IDENTIFIER
%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE

letter    [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
id        {letter}({letter}|[0-9])*
directive %{letter}({letter}|[0-9]|-)*
int       [0-9]+

/* POSIX says that a tag must be both an id and a C union member, but
   historically almost any character is allowed in a tag.  We disallow
   NUL and newline, as this simplifies our implementation.  */
tag      [^\0\n>]+

/* Zero or more instances of backslash-newline.
Following GCC, allow
   white space between the backslash and the newline.  */
splice   (\\[ \f\t\v]*\n)*

%%
%{
  /* The declarations below sit before the first rule, so Flex copies
     them into the scanning routine as local variables.
     NOTE(review): IF_LINT is defined outside this section; it appears
     to supply the initializer only in lint-style builds -- confirm.  */

  /* Nesting level of the current code in braces.  */
  int braces_level IF_LINT (= 0);

  /* Parent context state, when applicable.  */
  int context_state IF_LINT (= 0);

  /* Token type to return, when applicable.  */
  int token_type IF_LINT (= 0);

  /* Location of most recent identifier, when applicable.  */
  location id_loc IF_LINT (= empty_location);

  /* Where containing code started, when applicable.  Its initial
     value is relevant only when yylex is invoked in the SC_EPILOGUE
     start condition.  */
  boundary code_start = scanner_cursor;

  /* Where containing comment or string or character literal started,
     when applicable.  */
  boundary token_start IF_LINT (= scanner_cursor);
%}

  /*-----------------------.
  | Scanning white space.  |
  `-----------------------*/

<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>{
  /* Comments and white space.  */
  ","          warn_at (*loc, _("stray `,' treated as white space"));
  /* The `|' action shares the action of the next rule, i.e. the
     white space is discarded just like a `//' comment.  */
  [ \f\n\t\v]  |
  "//".*       ;
  /* Remember where the comment began and which start condition to
     return to once it is closed.  */
  "/*" {
    token_start = loc->start;
    context_state = YY_START;
    BEGIN SC_YACC_COMMENT;
  }

  /* #line directives are not documented, and may be withdrawn or
     modified in future versions of Bison.  */
  ^"#line "{int}" \"".*"\"\n" {
    /* Pass everything after the `#line ' prefix.  */
    handle_syncline (yytext + sizeof "#line " - 1);
  }
}


  /*----------------------------.
  | Scanning Bison directives.  
|  `----------------------------*/
<INITIAL>{
  /* Directives followed by code (%destructor, %initial-action,
     %lex-param, %parse-param, %printer, %union) do not return at
     once: they record the token type and switch to SC_PRE_CODE.  */
  "%binary"               return PERCENT_NONASSOC;
  "%debug"                return PERCENT_DEBUG;
  "%default"[-_]"prec"    return PERCENT_DEFAULT_PREC;
  "%define"               return PERCENT_DEFINE;
  "%defines"              return PERCENT_DEFINES;
  "%destructor"           token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
  "%dprec"                return PERCENT_DPREC;
  "%error"[-_]"verbose"   return PERCENT_ERROR_VERBOSE;
  "%expect"               return PERCENT_EXPECT;
  "%expect"[-_]"rr"       return PERCENT_EXPECT_RR;
  "%file-prefix"          return PERCENT_FILE_PREFIX;
  "%fixed"[-_]"output"[-_]"files"   return PERCENT_YACC;
  "%initial-action"       token_type = PERCENT_INITIAL_ACTION; BEGIN SC_PRE_CODE;
  "%glr-parser"           return PERCENT_GLR_PARSER;
  "%left"                 return PERCENT_LEFT;
  "%lex-param"            token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
  "%locations"            return PERCENT_LOCATIONS;
  "%merge"                return PERCENT_MERGE;
  "%name"[-_]"prefix"     return PERCENT_NAME_PREFIX;
  "%no"[-_]"default"[-_]"prec"  return PERCENT_NO_DEFAULT_PREC;
  "%no"[-_]"lines"        return PERCENT_NO_LINES;
  "%nonassoc"             return PERCENT_NONASSOC;
  "%nondeterministic-parser"   return PERCENT_NONDETERMINISTIC_PARSER;
  "%nterm"                return PERCENT_NTERM;
  "%output"               return PERCENT_OUTPUT;
  "%parse-param"          token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
  "%prec"                 rule_length--; return PERCENT_PREC;
  "%printer"              token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
  "%pure"[-_]"parser"     return PERCENT_PURE_PARSER;
  "%right"                return PERCENT_RIGHT;
  "%skeleton"             return PERCENT_SKELETON;
  "%start"                return PERCENT_START;
  "%term"                 return PERCENT_TOKEN;
  "%token"                return PERCENT_TOKEN;
  "%token"[-_]"table"     return PERCENT_TOKEN_TABLE;
  "%type"                 return PERCENT_TYPE;
  "%union"                token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
  "%verbose"              return PERCENT_VERBOSE;
  "%yacc"                 return PERCENT_YACC;

  /* Any other %-word.  For matches of equal length Flex prefers the
     earlier rule, so the known directives above win.  */
  {directive} {
    complain_at (*loc, _("invalid directive: %s"), quote (yytext));
  }

  "="                     return EQUAL;
  "|"                     rule_length = 0; return PIPE;
  ";"                     return SEMICOLON;

  /* An identifier is not returned yet: SC_AFTER_IDENTIFIER decides
     between ID and ID_COLON.  */
  {id} {
    val->symbol = symbol_get (yytext, *loc);
    id_loc = *loc;
    rule_length++;
    BEGIN SC_AFTER_IDENTIFIER;
  }

  {int} {
    val->integer = scan_integer (yytext, 10, *loc);
    return INT;
  }
  0[xX][0-9abcdefABCDEF]+ {
    val->integer = scan_integer (yytext, 16, *loc);
    return INT;
  }

  /* Characters.  We don't check there is only one.  */
  "'"         STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;

  /* Strings. */
  "\""        STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_STRING;

  /* Prologue. */
  "%{"        code_start = loc->start; BEGIN SC_PROLOGUE;

  /* Code in between braces.  */
  "{" {
    STRING_GROW;
    token_type = BRACED_CODE;
    braces_level = 0;
    code_start = loc->start;
    BEGIN SC_BRACED_CODE;
  }

  /* A type. */
  "<"{tag}">" {
    /* Keep only the text between the angle brackets.  */
    obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
    STRING_FINISH;
    val->uniqstr = uniqstr_new (last_string);
    STRING_FREE;
    return TYPE;
  }

  "%%" {
    /* The second separator starts the epilogue.  */
    static int percent_percent_count;
    if (++percent_percent_count == 2)
      BEGIN SC_EPILOGUE;
    return PERCENT_PERCENT;
  }

  . {
    complain_at (*loc, _("invalid character: %s"), quote (yytext));
  }

  <<EOF>> {
    loc->start = loc->end = scanner_cursor;
    yyterminate ();
  }
}


  /*-----------------------------------------------------------------.
  | Scanning after an identifier, checking whether a colon is next.  |
  `-----------------------------------------------------------------*/

<SC_AFTER_IDENTIFIER>{
  ":" {
    rule_length = 0;
    *loc = id_loc;
    BEGIN INITIAL;
    return ID_COLON;
  }
  . {
    /* Not a colon: push the character back with yyless (0) and take
       its display width off the cursor column again.
       NOTE(review): this presumably undoes what adjust_location did
       for this match -- confirm against its definition.  */
    scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
    yyless (0);
    *loc = id_loc;
    BEGIN INITIAL;
    return ID;
  }
  <<EOF>> {
    *loc = id_loc;
    BEGIN INITIAL;
    return ID;
  }
}


  /*---------------------------------------------------------------.
  | Scanning a Yacc comment.  The initial `/ *' is already eaten.  |
  `---------------------------------------------------------------*/

<SC_YACC_COMMENT>{
  "*/"     BEGIN context_state;
  .|\n     ;
  <<EOF>>  unexpected_eof (token_start, "*/"); BEGIN context_state;
}


  /*------------------------------------------------------------.
  | Scanning a C comment.  The initial `/ *' is already eaten.  |
  `------------------------------------------------------------*/

<SC_COMMENT>{
  "*"{splice}"/"  STRING_GROW; BEGIN context_state;
  <<EOF>>         unexpected_eof (token_start, "*/"); BEGIN context_state;
}


  /*--------------------------------------------------------------.
  | Scanning a line comment.  The initial `//' is already eaten.  |
  `--------------------------------------------------------------*/

<SC_LINE_COMMENT>{
  "\n"           STRING_GROW; BEGIN context_state;
  {splice}       STRING_GROW;
  <<EOF>>        BEGIN context_state;
}


  /*------------------------------------------------.
  | Scanning a Bison string, including its escapes. |
  | The initial quote is already eaten.             |
  `------------------------------------------------*/

<SC_ESCAPED_STRING>{
  "\"" {
    STRING_GROW;
    STRING_FINISH;
    /* The token starts at the opening quote, not at this match.  */
    loc->start = token_start;
    val->chars = last_string;
    rule_length++;
    BEGIN INITIAL;
    return STRING;
  }
  \n            unexpected_newline (token_start, "\""); BEGIN INITIAL;
  <<EOF>>       unexpected_eof (token_start, "\"");     BEGIN INITIAL;
}

  /*----------------------------------------------------------.
  | Scanning a Bison character literal, decoding its escapes. |
  | The initial quote is already eaten.                       
|  `----------------------------------------------------------*/
<SC_ESCAPED_CHARACTER>{
  "'" {
    unsigned char last_string_1;
    STRING_GROW;
    STRING_FINISH;
    loc->start = token_start;
    val->symbol = symbol_get (last_string, *loc);
    symbol_class_set (val->symbol, token_sym, *loc);
    /* LAST_STRING still holds the opening quote at index 0, so index 1
       is the first byte after it; that byte becomes the user token
       number.  NOTE(review): for an empty literal that byte is the
       closing quote, and extra characters are ignored -- the INITIAL
       rule already states that only-one is not checked.  */
    last_string_1 = last_string[1];
    symbol_user_token_number_set (val->symbol, last_string_1, *loc);
    STRING_FREE;
    rule_length++;
    BEGIN INITIAL;
    return ID;
  }
  \n            unexpected_newline (token_start, "'");  BEGIN INITIAL;
  <<EOF>>       unexpected_eof (token_start, "'");      BEGIN INITIAL;
}

<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>{
  \0        complain_at (*loc, _("invalid null character"));
}


  /*----------------------------.
  | Decode escaped characters.  |
  `----------------------------*/

<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>{
  /* Octal escape: one to three digits after the backslash.  */
  \\[0-7]{1,3} {
    unsigned long int c = strtoul (yytext + 1, 0, 8);
    if (UCHAR_MAX < c)
      complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
    else if (! c)
      complain_at (*loc, _("invalid null character: %s"), quote (yytext));
    else
      obstack_1grow (&obstack_for_string, c);
  }

  /* Hexadecimal escape: any number of digits, so errno is cleared
     first and checked afterwards to catch strtoul overflow.  */
  \\x[0-9abcdefABCDEF]+ {
    unsigned long int c;
    set_errno (0);
    c = strtoul (yytext + 2, 0, 16);
    if (UCHAR_MAX < c || get_errno ())
      complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
    else if (! c)
      complain_at (*loc, _("invalid null character: %s"), quote (yytext));
    else
      obstack_1grow (&obstack_for_string, c);
  }

  \\a   obstack_1grow (&obstack_for_string, '\a');
  \\b   obstack_1grow (&obstack_for_string, '\b');
  \\f   obstack_1grow (&obstack_for_string, '\f');
  \\n   obstack_1grow (&obstack_for_string, '\n');
  \\r   obstack_1grow (&obstack_for_string, '\r');
  \\t   obstack_1grow (&obstack_for_string, '\t');
  \\v   obstack_1grow (&obstack_for_string, '\v');

  /* \\[\"\'?\\] would be shorter, but it confuses xgettext.  */
  \\("\""|"'"|"?"|"\\")  obstack_1grow (&obstack_for_string, yytext[1]);

  /* Universal character name: \u plus 4 hex digits, or \U plus 8.  */
  \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
    int c = convert_ucn_to_byte (yytext);
    if (c < 0)
      complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
    else if (! c)
      complain_at (*loc, _("invalid null character: %s"), quote (yytext));
    else
      obstack_1grow (&obstack_for_string, c);
  }
  /* Any other escape: complain, but keep the text as written.  */
  \\(.|\n)      {
    complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
    STRING_GROW;
  }
}


  /*--------------------------------------------.
  | Scanning user-code characters and strings.  |
  `--------------------------------------------*/

<SC_CHARACTER,SC_STRING>{
  {splice}|\\{splice}[^\n$@\[\]]        STRING_GROW;
}

<SC_CHARACTER>{
  "'"           STRING_GROW; BEGIN context_state;
  \n            unexpected_newline (token_start, "'"); BEGIN context_state;
  <<EOF>>       unexpected_eof (token_start, "'"); BEGIN context_state;
}

<SC_STRING>{
  "\""          STRING_GROW; BEGIN context_state;
  \n            unexpected_newline (token_start, "\""); BEGIN context_state;
  <<EOF>>       unexpected_eof (token_start, "\""); BEGIN context_state;
}


  /*---------------------------------------------------.
  | Strings, comments etc. can be found in user code.  |
  `---------------------------------------------------*/

<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>{
  /* Each rule copies the opener, saves the current start condition in
     context_state so the sub-scanner can return to it, and switches
     to the matching sub-scanner.  */
  "'" {
    STRING_GROW;
    context_state = YY_START;
    token_start = loc->start;
    BEGIN SC_CHARACTER;
  }
  "\"" {
    STRING_GROW;
    context_state = YY_START;
    token_start = loc->start;
    BEGIN SC_STRING;
  }
  "/"{splice}"*" {
    STRING_GROW;
    context_state = YY_START;
    token_start = loc->start;
    BEGIN SC_COMMENT;
  }
  "/"{splice}"/" {
    STRING_GROW;
    context_state = YY_START;
    BEGIN SC_LINE_COMMENT;
  }
}


  /*---------------------------------------------------------------.
  | Scanning after %union etc., possibly followed by white space.  |
  | For %union only, allow arbitrary C code to appear before the   |
  | following brace, as an extension to POSIX.                     
|  `---------------------------------------------------------------*/<SC_PRE_CODE>{  . {    bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;    scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);    yyless (0);    if (valid)      {	braces_level = -1;	code_start = loc->start;	BEGIN SC_BRACED_CODE;      }    else      {	complain_at (*loc, _("missing `{' in `%s'"),
scan-gram.l - 源码说明

本页面展示了「bison 2.0 主要可以用来做语法分析用的」中的 scan-gram.l 源码文件，该文件是 Flex（Lex）词法分析器描述文件，共 1,073 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫开发者社区收录了大量与语法分析相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?