📄 awk.y
字号:
/*
 * awk.y --- yacc/bison parser
 */

/* 
 * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
 * 
 * This file is part of GAWK, the GNU implementation of the
 * AWK Programming Language.
 * 
 * GAWK is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * GAWK is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with GAWK; see the file COPYING.  If not, write to
 * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

%{
#ifdef DEBUG
#define YYDEBUG 12
#endif

#include "awk.h"

/* Forward declarations of the helpers used by the grammar actions below. */
static void yyerror (); /* va_alist */
static char *get_src_buf P((void));
static int yylex P((void));
static NODE *node_common P((NODETYPE op));
static NODE *snode P((NODE *subn, NODETYPE op, int sindex));
static NODE *mkrangenode P((NODE *cpair));
static NODE *make_for_loop P((NODE *init, NODE *cond, NODE *incr));
static NODE *append_right P((NODE *list, NODE *new));
static void func_install P((NODE *params, NODE *def));
static void pop_var P((NODE *np, int freeit));
static void pop_params P((NODE *params));
static NODE *make_param P((char *name));
static NODE *mk_rexp P((NODE *exp));

/* State shared between the grammar actions and the lexer. */
static int want_assign;		/* lexical scanning kludge */
static int want_regexp;		/* lexical scanning kludge */
static int can_return;		/* lexical scanning kludge */
static int io_allowed = 1;	/* lexical scanning kludge */
static char *lexptr;		/* pointer to next char during parsing */
static char *lexend;
static char *lexptr_begin;	/* keep track of where we were for error msgs */
static char *lexeme;		/* beginning of lexeme for debugging */
static char *thisline = NULL;
#define YYDEBUG_LEXER_TEXT (lexeme)
static int param_counter;
static char *tokstart = NULL;
static char *token = NULL;
static char *tokend;

NODE *variables[HASHSIZE];

extern char *source;
extern int sourceline;
extern struct src *srcfiles;
extern int numfiles;
extern int errcount;
extern NODE *begin_block;
extern NODE *end_block;
%}

/* Semantic value carried by tokens and nonterminals. */
%union {
	long lval;
	AWKNUM fval;
	NODE *nodeval;
	NODETYPE nodetypeval;
	char *sval;
	NODE *(*ptrval)();
}

%type <nodeval> function_prologue function_body
%type <nodeval> rexp exp start program rule simp_exp
%type <nodeval> non_post_simp_exp
%type <nodeval> pattern 
%type <nodeval>	action variable param_list
%type <nodeval>	rexpression_list opt_rexpression_list
%type <nodeval>	expression_list opt_expression_list
%type <nodeval>	statements statement if_statement opt_param_list 
%type <nodeval> opt_exp opt_variable regexp 
%type <nodeval> input_redir output_redir
%type <nodetypeval> print
%type <sval> func_name
%type <lval> lex_builtin

%token <sval> FUNC_CALL NAME REGEXP
%token <lval> ERROR
%token <nodeval> YNUMBER YSTRING
%token <nodetypeval> RELOP APPEND_OP
%token <nodetypeval> ASSIGNOP MATCHOP NEWLINE CONCAT_OP
%token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
%token <nodetypeval> LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE
%token <nodetypeval> LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION
%token <nodetypeval> LEX_GETLINE
%token <nodetypeval> LEX_IN
%token <lval> LEX_AND LEX_OR INCREMENT DECREMENT
%token <lval> LEX_BUILTIN LEX_LENGTH

/* these are just yylval numbers */

/* Lowest to highest */
%right ASSIGNOP
%right '?' ':'
%left LEX_OR
%left LEX_AND
%left LEX_GETLINE
%nonassoc LEX_IN
%left FUNC_CALL LEX_BUILTIN LEX_LENGTH
%nonassoc MATCHOP
%nonassoc RELOP '<' '>' '|' APPEND_OP
%left CONCAT_OP
%left YSTRING YNUMBER
%left '+' '-'
%left '*' '/' '%'
%right '!' UNARY
%right '^'
%left INCREMENT DECREMENT
%left '$'
%left '(' ')'

%%

/* Top of the grammar: the parsed program is stored in expression_value. */
start
	: opt_nls program opt_nls
		{ expression_value = $2; }
	;

/*
 * A program is a sequence of rules.  Rules whose value is NULL are
 * skipped; the remaining ones are chained into a Node_rule_list.
 */
program
	: rule
		{ 
			if ($1 != NULL)
				$$ = $1;
			else
				$$ = NULL;
			yyerrok;
		}
	| program rule
		/* add the rule to the tail of list */
		{
			if ($2 == NULL)
				$$ = $1;
			else if ($1 == NULL)
				$$ = $2;
			else {
				if ($1->type != Node_rule_list)
					$1 = node($1, Node_rule_list,
						(NODE*)NULL);
				$$ = append_right ($1,
				   node($2, Node_rule_list,(NODE *) NULL));
			}
			yyerrok;
		}
	| error
		{ $$ = NULL; }
	| program error
		{ $$ = NULL; }
	;

/*
 * One rule.  BEGIN and END actions are appended to begin_block and
 * end_block respectively and yield NULL; io_allowed is cleared while
 * their action is being parsed and set again afterwards.
 */
rule
	: LEX_BEGIN
		{ io_allowed = 0; }
	  action
		{
			if (begin_block) {
				if (begin_block->type != Node_rule_list)
					begin_block = node(begin_block,
						Node_rule_list, (NODE *)NULL);
				(void) append_right (begin_block, node(
				    node((NODE *)NULL, Node_rule_node, $3),
				    Node_rule_list, (NODE *)NULL) );
			} else
				begin_block = node((NODE *)NULL,
					Node_rule_node, $3);
			$$ = NULL;
			io_allowed = 1;
			yyerrok;
		}
	| LEX_END
		{ io_allowed = 0; }
	  action
		{
			if (end_block) {
				if (end_block->type != Node_rule_list)
					end_block = node(end_block,
						Node_rule_list, (NODE *)NULL);
				(void) append_right (end_block, node(
				    node((NODE *)NULL, Node_rule_node, $3),
				    Node_rule_list, (NODE *)NULL));
			} else
				end_block = node((NODE *)NULL,
					Node_rule_node, $3);
			$$ = NULL;
			io_allowed = 1;
			yyerrok;
		}
	| LEX_BEGIN statement_term
		{
			warning("BEGIN blocks must have an action part");
			errcount++;
			/*
			 * Set the value explicitly: the default $$ = $1
			 * would hand the token's nodetypeval to `program',
			 * which reads it as a NODE pointer.
			 */
			$$ = NULL;
			yyerrok;
		}
	| LEX_END statement_term
		{
			warning("END blocks must have an action part");
			errcount++;
			/* same reasoning as for BEGIN above */
			$$ = NULL;
			yyerrok;
		}
	| pattern action
		{
			$$ = node ($1, Node_rule_node, $2);
			yyerrok;
		}
	| action
		{
			$$ = node ((NODE *)NULL, Node_rule_node, $1);
			yyerrok;
		}
	| pattern statement_term
		/*
		 * pattern without an action: supply a print node whose
		 * expression list is the field spec for field 0
		 */
		{
			$$ = node ($1,
			     Node_rule_node,
			     node(node(node(make_number(0.0),
					    Node_field_spec,
					    (NODE *) NULL),
					Node_expression_list,
					(NODE *) NULL),
				  Node_K_print,
				  (NODE *) NULL));
			yyerrok;
		}
	| function_prologue function_body
		{
			func_install($1, $2);
			$$ = NULL;
			yyerrok;
		}
	;

/* Name following the `function' keyword; built-in names are rejected. */
func_name
	: NAME
		{ $$ = $1; }
	| FUNC_CALL
		{ $$ = $1; }
	| lex_builtin
		/*
		 * NOTE(review): no value is assigned here, so the default
		 * $$ = $1 leaves an <lval> in a slot that function_prologue
		 * passes to make_param() as <sval> -- confirm this is
		 * harmless after the error has been counted.
		 */
		{
			yyerror("%s() is a built-in function, it cannot be redefined",
				tokstart);
			errcount++;
			/* yyerrok; */
		}
	;

lex_builtin
	: LEX_BUILTIN
	| LEX_LENGTH
	;

/*
 * `function name(params)': the function name is installed as the first
 * element of the parameter list, and can_return is switched on for the
 * body that follows.
 */
function_prologue
	: LEX_FUNCTION 
		{
			param_counter = 0;
		}
	  func_name '(' opt_param_list r_paren opt_nls
		{
			$$ = append_right(make_param($3), $5);
			can_return = 1;
		}
	;

/* Body of a function definition; can_return is switched off at its end. */
function_body
	: l_brace statements r_brace opt_semi
		{
			$$ = $2;
			can_return = 0;
		}
	;

/* A pattern is a single expression or a `start, end' pair of expressions. */
pattern
	: exp
		{ $$ = $1; }
	| exp comma exp
		{ $$ = mkrangenode ( node($1, Node_cond_pair, $3) ); }
	;

regexp
	/*
	 * In this rule, want_regexp tells yylex that the next thing
	 * is a regexp so it should read up to the closing slash.
	 */
	: '/'
		{ ++want_regexp; }
	  REGEXP '/'
		{
			NODE *n;
			int len;

			getnode(n);
			n->type = Node_regex;
			len = strlen($3);
			n->re_exp = make_string($3, len);
			n->re_reg = make_regexp($3, len, 0, 1);
			n->re_text = NULL;
			n->re_flags = CONST;
			n->re_cnt = 1;
			$$ = n;
		}
	;

/* A brace-enclosed action; an empty pair of braces yields NULL. */
action
	: l_brace statements r_brace opt_semi opt_nls
		{ $$ = $2 ; }
	| l_brace r_brace opt_semi opt_nls
		{ $$ = NULL; }
	;

/* One or more statements, chained into a Node_statement_list. */
statements
	: statement
		{ $$ = $1; }
	| statements statement
		{
			if ($1 == NULL || $1->type != Node_statement_list)
				$1 = node($1, Node_statement_list,
					(NODE *)NULL);
			$$ = append_right($1,
				node( $2, Node_statement_list, (NODE *)NULL));
			yyerrok;
		}
	| error
		{ $$ = NULL; }
	| statements error
		{ $$ = NULL; }
	;

statement_term
	: nls
	| semi opt_nls
	;

statement
	: semi opt_nls
		{ $$ = NULL; }
	| l_brace r_brace
		{ $$ = NULL; }
	| l_brace statements r_brace
		{ $$ = $2; }
	| if_statement
		{ $$ = $1; }
	| LEX_WHILE '(' exp r_paren opt_nls statement
		{ $$ = node ($3, Node_K_while, $6); }
	| LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls
		{ $$ = node ($6, Node_K_do, $3); }
	| LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement
		{
			$$ = node ($8, Node_K_arrayfor,
				make_for_loop(variable($3,1), (NODE *)NULL,
					variable($5,1)));
		}
	| LEX_FOR '(' opt_exp semi exp semi opt_exp r_paren opt_nls statement
		{
			$$ = node($10, Node_K_for,
				(NODE *)make_for_loop($3, $5, $7));
		}
	| LEX_FOR '(' opt_exp semi semi opt_exp r_paren opt_nls statement
		{
			$$ = node ($9, Node_K_for,
				(NODE *)make_for_loop($3, (NODE *)NULL, $6));
		}
	| LEX_BREAK statement_term
		/* for break, maybe we'll have to remember where to break to */
		{ $$ = node ((NODE *)NULL, Node_K_break, (NODE *)NULL); }
	| LEX_CONTINUE statement_term
		/* similarly */
		{ $$ = node ((NODE *)NULL, Node_K_continue, (NODE *)NULL); }
	| print '(' expression_list r_paren output_redir statement_term
		{ $$ = node ($3, $1, $5); }
	| print opt_rexpression_list output_redir statement_term
		{
			/* a bare `print' gets the field spec for field 0 */
			if ($1 == Node_K_print && $2 == NULL)
				$2 = node(node(make_number(0.0),
					       Node_field_spec,
					       (NODE *) NULL),
					  Node_expression_list,
					  (NODE *) NULL);

			$$ = node ($2, $1, $3);
		}
	| LEX_NEXT opt_exp statement_term
		{
			NODETYPE type;

			if ($2 && $2 == lookup("file")) {
				/*
				 * The three checks are independent: a lint
				 * warning must not hide the hard errors, and
				 * the extension error must not hide the
				 * BEGIN/END error.
				 */
				if (do_lint)
					warning("`next file' is a gawk extension");
				if (do_unix || do_posix)
					yyerror("`next file' is a gawk extension");
				if (! io_allowed)
					yyerror("`next file' used in BEGIN or END action");
				type = Node_K_nextfile;
			} else {
				if (! io_allowed)
					yyerror("next used in BEGIN or END action");
				type = Node_K_next;
			}
			$$ = node ((NODE *)NULL, type, (NODE *)NULL);
		}
	| LEX_EXIT opt_exp statement_term
		{ $$ = node ($2, Node_K_exit, (NODE *)NULL); }
	| LEX_RETURN
		{
			if (! can_return)
				yyerror("return used outside function context");
		}
	  opt_exp statement_term
		{ $$ = node ($3, Node_K_return, (NODE *)NULL); }
	| LEX_DELETE NAME '[' expression_list ']' statement_term
		{ $$ = node (variable($2,1), Node_K_delete, $4); }
	| exp statement_term
		{ $$ = $1; }
	;

/* The node type for print/printf is the value delivered with the token. */
print
	: LEX_PRINT
		{ $$ = $1; }
	| LEX_PRINTF
		{ $$ = $1; }
	;

/* if / if-else: both branches hang off a Node_if_branches node. */
if_statement
	: LEX_IF '(' exp r_paren opt_nls statement
		{
			$$ = node($3, Node_K_if, 
				node($6, Node_if_branches, (NODE *)NULL));
		}
	| LEX_IF '(' exp r_paren opt_nls statement
	     LEX_ELSE opt_nls statement
		{
			$$ = node ($3, Node_K_if,
				node ($6, Node_if_branches, $9));
		}
	;

nls
	: NEWLINE
		{ want_assign = 0; }
	| nls NEWLINE
	;

opt_nls
	: /* empty */
	| nls
	;

input_redir
	: /* empty */
		{ $$ = NULL; }
	| '<' simp_exp
		{ $$ = node ($2, Node_redirect_input, (NODE *)NULL); }
	;

output_redir
	: /* empty */
		{ $$ = NULL; }
	| '>' exp
		{ $$ = node ($2, Node_redirect_output, (NODE *)NULL); }
	| APPEND_OP exp
		{ $$ = node ($2, Node_redirect_append, (NODE *)NULL); }
	| '|' exp
		{ $$ = node ($2, Node_redirect_pipe, (NODE *)NULL); }
	;

opt_param_list
	: /* empty */
		{ $$ = NULL; }
	| param_list
		{ $$ = $1; }
	;

param_list
	: NAME
		{ $$ = make_param($1); }
	| param_list comma NAME
		{
			$$ = append_right($1, make_param($3));
			yyerrok;
		}
	| error
		{ $$ = NULL; }
	| param_list error
		{ $$ = NULL; }
	| param_list comma error
		{ $$ = NULL; }
	;

/* optional expression, as in for loop */
opt_exp
	: /* empty */
		{ $$ = NULL; }
	| exp
		{ $$ = $1; }
	;

opt_rexpression_list
	: /* empty */
		{ $$ = NULL; }
	| rexpression_list
		{ $$ = $1; }
	;

rexpression_list
	: rexp
		{ $$ = node ($1, Node_expression_list, (NODE *)NULL); }
	| rexpression_list comma rexp
		{
			$$ = append_right($1,
				node( $3, Node_expression_list, (NODE *)NULL));
			yyerrok;
		}
	| error
		{ $$ = NULL; }
	| rexpression_list error
		{ $$ = NULL; }
	| rexpression_list error rexp
		{ $$ = NULL; }
	| rexpression_list comma error
		{ $$ = NULL; }
	;

opt_expression_list
	: /* empty */
		{ $$ = NULL; }
	| expression_list
		{ $$ = $1; }
	;

expression_list
	: exp
		{ $$ = node ($1, Node_expression_list, (NODE *)NULL); }
	| expression_list comma exp
		{
			$$ = append_right($1,
				node( $3, Node_expression_list, (NODE *)NULL));
			yyerrok;
		}
	| error
		{ $$ = NULL; }
	| expression_list error
		{ $$ = NULL; }
	| expression_list error exp
		{ $$ = NULL; }
	| expression_list comma error
		{ $$ = NULL; }
	;

/* Expressions, not including the comma operator.  */
exp
	: variable ASSIGNOP
		{ want_assign = 0; }
	  exp
		{
			/* $4 is the operand to the right of the operator */
			if (do_lint && $4->type == Node_regex)
				warning("Regular expression on right of assignment.");
			$$ = node ($1, $2, $4);
		}
	| '(' expression_list r_paren LEX_IN NAME
		{ $$ = node (variable($5,1), Node_in_array, $2); }
	| exp '|' LEX_GETLINE opt_variable
		{
			$$ = node ($4, Node_K_getline,
				node ($1, Node_redirect_pipein, (NODE *)NULL));
		}
	| LEX_GETLINE opt_variable input_redir
		{
			if (do_lint && ! io_allowed && $3 == NULL)
				warning("non-redirected getline undefined inside BEGIN or END action");
			$$ = node ($2, Node_K_getline, $3);
		}
	| exp LEX_AND exp
		{ $$ = node ($1, Node_and, $3); }
	| exp LEX_OR exp
		{ $$ = node ($1, Node_or, $3); }
	| exp MATCHOP exp
		{
			if ($1->type == Node_regex)
				warning("Regular expression on left of MATCH operator.");
			$$ = node ($1, $2, mk_rexp($3));
		}
	| regexp
		{ $$ = $1; }
	| '!' regexp %prec UNARY
		{
			$$ = node(node(make_number(0.0),
				       Node_field_spec,
				       (NODE *) NULL),
				  Node_nomatch,
				  $2);
		}
	| exp LEX_IN NAME
		{ $$ = node (variable($3,1), Node_in_array, $1); }
	| exp RELOP exp
		{
			/* $3 is the operand to the right of the operator */
			if (do_lint && $3->type == Node_regex)
				warning("Regular expression on right of comparison.");
			$$ = node ($1, $2, $3);
		}
	| exp '<' exp
		{ $$ = node ($1, Node_less, $3); }
	| exp '>' exp
		{ $$ = node ($1, Node_greater, $3); }
	| exp '?' exp ':' exp
		{ $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));}
	| simp_exp
		{ $$ = $1; }
	| exp simp_exp %prec CONCAT_OP
		{ $$ = node ($1, Node_concat, $2); }
	;

rexp
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -