📄 awk.y
字号:
: variable ASSIGNOP
		{ want_assign = 0; }
	  rexp
		{ $$ = node ($1, $2, $4); }
	| rexp LEX_AND rexp
		{ $$ = node ($1, Node_and, $3); }
	| rexp LEX_OR rexp
		{ $$ = node ($1, Node_or, $3); }
	| LEX_GETLINE opt_variable input_redir
		{
		  /* $3 == NULL means the getline has no redirection */
		  if (do_lint && ! io_allowed && $3 == NULL)
			warning("non-redirected getline undefined inside BEGIN or END action");
		  $$ = node ($2, Node_K_getline, $3);
		}
	| regexp
		{ $$ = $1; }
	| '!' regexp %prec UNARY
		{ $$ = node((NODE *) NULL, Node_nomatch, $2); }
	| rexp MATCHOP rexp
		{ $$ = node ($1, $2, mk_rexp($3)); }
	| rexp LEX_IN NAME
		/* the array (NAME) becomes the left child, the subscript the right */
		{ $$ = node (variable($3,1), Node_in_array, $1); }
	| rexp RELOP rexp
		{ $$ = node ($1, $2, $3); }
	| rexp '?' rexp ':' rexp
		/* both branches hang off a single Node_if_branches child */
		{ $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));}
	| simp_exp
		{ $$ = $1; }
	| rexp simp_exp %prec CONCAT_OP
		{ $$ = node ($1, Node_concat, $2); }
	;

simp_exp
	: non_post_simp_exp
	/* Binary operators in order of decreasing precedence.  */
	| simp_exp '^' simp_exp
		{ $$ = node ($1, Node_exp, $3); }
	| simp_exp '*' simp_exp
		{ $$ = node ($1, Node_times, $3); }
	| simp_exp '/' simp_exp
		{ $$ = node ($1, Node_quotient, $3); }
	| simp_exp '%' simp_exp
		{ $$ = node ($1, Node_mod, $3); }
	| simp_exp '+' simp_exp
		{ $$ = node ($1, Node_plus, $3); }
	| simp_exp '-' simp_exp
		{ $$ = node ($1, Node_minus, $3); }
	| variable INCREMENT
		{ $$ = node ($1, Node_postincrement, (NODE *)NULL); }
	| variable DECREMENT
		{ $$ = node ($1, Node_postdecrement, (NODE *)NULL); }
	;

non_post_simp_exp
	: '!' simp_exp %prec UNARY
		{ $$ = node ($2, Node_not,(NODE *) NULL); }
	| '(' exp r_paren
		{ $$ = $2; }
	| LEX_BUILTIN '(' opt_expression_list r_paren
		{ $$ = snode ($3, Node_builtin, (int) $1); }
	| LEX_LENGTH '(' opt_expression_list r_paren
		{ $$ = snode ($3, Node_builtin, (int) $1); }
	| LEX_LENGTH
		{
		  /* bare `length': built with a NULL argument list */
		  if (do_lint)
			warning("call of `length' without parentheses is not portable");
		  $$ = snode ((NODE *)NULL, Node_builtin, (int) $1);
		  if (do_posix)
			warning(
			  "call of `length' without parentheses is deprecated by POSIX");
		}
	| FUNC_CALL '(' opt_expression_list r_paren
		{
		  $$ = node ($3, Node_func_call, make_string($1, strlen($1)));
		}
	| variable
	| INCREMENT variable
		{ $$ = node ($2, Node_preincrement, (NODE *)NULL); }
	| DECREMENT variable
		{ $$ = node ($2, Node_predecrement, (NODE *)NULL); }
	| YNUMBER
		{ $$ = $1; }
	| YSTRING
		{ $$ = $1; }
	| '-' simp_exp %prec UNARY
		{
		  /* a Node_val operand is negated in place, with no new node */
		  if ($2->type == Node_val) {
			$2->numbr = -(force_number($2));
			$$ = $2;
		  } else
			$$ = node ($2, Node_unary_minus, (NODE *)NULL);
		}
	| '+' simp_exp %prec UNARY
		{ $$ = $2; }
	;

opt_variable
	: /* empty */
		{ $$ = NULL; }
	| variable
		{ $$ = $1; }
	;

variable
	: NAME
		{ $$ = variable($1,1); }
	| NAME '[' expression_list ']'
		{
		  /*
		   * A list with no right link holds a single subscript:
		   * use that expression directly and release the list cell.
		   */
		  if ($3->rnode == NULL) {
			$$ = node (variable($1,1), Node_subscript, $3->lnode);
			freenode($3);
		  } else
			$$ = node (variable($1,1), Node_subscript, $3);
		}
	| '$' non_post_simp_exp
		{ $$ = node ($2, Node_field_spec, (NODE *)NULL); }
	;

l_brace
	: '{' opt_nls
	;

r_brace
	: '}' opt_nls	{ yyerrok; }
	;

r_paren
	: ')' { yyerrok; }
	;

opt_semi
	: /* empty */
	| semi
	;

semi
	: ';'	{ yyerrok; want_assign = 0; }
	;

comma	: ',' opt_nls	{ yyerrok; }
	;

%%

/* One entry of the keyword / builtin-function table (tokentab below). */
struct token {
	char *operator;		/* text to match */
	NODETYPE value;		/* node type */
	int class;		/* lexical class */
	unsigned flags;		/* # of args. allowed and compatibility */
#	define	ARGS	0xFF	/* 0, 1, 2, 3 args allowed (any combination) */
#	define	A(n)	(1<<(n))
#	define	VERSION	0xFF00	/* old awk is zero */
#	define	NOT_OLD		0x0100	/* feature not in old awk */
#	define	NOT_POSIX	0x0200	/* feature not in POSIX */
#	define	GAWKX		0x0400	/* gawk extension */
	NODE *(*ptr) ();	/* function that implements this keyword */
};

extern NODE
	*do_exp(),	*do_getline(),	*do_index(),	*do_length(),
	*do_sqrt(),	*do_log(),	*do_sprintf(),	*do_substr(),
	*do_split(),	*do_system(),	*do_int(),	*do_close(),
	*do_atan2(),	*do_sin(),	*do_cos(),	*do_rand(),
	*do_srand(),	*do_match(),	*do_tolower(),	*do_toupper(),
	*do_sub(),	*do_gsub(),	*do_strftime(),	*do_systime();

/* Tokentab is sorted ascii ascending order, so it can be binary searched. */

static struct token tokentab[] = {
{"BEGIN",	Node_illegal,	 LEX_BEGIN,	0,		0},
{"END",		Node_illegal,	 LEX_END,	0,		0},
{"atan2",	Node_builtin,	 LEX_BUILTIN,	NOT_OLD|A(2),	do_atan2},
{"break",	Node_K_break,	 LEX_BREAK,	0,		0},
{"close",	Node_builtin,	 LEX_BUILTIN,	NOT_OLD|A(1),	do_close},
{"continue",	Node_K_continue, LEX_CONTINUE,	0,		0},
{"cos",		Node_builtin,	 LEX_BUILTIN,	NOT_OLD|A(1),	do_cos},
{"delete",	Node_K_delete,	 LEX_DELETE,	NOT_OLD,	0},
{"do",		Node_K_do,	 LEX_DO,	NOT_OLD,	0},
{"else",	Node_illegal,	 LEX_ELSE,	0,		0},
{"exit",	Node_K_exit,	 LEX_EXIT,	0,		0},
{"exp",		Node_builtin,	 LEX_BUILTIN,	A(1),		do_exp},
{"for",		Node_K_for,	 LEX_FOR,	0,		0},
{"func",	Node_K_function, LEX_FUNCTION,	NOT_POSIX|NOT_OLD,	0},
{"function",	Node_K_function, LEX_FUNCTION,	NOT_OLD,	0},
{"getline",	Node_K_getline,	 LEX_GETLINE,	NOT_OLD,	0},
{"gsub",	Node_builtin,	 LEX_BUILTIN,	NOT_OLD|A(2)|A(3), do_gsub},
{"if",		Node_K_if,	 LEX_IF,	0,		0},
{"in",		Node_illegal,	 LEX_IN,	0,		0},
{"index",	Node_builtin,	 LEX_BUILTIN,	A(2),		do_index},
{"int",		Node_builtin,	 LEX_BUILTIN,	A(1),		do_int},
{"length",	Node_builtin,	 LEX_LENGTH,	A(0)|A(1),	do_length},
{"log",		Node_builtin,	 LEX_BUILTIN,	A(1),		do_log},
{"match",	Node_builtin,	 LEX_BUILTIN,	NOT_OLD|A(2),	do_match},
{"next",	Node_K_next,	 LEX_NEXT,	0,		0},
{"print",	Node_K_print,	 LEX_PRINT,	0,		0},
{"printf",	Node_K_printf,	 LEX_PRINTF,	0,		0},
{"rand",	Node_builtin,	 LEX_BUILTIN,	NOT_OLD|A(0),	do_rand},
{"return",	Node_K_return,	 LEX_RETURN,	NOT_OLD,	0},
{"sin",		Node_builtin,	 LEX_BUILTIN,	NOT_OLD|A(1),	do_sin},
{"split",	Node_builtin,	 LEX_BUILTIN,	A(2)|A(3),	do_split},
{"sprintf",	Node_builtin,	 LEX_BUILTIN,	0,		do_sprintf},
{"sqrt",	Node_builtin,	 LEX_BUILTIN,	A(1),		do_sqrt},
{"srand",	Node_builtin,	 LEX_BUILTIN,	NOT_OLD|A(0)|A(1), do_srand},
{"strftime",	Node_builtin,	 LEX_BUILTIN,	GAWKX|A(1)|A(2), do_strftime},
{"sub",		Node_builtin,	 LEX_BUILTIN,	NOT_OLD|A(2)|A(3), do_sub},
{"substr",	Node_builtin,	 LEX_BUILTIN,	A(2)|A(3),	do_substr},
{"system",	Node_builtin,	 LEX_BUILTIN,	NOT_OLD|A(1),	do_system},
{"systime",	Node_builtin,	 LEX_BUILTIN,	GAWKX|A(0),	do_systime},
{"tolower",	Node_builtin,	 LEX_BUILTIN,	NOT_OLD|A(1),	do_tolower},
{"toupper",	Node_builtin,	 LEX_BUILTIN,	NOT_OLD|A(1),	do_toupper},
{"while",	Node_K_while,	 LEX_WHILE,	0,		0},
};

/*
 * yyerror --- report a parse/lex error and terminate.
 *
 * Called with a message string as its first variadic argument.  Bumps
 * errcount, prints the offending source line through msg(), builds a
 * second line that puts a '^' under the current lexeme followed by the
 * message, hands that line to err(), and then calls exit(2), so it never
 * returns to its caller.
 *
 * The message is copied into a fixed-size buffer.  The padding in front
 * of the caret is reduced when necessary so that the message still fits,
 * and the copy itself is bounded, so a long message or an error far to
 * the right on a line can no longer write past the end of buf.
 */
/* VARARGS0 */
static void
yyerror(va_alist)
va_dcl
{
	va_list args;
	char *mesg = NULL;
	register char *bp, *cp;
	char *scan;
	char buf[120];
	int mesglen;		/* number of message bytes to copy */
	int maxpad;		/* most padding allowed before the caret */
	int room;		/* bytes available in buf */

	errcount++;
	/* Find the current line in the input file */
	if (lexptr) {
		if (!thisline) {
			cp = lexeme;
			if (*cp == '\n') {
				/* report against the line this newline ends */
				cp--;
				mesg = "unexpected newline";
			}
			/* walk back to the start of the line (or of the buffer) */
			for ( ; cp != lexptr_begin && *cp != '\n'; --cp)
				;
			if (*cp == '\n')
				cp++;
			thisline = cp;
		}
		/* NL isn't guaranteed */
		bp = lexeme;
		while (bp < lexend && *bp && *bp != '\n')
			bp++;
	} else {
		thisline = "(END OF FILE)";
		bp = thisline + 13;	/* 13 == strlen("(END OF FILE)") */
	}
	msg("%.*s", (int) (bp - thisline), thisline);

	/* The message is needed first so the padding can be sized around it. */
	va_start(args);
	if (mesg == NULL)
		mesg = va_arg(args, char *);
	mesglen = strlen(mesg);

	/*
	 * Never pad further than 24 bytes short of the end of buf, and pad
	 * less than that when the message needs more room: the caret, the
	 * blank after it and the terminating NUL take 3 bytes besides the
	 * message itself.
	 */
	maxpad = (int) sizeof(buf) - 24;
	room = (int) sizeof(buf) - 3 - mesglen;
	if (maxpad > room)
		maxpad = room;
	if (maxpad < 0)
		maxpad = 0;

	bp = buf;
	cp = buf + maxpad;
	if (lexptr) {
		/* copy tabs as tabs so the caret lines up with the source line */
		scan = thisline;
		while (bp < cp && scan < lexeme)
			if (*scan++ == '\t')
				*bp++ = '\t';
			else
				*bp++ = ' ';
		*bp++ = '^';
		*bp++ = ' ';
	}

	/* Bounded copy: truncate rather than overrun if it still cannot fit. */
	room = (int) (buf + sizeof(buf) - 1 - bp);
	if (mesglen > room)
		mesglen = room;
	memcpy(bp, mesg, mesglen);
	bp[mesglen] = '\0';

	err("", buf, args);
	va_end(args);
	exit(2);
}

/*
 * get_src_buf --- make the next piece of program text available to the
 * lexer.
 *
 * Sets lexptr / lexend (and lexptr_begin, lexeme, thisline as needed) to
 * describe the new text.  For a CMDLINE source it returns lexptr; for a
 * file source it returns buf after a successful read.  It returns NULL
 * when nextfile has passed numfiles or a file-type entry has a NULL name.
 * Moves on to the following srcfiles[] entry by calling itself.
 */
static char *
get_src_buf()
{
	static int samefile = 0;	/* nonzero while a source file is open */
	static int nextfile = 0;	/* index into srcfiles[] */
	static char *buf = NULL;
	static int fd;
	int n;
	register char *scan;
	static int len = 0;
	static int did_newline = 0;
#	define	SLOP	128	/* enough space to hold most source lines */

	if (nextfile > numfiles)
		return NULL;

	if (srcfiles[nextfile].stype == CMDLINE) {
		if (len == 0) {
			/* first call for this string: lex it in place */
			len = strlen(srcfiles[nextfile].val);
			sourceline = 1;
			lexptr = lexptr_begin = srcfiles[nextfile].val;
			lexend = lexptr + len;
		} else if (!did_newline && *(lexptr-1) != '\n') {
			/*
			 * The following goop is to ensure that the source
			 * ends with a newline and that the entire current
			 * line is available for error messages.
			 */
			int offset;

			did_newline = 1;
			offset = lexptr - lexeme;
			for (scan = lexeme; scan > lexptr_begin; scan--)
				if (*scan == '\n') {
					scan++;
					break;
				}
			len = lexptr - scan;
			emalloc(buf, char *, len+1, "get_src_buf");
			memcpy(buf, scan, len);
			thisline = buf;
			lexptr = buf + len;
			*lexptr = '\n';
			lexeme = lexptr - offset;
			lexptr_begin = buf;
			lexend = lexptr + 1;
		} else {
			/* this string is used up */
			len = 0;
			lexeme = lexptr = lexptr_begin = NULL;
		}
		if (lexptr == NULL && ++nextfile <= numfiles)
			return get_src_buf();
		return lexptr;
	}
	if (!samefile) {
		source = srcfiles[nextfile].val;
		if (source == NULL) {
			if (buf) {
				free(buf);
				buf = NULL;
			}
			len = 0;
			return lexeme = lexptr = lexptr_begin = NULL;
		}
		fd = pathopen(source);
		if (fd == -1)
			fatal("can't open source file \"%s\" for reading (%s)",
				source, strerror(errno));
		len = optimal_bufsize(fd);
		if (buf)
			free(buf);
		/* SLOP extra bytes in front of the read area, used below */
		emalloc(buf, char *, len + SLOP, "get_src_buf");
		lexptr_begin = buf + SLOP;
		samefile = 1;
		sourceline = 1;
	} else {
		/*
		 * Here, we retain the current source line (up to length SLOP)
		 * in the beginning of the buffer that was overallocated above
		 */
		int offset;
		int linelen;

		offset = lexptr - lexeme;
		for (scan = lexeme; scan > lexptr_begin; scan--)
			if (*scan == '\n') {
				scan++;
				break;
			}
		linelen = lexptr - scan;
		/*
		 * NOTE(review): when the line is longer than SLOP only its
		 * first SLOP bytes are kept, and offset is not clamped along
		 * with linelen -- confirm this is acceptable for long lines.
		 */
		if (linelen > SLOP)
			linelen = SLOP;
		thisline = buf + SLOP - linelen;
		memcpy(thisline, scan, linelen);
		lexeme = buf + SLOP - offset;
		lexptr_begin = thisline;
	}
	n = read(fd, buf + SLOP, len);
	if (n == -1)
		fatal("can't read sourcefile \"%s\" (%s)",
			source, strerror(errno));
	if (n == 0) {
		/*
		 * End of this file: go on to the next source.
		 * NOTE(review): fd is not closed on this path -- confirm
		 * whether that is intentional.
		 */
		samefile = 0;
		nextfile++;
		len = 0;
		return get_src_buf();
	}
	lexptr = buf + SLOP;
	lexend = lexptr + n;
	return buf;
}

/*
 * tokadd --- append one character to the token being collected, growing
 * the token buffer through tokexpand() when the end is reached.
 */
#define	tokadd(x) (*token++ = (x), token == tokend ? tokexpand() : token)

/*
 * tokexpand --- double the size of the token buffer (allocating it on
 * first use), re-point token and tokend into the new storage, and return
 * the updated token pointer.
 */
char *
tokexpand()
{
	static int toksize = 60;
	int tokoffset;

	/* remember the position as an offset; the buffer may move */
	tokoffset = token - tokstart;
	toksize *= 2;
	if (tokstart)
		erealloc(tokstart, char *, toksize, "tokexpand");
	else
		emalloc(tokstart, char *, toksize, "tokexpand");
	tokend = tokstart + toksize;
	token = tokstart + tokoffset;
	return token;
}

/*
 * nextc --- return the next input character, refilling through
 * get_src_buf() when the current text is exhausted; yields '\0' when
 * get_src_buf() returns NULL.  A real function when DEBUG is set,
 * otherwise a macro.
 */
#if DEBUG
char
nextc() {
	if (lexptr && lexptr < lexend)
		return *lexptr++;
	else if (get_src_buf())
		return *lexptr++;
	else
		return '\0';
}
#else
#define	nextc()	((lexptr && lexptr < lexend) ? \
			*lexptr++ : \
			(get_src_buf() ? *lexptr++ : '\0') \
		)
#endif

/* pushback --- step back one character, but never before lexptr_begin */
#define pushback() (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr)

/*
 * Read the input and turn it into tokens.
 */

static int
yylex()
{
	register int c;
	int seen_e = 0;		/* These are for numbers */
	int seen_point = 0;
	int esc_seen;		/* for literal strings */
	int low, mid, high;
	static int did_newline = 0;
	char *tokkey;

	/* peek one character: 0 here means there is no more input */
	if (!nextc())
		return 0;
	pushback();
	lexeme = lexptr;
	thisline = NULL;
	if (want_regexp) {
		int in_brack = 0;	/* nonzero between '[' and ']' */

		want_regexp = 0;
		token = tokstart;
		while ((c = nextc()) != 0) {
			switch (c) {
			case '[':
				in_brack = 1;
				break;
			case ']':
				in_brack = 0;
				break;
			case '\\':
				if ((c = nextc()) == '\0') {
					yyerror("unterminated regexp ends with \\ at end of file");
				} else if (c == '\n') {
					/* backslash-newline: drop both characters */
					sourceline++;
					continue;
				} else
					tokadd('\\');
				break;
			case '/':	/* end of the regexp */
				if (in_brack)
					break;

				/* leave the '/' in the input */
				pushback();
				tokadd('\0');
				yylval.sval = tokstart;
				return REGEXP;
			case '\n':
				pushback();
				yyerror("unterminated regexp");
				/* yyerror() exits, so control never reaches the next case */
			case '\0':
				yyerror("unterminated regexp at end of file");
			}
			tokadd(c);
		}
	}
retry:
	/* skip blanks and tabs */
	while ((c = nextc()) == ' ' || c == '\t')
		;

	lexeme = lexptr ? lexptr - 1 : lexptr;
	thisline = NULL;
	token = tokstart;
	yylval.nodetypeval = Node_illegal;

	switch (c) {
	case 0:
		return 0;

	case '\n':
		sourceline++;
		return NEWLINE;

	case '#':		/* it's a comment */
		while ((c = nextc()) != '\n') {
			if (c == '\0')
				return 0;
		}
		sourceline++;
		return NEWLINE;

	case '\\':
#ifdef RELAXED_CONTINUATION
		if (!do_unix) {	/* strip trailing white-space and/or comment */
			while ((c = nextc()) == ' ' || c == '\t')
				continue;
			if (c == '#')
				while ((c = nextc()) != '\n')
					if (!c)
						break;
			pushback();
		}
#endif /*RELAXED_CONTINUATION*/
		if (nextc() == '\n') {
			/* backslash-newline is a line continuation */
			sourceline++;
			goto retry;
		} else
			yyerror("inappropriate use of backslash");
		break;

	case '$':
		want_assign = 1;
		return '$';

	case ')':
	case ']':
	case '(':
	case '[':
	case ';':
	case ':':
	case '?':
	case '{':
	case ',':
		return c;

	case '*':
		if ((c = nextc()) == '=') {
			yylval.nodetypeval = Node_assign_times;
			return ASSIGNOP;
		} else if (do_posix) {
			pushback();
			return '*';
		} else if (c == '*') {
			/* make ** and **= aliases for ^ and ^= */
			static int did_warn_op = 0, did_warn_assgn = 0;

			if (nextc() == '=') {
				if (do_lint && ! did_warn_assgn) {
					did_warn_assgn = 1;
					warning("**= is not allowed by POSIX");
				}
				yylval.nodetypeval = Node_assign_exp;
				return ASSIGNOP;
			} else {
				pushback();
				if (do_lint && ! did_warn_op) {
					did_warn_op = 1;
					warning("** is not allowed by POSIX");
				}
				return '^';
			}
		}
		pushback();
		return '*';

	case '/':
		/* "/=" is looked for only while want_assign is set */
		if (want_assign) {
			if (nextc() == '=') {
				yylval.nodetypeval = Node_assign_quotient;
				return ASSIGNOP;
			}
			pushback();
		}
		return '/';

	case '%':
		if (nextc() == '=') {
			yylval.nodetypeval = Node_assign_mod;
			return ASSIGNOP;
		}
		pushback();
		return '%';

	case '^':
	{
		/* each lint warning is issued at most once */
		static int did_warn_op = 0, did_warn_assgn = 0;

		if (nextc() == '=') {
			if (do_lint && ! did_warn_assgn) {
				did_warn_assgn = 1;
				warning("operator `^=' is not supported in old awk");
			}
			yylval.nodetypeval = Node_assign_exp;
			return ASSIGNOP;
		}
		pushback();
		if (do_lint && ! did_warn_op) {
			did_warn_op = 1;
			warning("operator `^' is not supported in old awk");
		}
		return '^';
	}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -