📄 mungegrammar
字号:
#! /usr/staff/bin/perl

# perl script to create files scangen.in, fmq.in, actions.cc, and tokens.h
# from grammar file (stdin).
#
# The motivation is that these four files contain a great deal of
# redundant information.  Creating them automatically from a single source
# frees the programmer/student from the burden of keeping them all consistent.
#
# Does very little error checking.  Just a pre-processor.
#
# Input layout, as consumed below:
#   CHARACTERS   lines copied verbatim to scangen.in
#   TOKENS       token definitions, each terminated by ';'
#   PRODUCTIONS  grammar lines copied to fmq.in; text between [[ and ]] is
#                an action routine and is moved into actions.cc
# Anything from "--" to end of line is a comment; blank lines are skipped.
#
# NOTE(review): blank lines and indentation inside the here-documents and
# string literals below were reconstructed from a whitespace-collapsed copy
# of this script; confirm them against a pristine copy if one is available.

use strict;
use warnings;

# Script-wide state.  The subroutines below communicate through these
# variables and through $_ (the current input line / token definition).
my $next_maj       = 1;     # next available major token number
my $next_min       = 1;     # next available minor token number
    # NB: fmq starts counting tokens at 1.  We use 0 for SPACE.
my $exception_list = "";    # reserve words; exceptions to IDENT
my $maj;                    # major number used for the token being emitted
my $tok_name;               # name of the token (or variant) being processed
my $insert_image;           # quoted insert image, or the token name if none
my $ins_cost;               # first number of the "ins,del" pair
my $del_cost;               # second number of the "ins,del" pair
my $ident_regexp;           # definition of IDENT, saved until the end
my $ident_maj;              # major number assigned to IDENT
my $ident_min;              # minor number assigned to IDENT
my $saveline;               # partial token definition being glued together
my $rest;                   # variants still to be processed
my $next_action;            # number of the next action routine
my $state;                  # "syntax" or "code" while reading productions

# open output files:
open(SCANGEN, '>', 'scangen.in') || die "can't write scangen.in\n";
open(FMQ,     '>', 'fmq.in')     || die "can't write fmq.in\n";
open(TOKENS,  '>', 'tokens.h')   || die "can't write tokens.h\n";
open(ACTIONS, '>', 'actions.cc') || die "can't write actions.cc\n";

#########################################
# header of scangen.in:
# NB: there is no support for comments in scangen input
print SCANGEN <<EOF;
OPTIONS
tables, optimize, unreachable
CLASS
EOF

#########################################
# header of fmq.in:
print FMQ "This file was generated automatically, " . `date` . "\n";
print FMQ <<EOF;
*fmq
binary notext
*define
*terminals
EOF

#########################################
# header of tokens.h:
print TOKENS "// This file was generated automatically, " . `date` . "\n\n";
print TOKENS "// Major and minor token numbers.\n\n";
print TOKENS "#define MAJ_SPACE 0\n";

#########################################
# header of actions.cc:
print ACTIONS "// This file was generated automatically, " . `date` . "\n\n";
print ACTIONS <<EOF;
// C++ embodiment of action routines.
// Each routine has a number.  To execute a routine,
// the parser calls do_action, passing the action number
// as a parameter.  Do_action is a monster case statement.
#include <stdio.h>
#include <stdlib.h>
#include "strings.h"
#include "inpbuf.h"
#include "scanner.h"
#include "tokens.h"
#include "attributes.h"
#include "actions.h"
////////
// Perform semantic actions.
//
void do_action(int action)
{
    location_t l;           // Location of current node.
    syntax_tree_t * p;      // Temporary syntax tree node pointer.
    switch (action) {
EOF

#########################################

# wrapup: write the footers of the generated files, close them, and exit.
# Called by getline when stdin is exhausted; never returns.
sub wrapup {
    # NB: footer of scangen.in was printed when we hit the PRODUCTIONS line
    # footer of fmq.in:
    print FMQ "\n*end\n";
    # footer of actions.cc:
    print ACTIONS " }\n";
    print ACTIONS "}\n";
    # NB: tokens.h doesn't need a footer

    # Buffered write errors only surface at close, so check each one.
    close(SCANGEN) || die "can't close scangen.in: $!\n";
    close(FMQ)     || die "can't close fmq.in: $!\n";
    close(TOKENS)  || die "can't close tokens.h: $!\n";
    close(ACTIONS) || die "can't close actions.cc: $!\n";
    exit;
}

# getline: read the next non-blank line of stdin into $_, with any
# "--" comment (and the whitespace before it) removed.  At end of input
# it calls wrapup, which terminates the program.
sub getline {
    do {
        wrapup() if eof(STDIN);
        $_ = <STDIN>;
        s:\s*--.*$::;       # delete comments
    } until (/\S/);         # ignore blank lines
}

getline();                  # prime the pump
/\bCHARACTERS\b/ || die "'CHARACTERS' expected at grammar line $.\n";
getline();                  # toss CHARACTERS line
while (! /\bTOKENS\b/) {
    print SCANGEN;          # character-class lines go to scangen.in as-is
    getline();
}
getline();                  # toss TOKENS line
print SCANGEN "\nDEFINITION\n";

# put_token: emit the definition held in $_ for token $tok_name.
# A definition that starts with a double-quoted string is an exception to
# IDENT: it is appended to $exception_list (printed later with IDENT)
# instead of being written now.  Any other definition is written to
# scangen.in as a TOKEN line, with major number 0 when the insert image
# is SPACE and $next_maj otherwise.
sub put_token {
    if (s/^"(.*)"/$1/) {
        # IDENT exception
        if ($exception_list) {
            $exception_list .= ",\n";
        }
        $exception_list .= " '" . $1 . "' {" . $next_maj . "," . $next_min . "}";
    } else {
        if ($insert_image eq "SPACE") {
            $maj = 0;
        } else {
            $maj = $next_maj;
        }
        print SCANGEN " TOKEN ", $tok_name, " {", $maj, ",", $next_min, "} = ", $_, ";\n";
    }
}

# Token section: one iteration per complete (';'-terminated) definition.
TOKEN: while (1) {
    # glue lines together until we have a complete token definition:
    while (! /;\s*$/) {
        last TOKEN if /\bPRODUCTIONS\b/;
        s/,\s*$/|/;         # assume that trailing comma separates variants
        s/\n/ /;            # don't just chop; change newline to space
        $saveline = $_;
        getline();
        $_ = $saveline . " " . $_;
    }

    # Strip and record the prefix:  name ["image"] ins_cost , del_cost
    s/^\s*(\w*)\s*("(.*)")?\s*(\d*)\s*,\s*(\d*)\s*//;
    $tok_name = $1;
    $insert_image = $3 || $tok_name;
    $ins_cost = $4;
    $del_cost = $5;
    if ($insert_image ne "SPACE") {
        print FMQ $insert_image, " ", $ins_cost, " ", $del_cost, "\n";
        print TOKENS "#define MAJ_", $tok_name, " ", $next_maj, "\n";
    }
    s/\s*;\s*//;            # delete trailing semicolon

    if (s/^=\s*//) {
        # singular token
        if ($tok_name eq "IDENT") {
            $ident_regexp = $_;     # save for later
            $ident_maj = $next_maj;
            $ident_min = $next_min;
        } else {
            put_token();
        }
        print TOKENS "#define MIN_", $tok_name, " ", $next_min, "\n";
        $next_min++;
    } else {
        # token with variants
        s/^:\s*// || die "invalid syntax at grammar line $.\n";
        $_ .= "|";          # so all variants end with '|'
        while ($_) {
            # Peel off the first "name = definition |"; $_ becomes the
            # definition, and the remaining variants are kept in $rest.
            s/^(\w*)\s*=\s*([^|]*)\|\s*(.*)/$2/ || die "invalid syntax at grammar line $.\n";
            $tok_name = $1;
            $rest = $3;
            put_token();
            print TOKENS "#define MIN_", $tok_name, " ", $next_min, "\n";
            $next_min++;
            $_ = $rest;
        }
    }
    $next_maj++ unless ($insert_image eq "SPACE");
    $_ = "";
}

# identifier definition, with exception list
print SCANGEN " TOKEN IDENT {", $ident_maj, ",", $ident_min, "} = ", $ident_regexp, "\n EXCEPT\n", $exception_list, ";\n";

getline();                  # toss PRODUCTIONS line
print FMQ "\n*productions\n\n";

# Productions section.  Grammar text goes to fmq.in; each [[ ... ]] action
# is replaced there by "#N " and its body becomes "case N:" in actions.cc.
# The loop ends only when getline reaches end of input and calls wrapup.
$next_action = 1;
$state = "syntax";          # as opposed to "code"
while (1) {
    if ($state eq "syntax") {
        if (!(/^\S/ || /\s*::=/)) {
            # continuation line
            $_ = "... " . $_ ;
        }
        if (s/\[\[(.*)/"\#" . $next_action . " "/e) {
            print FMQ;
            $_ = $1;        # remainder of the line is the start of the code
            $state = "code";
            print ACTIONS "case ", $next_action, ":\n";
            $next_action++;
        } else {
            print FMQ;
            $_ = "";
        }
    } else {
        # code
        if (s/\]\](.*)//) {
            print ACTIONS if /\S/;
            print ACTIONS "break;\n";
            $_ = $1;        # remainder of the line is syntax again
            $state = "syntax";
        } else {
            print ACTIONS;
            $_ = "";
        }
    }
    getline() unless /\S/;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -