📄 scheme.vtc
字号:
// This file is intended as an example of a large-scale VT application. As// an interpreter built on top of an interpreter, it is not terribly useful,// but it illustrates a lot of VTC.//// The interpreter is a fairly faithful subset of scheme. It lacks// fluid-let, variable-argument lambda expressions, parsing improper lists,// support for scheme's data types. It has few primitives, although the// system is open-ended. The interface is quite primitive; it accepts only// one line per expression, and has no provisions for comments. The// prettyprinter will go into an infinite loop (consuming memory as it goes)// if you print a circular list--but then, so does MIT Scheme. Use ^CB to// abort an infinite loop.//// The interpreter's performance is fair. On a standard workstation, there// is no delay in evaluation until we start getting into reasonably// complicated tasks, such as exponentiation with Church numerals (see the// end of this file). The interpreter does not optimize for tail-recursion,// and the evaluator would require a complete rewrite to deal with such// things as continuations.//// Some background on the distribution functions we use://// The distribution's concept of a "list" is an array whose first element// is the length of the list. This obviously conflicts with the Scheme// definition, and to avoid confusion, we will alias these functions to// replace "list" with "array".//// For the purposes of this file, all we need is new_list(), which returns// a new (distribution) list, and add_list(), which adds an element to its// end. We will alias them to new_array() and add_array().//// We also use the AVL tree mangement functions. make_tree(fn) returns a// tree whose comparison function is <fn>. insert_tree(tree, key, data)// inserts an element into the tree, and find_tree(tree, key) returns the// data associated with a key, or NULL if it isn't found. //// skipspaces() takes a string pointer and returns a pointer to the first// non-space character after it.// This is the most general way to alias functions that take variable// numbers of arguments. It's not really necessary in this case.func new_array() --> callv(.new_list, argc, argv)func add_array() --> callv(.add_list, argc, argv)func del_array() --> callv(.del_list, argc, argv)// Read-Eval-Print loopfunc rep() [line] { rep_loop = 1; while (1) { line = read(); // Pass along lines starting with \ or /, otherwise parse if (strchr("\\/", *line)) pass(active, line); else parse_scheme(line); }}// The error function aborts the currently-running program, making// exception-handling fairly easy. We do have to reset the state of// the environment globals, though.func scheme_error() { printf("Error: %s\n", callv(.bprintf, argc, argv)); scheme_reset(); abort();}func scheme_reset() { env_stack = base(env_stack); cur_env = *env_stack; if (rep_loop) detach(.rep);}// TypesTenv ?:= new_assoc(); // EnvironmentsTdata ?:= new_assoc(); // DataTpair ?:= new_assoc(); // PairsTproc ?:= new_assoc(); // Compound procedures// Environments consist of a pointer to the parent environment and a tree// of bindings. When we want to search for a binding, we search for it// in the given environment and up the chain of parent environments.// Construct a new environmentfunc make_env(parent) [env] { env = alloc(2, Tenv); env->parent = parent; env->symbols = make_tree(.stricmp); return env;}// Unless scheme_allow_rebind has been set, we cannot bind a variable twice// in an environment. This can be exceedingly annoying in the case of the// global environment, so we have the flag. VTC allows us to play fast and// loose with globals this way; we do not need any declaration outside the// function of the existence of scheme_allow_rebind.func env_add_sym(env, sym, data) { if (!scheme_allow_rebind && find_tree(env->symbols, sym)) scheme_error("Repeated binding of symbol %s", sym); insert_tree(env->symbols, sym, data);}// When we add primitives, we don't want to complain about rebinding.func sys_env_add_sym(env, sym, data) { insert_tree(env->symbols, sym, data); }// The symbol to mutate may not be in the current environment, so we// recursively search up the chain. Recursion in VTC is completely// correct theoretically--that is, it is impossible to blow the stack// without using up the host system's memory. It is worth noting that// VTC does no optimization for tail-recursion, however.func env_mutate_sym(env, sym, new_data) { if (find_tree(env->symbols, sym)) insert_tree(env->symbols, sym, new_data); else if (env->parent) env_mutate_sym(env->parent, sym, new_data); else scheme_error("Mutation of nonexistent symbol %s", sym);}// Recursively search for a bindingfunc env_find_sym(env, sym) { return find_tree(env->symbols, sym) ? : (env->parent ? env_find_sym(env->parent, sym) : NULL);}// env_stack and cur_env are the environment globals. The push_env() and// pop_env() abstractions are used in all cases except for eval_letstar(),// which has to remember the current environment position.env_stack ?:= alloc(16) - 1;func push_env(env) { cur_env = *++env_stack = env; }func pop_env() { cur_env = *--env_stack; }if (!global_env) { global_env = make_env(NULL); push_env(global_env);}// Data// A piece of Scheme data is a type attached to a value. If we were// terribly concerned about space, we would store integers, strings,// and simple procedures as just integers, strings, and function pointers,// and use a type() function to recover the type of a piece of data, but// space is a lost cause anyway.// Constructor for datafunc make_data(type, val) [data] { data = alloc(2, Tdata); data->type = type; data->val = val; return data;}// The enum() distribution function assigns incremental integers to a list// of variables.enum(&ST_INT, &ST_STR, &ST_SYM, &ST_SIMPROC, &ST_COMPROC, &ST_PAIR, &ST_DIST);// We will use VTC globals to represent the distinguished values nil, false,// and undefined.nil = make_data(ST_DIST, 0);false = make_data(ST_DIST, 1);undefined = make_data(ST_DIST, 2);// Shortcuts involving datafunc symbol(sym) --> make_data(ST_SYM, sym)func data_true(data) --> (data != false && data != nil)// Error functions will use this global.stypenames = table("int", "str", "symbol", "simple proc", "compound proc", \ "pair", "distinguished");// Pairs require another data structure because they contain two elements.// As a convenience, we define some simple pair-handling shortcuts for// readability (e.g. car(cdr(cdr(data))) is much more readable than// data->val->cdr->val->cdr->val->car).// Construct a pairfunc make_pair(car, cdr) [pair] { pair = alloc(2, Tpair); pair->car = car; pair->cdr = cdr; return pair;}func cons(a, b) --> make_data(ST_PAIR, make_pair(a, b))func car(data) --> data->val->carfunc cdr(data) --> data->val->cdrfunc cadr(data) --> data->val->cdr->val->car// We also have some rudimentary list handling functions. This is where// things would start to get confusing if we were still using distribution// list management functions with the same names--we'd be using make_list()// to get a Scheme list of two elements and new_list() to get a distribution// list, and so forth.func make_list(a, b) { return cons(a, cons(b, nil)); }// In general, iteration in VTC is much faster than recursion.// As long as we have a pair, look at its cdr. Once we are done cdring down// any list structure we may have had, we know that <data> was a list if we// are now looking at a nil pointer.func is_list(data) { for (; data->type == ST_PAIR; data = cdr(data)); return (data == nil);}func length_list(data) [n] { for (n = 0; data->type == ST_PAIR; data = cdr(data), n++); return n;}// Shortcut for checking legality of special forms.func is_list_len(data, n) --> is_list(data) && length_list(data) == n// Compound procedures consist of a list of parameters, the procedure body,// and the environment in which the procedure was defined, for use as the// parent of the environment in which the procedure body will run.// Make a procedurefunc make_proc(params, body, parent) [proc] { proc = alloc(3, Tproc); proc->params = params; proc->body = body; proc->parent = parent; return proc;}// Only eval_lambda(), apply_compound(), and the prettyprinter deal with// procedures directly.// Eval-Print portion of REP loopfunc parse_scheme(text) [data] { data = read_expr(&text); // We should have only one expression on the line if (*text) printf("Warning: extraneous text: %s\n", text); printf("--> %s\n", pformat(eval_scheme(data)));}// The parser's job is to convert a string into a Scheme data structure which// the evaluator will then process. The read_*() functions accept a pointer// to a string pointer, return the Scheme data structure associated that the// string corresponds to, and assigns a new string pointer to the argument// pointing to the end of the expression that they read. As Scheme syntax// is defined recursively, so are the parsing functions.// The next available character is usually enough to tell what type of// expression to expect. The exception is symbols like "1+". We will// deal with these in read_int().func read_expr(sptr) [s] { s = *sptr = skipspaces(*sptr); if (!*s) scheme_error("Unexpected end of line"); if (*s == '(') return read_list(sptr); if (isdigit(*s)) return read_int(sptr); if (*s == '"') return read_string(sptr); if (*s == '#') return read_dist(sptr); if (*s == '\'') return read_quoted(sptr); return read_symbol(sptr);}// Read a listfunc read_list(sptr) { (*sptr)++; return read_list_elem(sptr);}func read_list_elem(sptr) [elem] { *sptr = skipspaces(*sptr); if (**sptr == ')') { /* End of the line */ (*sptr)++; return nil; } else return cons(read_expr(sptr), read_list_elem(sptr));}// That last line would be a big no-no in C, since the read_list_elem() could// theoretically be evaluated before the read_expr() (and usually would be, in// practice). VTC always evaluates left-to-right, though.// Read an integerfunc read_int(sptr) [i, s] { i = atoi(*sptr); for (s = *sptr; isdigit(*s); s++); // Actually, we're not certain that what we were supposed to read // was an integer. If we haven't hit a ')' or ' ' or the end of // the line, we're really looking at a symbol that happened to begin // with a number. So go back and read the symbol. if (*s && *s != ' ' && *s != ')') return read_symbol(sptr); *sptr = s; return make_data(ST_INT, i);}// Read a string. We allow escaping of \ and " characters.func read_string(sptr) [s, str, norm] { s = *sptr + 1; str = ""; while (s[norm = strcspn(s, "\\\"")]) { strcat(str, s, norm); if (s[norm] == '\"') { *sptr = s + norm + 1; return make_data(ST_STR, str); } str[strlen(str)] = s[norm + 1]; s += norm + 2; } scheme_error("Unterminated string");}// Read a distinguished value. This isn't a very open-ended function, but// it'll do.func read_dist(sptr) [s, norm] { s = *sptr + 1; norm = strcspn(s, ") "); *sptr = s + norm; if (!stricmp("f", s, norm)) return false; else if (!stricmp("nil", s, norm)) return nil; else if (!stricmp("[undefined value]", s, norm)) return undefined; scheme_error("Unknown distinguished value");}// Read the next expression and parse it as (quote <expression>)func read_quoted(sptr) { (*sptr)++; return make_list(symbol("quote"), read_expr(sptr));}// Read a symbolfunc read_symbol(sptr) [s, norm] { s = *sptr; norm = strcspn(s, ") "); *sptr = s + norm; return symbol(strcpy("", s, norm));}// The strcpy("", s, norm) is conceptually unclear, but it's the easiest// way to produce a copy of a substring. field() will work, but because of// peculiarities of strdup(), it will end up remembering everything before// the string as well, which is undesirable.// The prettyprinter is also recursive. When we pformat() a piece of data,// we look up the data type in a table of functions and call the appropriate// one. All these functions return strings.func pformat_int(data) --> itoa(data->val)func pformat_str(data) --> stringconst(data->val)func pformat_sym(data) --> data->valfunc pformat_simproc(data) { return "#[simple procedure \"" + (func_name(data->val) + 4) + "\"]";}// pformat_comproc() expands lambda definitions if we want it to. Note that// the body of a lambda expression may contain references to symbols in// enclosing environments not visible from outside the lambda expression, so// expanding the body may not be enough to make it clear what a procedure will// do.func pformat_comproc(data) [pstr, i] { if (!scheme_expand_comproc) return "#[compound procedure]"; for (pstr = "", i = 1; i <= *data->val->params; i++) strcat(pstr, " " + data->val->params[i]); return bprintf("(lambda (%s) %s)", pstr + 1, pformat(data->val->body));}// We format lists starting with 'quote' specially.func pformat_pair(data) [s] { if (car(data)->type == ST_SYM && !stricmp(car(data)->val, "quote") && is_list_len(cdr(data), 1)) return "'" + pformat(cadr(data)); s = "(" + pformat(car(data)); while (cdr(data)->type == ST_PAIR) { strcat(s, " " + pformat(cadr(data))); data = cdr(data); } if (cdr(data) == nil)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -