📄 input.c
字号:
/* * input.c: read the source form */#include <stdio.h>#include <assert.h>#include <time.h>#include "halibut.h"#define TAB_STOP 8 /* for column number tracking */static void setpos(input * in, char *fname){ in->pos.filename = fname; in->pos.line = 1; in->pos.col = (in->reportcols ? 1 : -1);}static void unget(input * in, int c, filepos * pos){ if (in->npushback >= in->pushbacksize) { in->pushbacksize = in->npushback + 16; in->pushback = resize(in->pushback, in->pushbacksize); } in->pushback[in->npushback].chr = c; in->pushback[in->npushback].pos = *pos; /* structure copy */ in->npushback++;}/* ---------------------------------------------------------------------- *//* * Macro subsystem */typedef struct macro_Tag macro;struct macro_Tag { wchar_t *name, *text;};struct macrostack_Tag { macrostack *next; wchar_t *text; int ptr, npushback; filepos pos;};static int macrocmp(void *av, void *bv){ macro *a = (macro *) av, *b = (macro *) bv; return ustrcmp(a->name, b->name);}static voidmacrodef(tree234 * macros, wchar_t * name, wchar_t * text, filepos fpos){ macro *m = mknew(macro); m->name = name; m->text = text; if (add234(macros, m) != m) { error(err_macroexists, &fpos, name); sfree(name); sfree(text); }}static intmacrolookup(tree234 * macros, input * in, wchar_t * name, filepos * pos){ macro m, *gotit; m.name = name; gotit = find234(macros, &m, NULL); if (gotit) { macrostack *expansion = mknew(macrostack); expansion->next = in->stack; expansion->text = gotit->text; expansion->pos = *pos; /* structure copy */ expansion->ptr = 0; expansion->npushback = in->npushback; in->stack = expansion; return TRUE; } else return FALSE;}static void macrocleanup(tree234 * macros){ int ti; macro *m; for (ti = 0; (m = (macro *) index234(macros, ti)) != NULL; ti++) { sfree(m->name); sfree(m->text); sfree(m); } freetree234(macros);}/* * Can return EOF */static int get(input * in, filepos * pos){ int pushbackpt = in->stack ? in->stack->npushback : 0; if (in->npushback > pushbackpt) { --in->npushback; if (pos) *pos = in->pushback[in->npushback].pos; /* structure copy */ return in->pushback[in->npushback].chr; } else if (in->stack) { wchar_t c = in->stack->text[in->stack->ptr]; if (in->stack->text[++in->stack->ptr] == L'\0') { macrostack *tmp = in->stack; in->stack = tmp->next; sfree(tmp); } return c; } else if (in->currfp) { int c = getc(in->currfp); if (c == EOF) { fclose(in->currfp); in->currfp = NULL; } /* Track line numbers, for error reporting */ if (pos) *pos = in->pos; if (in->reportcols) { switch (c) { case '\t': in->pos.col = 1 + (in->pos.col + TAB_STOP - 1) % TAB_STOP; break; case '\n': in->pos.col = 1; in->pos.line++; break; default: in->pos.col++; break; } } else { in->pos.col = -1; if (c == '\n') in->pos.line++; } /* FIXME: do input charmap translation. We should be returning * Unicode here. */ return c; } else return EOF;}/* * Lexical analysis of source files. */typedef struct token_Tag token;struct token_Tag { int type; int cmd, aux; wchar_t *text; filepos pos;};enum { tok_eof, /* end of file */ tok_eop, /* end of paragraph */ tok_white, /* whitespace */ tok_word, /* a word or word fragment */ tok_cmd, /* \command */ tok_lbrace, /* { */ tok_rbrace /* } */};/* Halibut command keywords. */enum { c__invalid, /* invalid command */ c__comment, /* comment command (\#) */ c__escaped, /* escaped character */ c__nbsp, /* nonbreaking space */ c_A, /* appendix heading */ c_B, /* bibliography entry */ c_BR, /* bibliography rewrite */ c_C, /* chapter heading */ c_H, /* heading */ c_I, /* invisible index mark */ c_IM, /* index merge/rewrite */ c_K, /* capitalised cross-reference */ c_S, /* aux field is 0, 1, 2, ... */ c_U, /* unnumbered-chapter heading */ c_W, /* Web hyperlink */ c_L, /* Relative/local hyperlink */ c_b, /* bulletted list */ c_c, /* code */ c_cfg, /* configuration directive */ c_copyright, /* copyright statement */ c_cw, /* weak code */ c_date, /* document processing date */ c_define, /* macro definition */ c_e, /* emphasis */ c_i, /* visible index mark */ c_ii, /* uncapitalised visible index mark */ c_k, /* uncapitalised cross-reference */ c_R, /* free text cross-reference */ c_n, /* numbered list */ c_nocite, /* bibliography trickery */ c_preamble, /* document preamble text */ c_q, /* quote marks */ c_rule, /* horizontal rule */ c_title, /* document title */ c_u, /* aux field is char code */ c_versionid /* document RCS id */};/* Perhaps whitespace should be defined in a more Unicode-friendly way? */#define iswhite(c) ( (c)==32 || (c)==9 || (c)==13 || (c)==10 )#define isnl(c) ( (c)==10 )#define isdec(c) ( ((c)>='0'&&(c)<='9') )#define fromdec(c) ( (c)-'0' )#define ishex(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='F') || ((c)>='a'&&(c)<='f'))#define fromhex(c) ( (c)<='9' ? (c)-'0' : ((c)&0xDF) - ('A'-10) )#define iscmd(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='Z') || ((c)>='a'&&(c)<='z'))/* * Keyword comparison function. Like strcmp, but between a wchar_t * * and a char *. */static int kwcmp(wchar_t const *p, char const *q){ int i; do { i = *p - *q; } while (*p++ && *q++ && !i); return i;}/* * Match a keyword. */static void match_kw(token * tok){ /* * FIXME. The ids are explicit in here so as to allow long-name * equivalents to the various very short keywords. * * This list must be sorted, it's searched using binary search. */ static const struct { char const *name; int id; } keywords[] = { { "#", c__comment} , /* comment command (\#) */ { "-", c__escaped} , /* nonbreaking hyphen */ { "A", c_A} , /* appendix heading */ { "B", c_B} , /* bibliography entry */ { "BR", c_BR} , /* bibliography rewrite */ { "C", c_C} , /* chapter heading */ { "H", c_H} , /* heading */ { "I", c_I} , /* invisible index mark */ { "IM", c_IM} , /* index merge/rewrite */ { "K", c_K} , /* capitalised cross-reference */ { "L", c_L} , /* Relative/local hyperlink */ { "R", c_R} , /* free text cross-reference */ { "U", c_U} , /* unnumbered-chapter heading */ { "W", c_W} , /* Web hyperlink */ { "\\", c__escaped} , /* escaped backslash (\\) */ { "_", c__nbsp} , /* nonbreaking space (\_) */ { "b", c_b} , /* bulletted list */ { "c", c_c} , /* code */ { "cfg", c_cfg} , /* configuration directive */ { "copyright", c_copyright} , /* copyright statement */ { "cw", c_cw} , /* weak code */ { "date", c_date} , /* document processing date */ { "define", c_define} , /* macro definition */ { "e", c_e} , /* emphasis */ { "i", c_i} , /* visible index mark */ { "ii", c_ii} , /* uncapitalised visible index mark */ { "k", c_k} , /* uncapitalised cross-reference */ { "n", c_n} , /* numbered list */ { "nocite", c_nocite} , /* bibliography trickery */ { "preamble", c_preamble} , /* document preamble text */ { "q", c_q} , /* quote marks */ { "rule", c_rule} , /* horizontal rule */ { "title", c_title} , /* document title */ { "versionid", c_versionid} , /* document RCS id */ { "{", c__escaped} , /* escaped lbrace (\{) */ { "}", c__escaped} , /* escaped rbrace (\}) */ }; int i, j, k, c; /* * Special cases: \S{0,1,2,...} and \uABCD. If the syntax * doesn't match correctly, we just fall through to the * binary-search phase. */ if (tok->text[0] == 'S') { /* We expect numeric characters thereafter. */ wchar_t *p = tok->text + 1; int n; if (!*p) n = 1; else { n = 0; while (*p && isdec(*p)) { n = 10 * n + fromdec(*p); p++; } } if (!*p) { tok->cmd = c_S; tok->aux = n; return; } } else if (tok->text[0] == 'u') { /* We expect hex characters thereafter. */ wchar_t *p = tok->text + 1; int n = 0; while (*p && ishex(*p)) { n = 16 * n + fromhex(*p); p++; } if (!*p) { tok->cmd = c_u; tok->aux = n; return; } } i = -1; j = sizeof(keywords) / sizeof(*keywords); while (j - i > 1) { k = (i + j) / 2; c = kwcmp(tok->text, keywords[k].name); if (c < 0) j = k; else if (c > 0) i = k; else { /* c == 0 */ tok->cmd = keywords[k].id; return; } } tok->cmd = c__invalid;}/* * Read a token from the input file, in the normal way (`normal' in * the sense that code paragraphs work a different way). */token get_token(input * in){ int c; int nls; token ret; rdstring rs = { 0, 0, NULL }; filepos cpos; ret.cmd = c__invalid; ret.aux = FALSE; ret.text = NULL; /* default */ c = get(in, &cpos); ret.pos = cpos; if (iswhite(c)) { /* tok_white or tok_eop */ nls = 0; do { if (isnl(c)) nls++; } while ((c = get(in, &cpos)) != EOF && iswhite(c)); if (c == EOF) { ret.type = tok_eof; return ret; } unget(in, c, &cpos); ret.type = (nls > 1 ? tok_eop : tok_white); return ret; } else if (c == EOF) { /* tok_eof */ ret.type = tok_eof; return ret; } else if (c == '\\') { /* tok_cmd */ c = get(in, &cpos); if (c == '-' || c == '\\' || c == '_' || c == '#' || c == '{' || c == '}') { /* single-char command */ rdadd(&rs, (wchar_t)c); } else if (c == 'u') { int len = 0; do { rdadd(&rs, (wchar_t)c); len++; c = get(in, &cpos); } while (ishex(c) && len < 5); unget(in, c, &cpos); } else if (iscmd(c)) { do { rdadd(&rs, (wchar_t)c); c = get(in, &cpos); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -