📄 input.c
字号:
/*
* input.c: read the source form
*/
#include <stdio.h>
#include <assert.h>
#include <time.h>
#include "halibut.h"
#define TAB_STOP 8 /* for column number tracking */
/*
 * Reset the input's current position to the start of file `fname':
 * line 1, and column 1 if column reporting is enabled or -1 if not.
 * The filename pointer is stored as given, not copied.
 */
static void setpos(input * in, char *fname)
{
    in->pos.filename = fname;
    in->pos.line = 1;
    if (in->reportcols)
        in->pos.col = 1;
    else
        in->pos.col = -1;
}
/*
 * Push character `c', together with the position `*pos' it was read
 * from, onto the input's pushback stack, so that a later get() can
 * hand it back. The stack grows by 16 slots beyond the current depth
 * whenever it is full.
 */
static void unget(input * in, int c, filepos * pos)
{
    int slot = in->npushback;

    if (slot >= in->pushbacksize) {
        in->pushbacksize = slot + 16;
        in->pushback = resize(in->pushback, in->pushbacksize);
    }

    in->pushback[slot].chr = c;
    in->pushback[slot].pos = *pos;     /* structure copy */
    in->npushback = slot + 1;
}
/* ---------------------------------------------------------------------- */
/*
* Macro subsystem
*/
/*
 * One user-defined macro, as stored in the macro tree.
 * `name' is the key the tree is sorted on (see macrocmp); `text' is
 * the replacement text fed back into the input when the macro is
 * invoked. Both strings are freed along with the record in
 * macrocleanup().
 */
typedef struct macro_Tag macro;
struct macro_Tag {
wchar_t *name, *text;
};
/*
 * One active macro expansion. Expansions form a linked stack hanging
 * off the input structure; get() reads from the topmost one.
 */
struct macrostack_Tag {
macrostack *next; /* expansion that was active when this one began */
wchar_t *text; /* replacement text; points into the macro record, not freed on pop */
int ptr, npushback; /* ptr: index of next char of text to return;
                     * npushback: pushback depth when the expansion
                     * started, below which get() will not pop */
filepos pos; /* source position of the macro invocation */
};
static int macrocmp(void *av, void *bv)
{
macro *a = (macro *) av, *b = (macro *) bv;
return ustrcmp(a->name, b->name);
}
/*
 * Define a new macro called `name' expanding to `text'.
 *
 * This function takes over both strings: on success they are stored
 * in the new tree entry (and later released by macrocleanup); if a
 * macro of that name already exists, an err_macroexists error is
 * reported at `fpos' and the strings are freed here.
 */
static void
macrodef(tree234 * macros, wchar_t * name, wchar_t * text, filepos fpos)
{
    macro *m = mknew(macro);
    m->name = name;
    m->text = text;
    if (add234(macros, m) != m)
    {
        /*
         * add234 handed back a different element, so the tree kept
         * its existing entry and did not adopt ours. Report the
         * clash, then release everything we own -- including the
         * record itself, which would otherwise leak.
         */
        error(err_macroexists, &fpos, name);
        sfree(name);
        sfree(text);
        sfree(m);
    }
}
/*
 * Look `name' up in the macro tree. If it is defined, push a new
 * expansion frame onto in->stack so that subsequent get() calls read
 * the macro's text, and return TRUE; otherwise leave the input alone
 * and return FALSE.
 *
 * The frame borrows the macro's text pointer rather than copying it,
 * records `*pos' as the invocation position, and remembers the
 * current pushback depth.
 *
 * NOTE(review): get() reads text[0] and then tests text[1], so this
 * appears to assume macro text is never empty -- confirm at the
 * definition site.
 */
static int
macrolookup(tree234 * macros, input * in, wchar_t * name, filepos * pos)
{
    macro key, *found;
    macrostack *frame;

    key.name = name;
    found = find234(macros, &key, NULL);
    if (!found)
        return FALSE;

    frame = mknew(macrostack);
    frame->text = found->text;
    frame->ptr = 0;
    frame->pos = *pos;                 /* structure copy */
    frame->npushback = in->npushback;
    frame->next = in->stack;
    in->stack = frame;
    return TRUE;
}
/*
 * Free every macro record in the tree (name, text and the record
 * itself), then free the tree.
 */
static void macrocleanup(tree234 * macros)
{
    macro *entry;
    int idx = 0;

    while ((entry = (macro *) index234(macros, idx)) != NULL) {
        sfree(entry->name);
        sfree(entry->text);
        sfree(entry);
        idx++;
    }
    freetree234(macros);
}
/*
 * Fetch the next input character.
 *
 * Sources are tried in this order:
 *  1. characters pushed back with unget() -- but only those pushed
 *     since the innermost active macro expansion began; entries at or
 *     below the depth recorded in that expansion are left alone until
 *     it has been popped;
 *  2. the text of the innermost active macro expansion;
 *  3. the current input file.
 *
 * If `pos' is non-NULL it always receives a position: the one stored
 * with a pushed-back character, the macro invocation position for
 * characters coming out of a macro expansion, or the file position
 * of the character just read (before the position is advanced).
 *
 * Returns the character. Can return EOF: once when the current file
 * runs out (the file is closed and in->currfp cleared at that point),
 * and on every call made while no source is available.
 */
static int get(input * in, filepos * pos)
{
    int pushbackpt = in->stack ? in->stack->npushback : 0;

    if (in->npushback > pushbackpt)
    {
        --in->npushback;
        if (pos)
            *pos = in->pushback[in->npushback].pos;    /* structure copy */
        return in->pushback[in->npushback].chr;
    } else if (in->stack)
    {
        /*
         * NOTE(review): this reads text[ptr] and then text[ptr+1], so
         * it assumes the expansion text is non-empty -- confirm that
         * empty macro definitions cannot reach here.
         */
        wchar_t c = in->stack->text[in->stack->ptr];
        /*
         * Characters produced by a macro are reported at the place the
         * macro was invoked; without this the caller's position would
         * be left uninitialised.
         */
        if (pos)
            *pos = in->stack->pos;     /* structure copy */
        if (in->stack->text[++in->stack->ptr] == L'\0')
        {
            /* Expansion exhausted: pop it. The text belongs to the
             * macro record, so only the frame is freed. */
            macrostack *tmp = in->stack;
            in->stack = tmp->next;
            sfree(tmp);
        }
        return c;
    } else if (in->currfp)
    {
        int c = getc(in->currfp);
        if (c == EOF)
        {
            fclose(in->currfp);
            in->currfp = NULL;
        }
        /* Track line numbers, for error reporting */
        if (pos)
            *pos = in->pos;
        if (in->reportcols)
        {
            switch (c)
            {
            case '\t':
                /*
                 * Advance to the next tab stop. Columns are 1-based,
                 * so stops sit at 1, 1+TAB_STOP, 1+2*TAB_STOP, ...
                 */
                in->pos.col += TAB_STOP - (in->pos.col - 1) % TAB_STOP;
                break;
            case '\n':
                in->pos.col = 1;
                in->pos.line++;
                break;
            default:
                in->pos.col++;
                break;
            }
        } else
        {
            in->pos.col = -1;
            if (c == '\n')
                in->pos.line++;
        }
        /* FIXME: do input charmap translation. We should be returning
         * Unicode here. */
        return c;
    } else
    {
        /* Nothing left to read; still give the caller a defined
         * position to attach to the EOF. */
        if (pos)
            *pos = in->pos;
        return EOF;
    }
}
/*
* Lexical analysis of source files.
*/
/*
 * A lexical token as produced by get_token().
 */
typedef struct token_Tag token;
struct token_Tag {
int type; /* one of the tok_* values */
int cmd, aux; /* cmd: one of the c_* command codes, filled in by match_kw;
               * aux: numeric argument for \S and \u commands */
wchar_t *text; /* token text, or NULL (get_token's default) */
filepos pos; /* position of the first character of the token */
};
/* Token types, stored in the `type' field of a token. */
enum {
tok_eof, /* end of file */
tok_eop, /* end of paragraph */
tok_white, /* whitespace */
tok_word, /* a word or word fragment */
tok_cmd, /* \command */
tok_lbrace, /* { */
tok_rbrace /* } */
};
/*
 * Halibut command keywords, stored in the `cmd' field of a token.
 * match_kw() maps command names onto these values; c_S and c_u are
 * recognised by special-case parsing there and carry a number in the
 * token's `aux' field.
 */
enum {
c__invalid, /* invalid command */
c__comment, /* comment command (\#) */
c__escaped, /* escaped character */
c__nbsp, /* nonbreaking space */
c_A, /* appendix heading */
c_B, /* bibliography entry */
c_BR, /* bibliography rewrite */
c_C, /* chapter heading */
c_H, /* heading */
c_I, /* invisible index mark */
c_IM, /* index merge/rewrite */
c_K, /* capitalised cross-reference */
c_S, /* aux field is 0, 1, 2, ... */
c_U, /* unnumbered-chapter heading */
c_W, /* Web hyperlink */
c_L, /* Relative/local hyperlink */
c_b, /* bulleted list */
c_c, /* code */
c_cfg, /* configuration directive */
c_copyright, /* copyright statement */
c_cw, /* weak code */
c_date, /* document processing date */
c_define, /* macro definition */
c_e, /* emphasis */
c_i, /* visible index mark */
c_ii, /* uncapitalised visible index mark */
c_k, /* uncapitalised cross-reference */
c_R, /* free text cross-reference */
c_n, /* numbered list */
c_nocite, /* bibliography trickery */
c_preamble, /* document preamble text */
c_q, /* quote marks */
c_rule, /* horizontal rule */
c_title, /* document title */
c_u, /* aux field is char code */
c_versionid /* document RCS id */
};
/*
 * Character classification helpers for the lexer. These are macros,
 * so each may evaluate its argument more than once: pass only
 * side-effect-free expressions. They assume an ASCII-compatible
 * character set.
 *
 * Perhaps whitespace should be defined in a more Unicode-friendly way?
 */
/* Space, tab, carriage return or line feed. */
#define iswhite(c) ( (c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n' )
/* Line feed only. */
#define isnl(c) ( (c)=='\n' )
/* Decimal digit, and its numeric value. */
#define isdec(c) ( (c)>='0' && (c)<='9' )
#define fromdec(c) ( (c)-'0' )
/* Hex digit of either case, and its numeric value (the 0xDF mask
 * folds lower-case letters onto upper case). */
#define ishex(c) ( isdec(c) || ((c)>='A' && (c)<='F') || ((c)>='a' && (c)<='f') )
#define fromhex(c) ( (c)<='9' ? fromdec(c) : ((c)&0xDF) - ('A'-10) )
/* Character allowed in a multi-character command name. */
#define iscmd(c) ( isdec(c) || ((c)>='A' && (c)<='Z') || ((c)>='a' && (c)<='z') )
/*
 * Keyword comparison function. Like strcmp, but between a wchar_t *
 * and a char *.
 *
 * Walks both strings in step and returns the difference between the
 * first pair of characters that differ (wide character minus narrow
 * character), or 0 if the strings are identical.
 */
static int kwcmp(wchar_t const *p, char const *q)
{
    for (;; p++, q++) {
        int diff = *p - *q;

        /*
         * Stop at the first mismatch, or when both strings end
         * together (diff == 0 with *p == 0 implies *q == 0 too).
         */
        if (diff != 0 || *p == L'\0')
            return diff;
    }
}
/*
 * Match a keyword.
 *
 * Examines tok->text (the command name without its leading backslash)
 * and stores the corresponding command code in tok->cmd:
 *
 *  - "S" followed only by decimal digits gives c_S, with the number in
 *    tok->aux (a bare "S" gives 1);
 *  - "u" followed only by hex digits gives c_u, with the value in
 *    tok->aux (a bare "u" gives 0);
 *  - anything else is looked up in the keyword table; an unknown name
 *    gives c__invalid.
 *
 * tok->aux is left untouched except in the two numeric cases.
 */
static void match_kw(token * tok)
{
    /*
     * FIXME. The ids are explicit in here so as to allow long-name
     * equivalents to the various very short keywords.
     *
     * This list must be sorted, it's searched using binary search.
     */
    static const struct {
        char const *name;
        int id;
    } keywords[] = {
        {"#", c__comment},             /* comment command (\#) */
        {"-", c__escaped},             /* nonbreaking hyphen */
        {"A", c_A},                    /* appendix heading */
        {"B", c_B},                    /* bibliography entry */
        {"BR", c_BR},                  /* bibliography rewrite */
        {"C", c_C},                    /* chapter heading */
        {"H", c_H},                    /* heading */
        {"I", c_I},                    /* invisible index mark */
        {"IM", c_IM},                  /* index merge/rewrite */
        {"K", c_K},                    /* capitalised cross-reference */
        {"L", c_L},                    /* Relative/local hyperlink */
        {"R", c_R},                    /* free text cross-reference */
        {"U", c_U},                    /* unnumbered-chapter heading */
        {"W", c_W},                    /* Web hyperlink */
        {"\\", c__escaped},            /* escaped backslash (\\) */
        {"_", c__nbsp},                /* nonbreaking space (\_) */
        {"b", c_b},                    /* bulleted list */
        {"c", c_c},                    /* code */
        {"cfg", c_cfg},                /* configuration directive */
        {"copyright", c_copyright},    /* copyright statement */
        {"cw", c_cw},                  /* weak code */
        {"date", c_date},              /* document processing date */
        {"define", c_define},          /* macro definition */
        {"e", c_e},                    /* emphasis */
        {"i", c_i},                    /* visible index mark */
        {"ii", c_ii},                  /* uncapitalised visible index mark */
        {"k", c_k},                    /* uncapitalised cross-reference */
        {"n", c_n},                    /* numbered list */
        {"nocite", c_nocite},          /* bibliography trickery */
        {"preamble", c_preamble},      /* document preamble text */
        {"q", c_q},                    /* quote marks */
        {"rule", c_rule},              /* horizontal rule */
        {"title", c_title},            /* document title */
        {"versionid", c_versionid},    /* document RCS id */
        {"{", c__escaped},             /* escaped lbrace (\{) */
        {"}", c__escaped},             /* escaped rbrace (\}) */
    };
    int lo, hi;

    /*
     * Special cases: \S{0,1,2,...} and \uABCD. If the syntax
     * doesn't match correctly, we just fall through to the
     * binary-search phase.
     */
    if (tok->text[0] == 'S') {
        /* We expect numeric characters thereafter. */
        wchar_t *digits = tok->text + 1;
        int level = 0;

        if (*digits == L'\0') {
            level = 1;                 /* bare \S */
        } else {
            for (; isdec(*digits); digits++)
                level = 10 * level + fromdec(*digits);
        }
        if (*digits == L'\0') {
            tok->cmd = c_S;
            tok->aux = level;
            return;
        }
    } else if (tok->text[0] == 'u') {
        /* We expect hex characters thereafter. */
        wchar_t *digits = tok->text + 1;
        int code = 0;

        for (; ishex(*digits); digits++)
            code = 16 * code + fromhex(*digits);
        if (*digits == L'\0') {
            tok->cmd = c_u;
            tok->aux = code;
            return;
        }
    }

    /*
     * Binary search over the half-open index range [lo, hi).
     */
    lo = 0;
    hi = sizeof(keywords) / sizeof(keywords[0]);
    while (lo < hi) {
        int mid = (lo + hi - 1) / 2;
        int cmp = kwcmp(tok->text, keywords[mid].name);

        if (cmp == 0) {
            tok->cmd = keywords[mid].id;
            return;
        }
        if (cmp < 0)
            hi = mid;
        else
            lo = mid + 1;
    }
    tok->cmd = c__invalid;
}
/*
* Read a token from the input file, in the normal way (`normal' in
* the sense that code paragraphs work a different way).
*/
token get_token(input * in)
{
int c;
int nls;
token ret;
rdstring rs = { 0, 0, NULL };
filepos cpos;
ret.cmd = c__invalid;
ret.aux = FALSE;
ret.text = NULL; /* default */
c = get(in, &cpos);
ret.pos = cpos;
if (iswhite(c))
{ /* tok_white or tok_eop */
nls = 0;
do
{
if (isnl(c))
nls++;
}
while ((c = get(in, &cpos)) != EOF && iswhite(c));
if (c == EOF)
{
ret.type = tok_eof;
return ret;
}
unget(in, c, &cpos);
ret.type = (nls > 1 ? tok_eop : tok_white);
return ret;
} else if (c == EOF)
{ /* tok_eof */
ret.type = tok_eof;
return ret;
} else if (c == '\\')
{ /* tok_cmd */
c = get(in, &cpos);
if (c == '-' || c == '\\' || c == '_' ||
c == '#' || c == '{' || c == '}')
{
/* single-char command */
rdadd(&rs, (wchar_t)c);
} else if (c == 'u')
{
int len = 0;
do
{
rdadd(&rs, (wchar_t)c);
len++;
c = get(in, &cpos);
}
while (ishex(c) && len < 5);
unget(in, c, &cpos);
} else if (iscmd(c))
{
do
{
rdadd(&rs, (wchar_t)c);
c = get(in, &cpos);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -