📄 tok_io.c
字号:
/*++
/* NAME
/* tok_io 3
/* SUMMARY
/* token I/O
/* PACKAGE
/* unproto
/* SYNOPSIS
/* #include "token.h"
/*
/* struct token *tok_get()
/*
/* void tok_flush(t)
/* struct token *t;
/*
/* void tok_show(t)
/* struct token *t;
/*
/* void tok_show_ch(t)
/* struct token *t;
/*
/* void put_str(s)
/* char *s;
/*
/* void put_ch(c)
/* int c;
/*
/* void put_nl()
/*
/* char *in_path;
/* int in_line;
/* DESCRIPTION
/* These functions read from stdin and write to stdout. The
/* tokenizer keeps track of where the token appeared in the input
/* stream; on output, this information is used to preserve correct
/* line number information (even after lots of token lookahead or
/* after function-header rewriting) so that diagnostics from the
/* next compiler stage make sense.
/*
/* tok_get() reads the next token from standard input. It returns
/* a null pointer when the end of input is reached.
/*
/* tok_show() displays the contents of a (possibly composite) token
/* on the standard output.
/*
/* tok_show_ch() displays the contents of a single-character token
/* on the standard output. The character should not be a newline.
/*
/* tok_flush() displays the contents of a (possibly composite) token
/* on the standard output and makes it available for re-use.
/*
/* put_str() writes a null-terminated string to standard output.
/* There should be no newline characters in the string argument.
/*
/* put_ch() writes one character to standard output. The character
/* should not be a newline.
/*
/* put_nl() outputs a newline character and adjusts the program's idea of
/* the current output line.
/*
/* The in_path and in_line variables contain the file name and
/* line number of the most recently read token.
/* BUGS
/* The tokenizer is just good enough for the unproto filter.
/* As a benefit, it is quite fast.
/* AUTHOR(S)
/* Wietse Venema
/* Eindhoven University of Technology
/* Department of Mathematics and Computer Science
/* Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands
/* LAST MODIFICATION
/* 92/01/15 21:52:59
/* VERSION/RELEASE
/* 1.3
/*--*/
static char io_sccsid[] = "@(#) tok_io.c 1.3 92/01/15 21:52:59";
/* C library */
#include <stdio.h>
#include <ctype.h>
extern char *strchr();
extern char *malloc();
extern char *realloc();
extern char *strcpy();
/* Application-specific stuff */
#include "token.h"
#include "vstring.h"
#include "error.h"
extern char *strsave(); /* XXX need include file */
/*
 * Stuff to keep track of original source file name and position.
 *
 * in_path/in_line describe the input side, out_path/out_line the output
 * side. All path pointers start out pointing at the same def_path object;
 * CHECK_LINE_CONTROL compares paths by pointer value, not by string
 * contents, so identity of the pointer is what matters here.
 */
static char def_path[] = ""; /* default path name */
char *in_path = def_path; /* current input file name */
int in_line = 1; /* current input line number */
static char *out_path = def_path; /* last name in output line control */
static int out_line = 1; /* current output line number */
int last_ch; /* type of last output */
/*
 * Forward declarations of file-local (static) helpers that are referenced
 * before their definitions. They are declared K&R style, without parameter
 * lists.
 */
static int read_quoted();
static void read_comment();
static int backslash_newline();
static char *read_hex();
static char *read_octal();
static void fix_line_control();
/*
* Character input with one level of pushback. The INPUT() macro recursively
* strips backslash-newline pairs from the input stream. The UNPUT() macro
* should be used only for characters obtained through the INPUT() macro.
*
* After skipping a backslash-newline pair, the input line counter is not
* updated, and we continue with the same logical source line. We just
* update a counter with the number of backslash-newline sequences that must
* be accounted for (backslash_newline() updates the counter). At the end of
* the logical source line, an appropriate number of newline characters is
* pushed back (in tok_get()). I do not know how GCC handles this, but it
* seems to produce the same output.
*
* Because backslash_newline() recursively calls itself (through the INPUT()
* macro), we will run out of stack space, given a sufficiently long
* sequence of backslash-newline pairs.
*/
/*
 * Push-back state. in_char is a plain char, so only real character values
 * (never EOF) should be handed to UNPUT().
 */
static char in_char = 0; /* push-back storage */
static int in_flag = 0; /* pushback available */
static int nl_compensate = 0; /* line continuation kluge */
/*
 * INPUT(c) - store the next logical input character in c and yield it.
 *
 * Evaluated in this order:
 *  1. a pushed-back character is pending: consume it and clear in_flag;
 *  2. otherwise read a character; anything but a backslash is the result;
 *  3. after a backslash, read one more character; if it is not a newline,
 *     give it back to stdio with ungetc() and the result is the backslash;
 *  4. backslash followed by newline: the pair is dropped and
 *     backslash_newline() supplies the character that follows it.
 *
 * The argument is expanded several times and assigned to, so it must be a
 * plain int lvalue without side effects.
 */
#define INPUT(c) (in_flag ? (in_flag = 0, c = in_char) : \
(c = getchar()) != '\\' ? c : \
(c = getchar()) != '\n' ? (ungetc(c, stdin), c = '\\') : \
(c = backslash_newline()))
/*
 * UNPUT(c) - push one character back so that the next INPUT() returns it.
 * There is a single slot: a second UNPUT() before the next INPUT()
 * overwrites the first.
 */
#define UNPUT(c) (in_flag = 1, in_char = c)
/*
 * Directives that should be ignored.
 *
 * Only compiled in when IGNORE_DIRECTIVES is defined. The macro is used as
 * the leading part of a char * array initializer, so it must expand to one
 * or more comma-separated strings. The list is terminated by a null
 * pointer; do_control() walks it until that terminator.
 */
#ifdef IGNORE_DIRECTIVES
static char *ignore_directives[] = {
IGNORE_DIRECTIVES,
0,
};
#endif
/*
 * Modified string and ctype stuff.
 *
 * STREQUAL() compares the first characters before calling strcmp(), as a
 * cheap early-out. ISALNUM()/ISALPHA() treat '_' as a letter, the way C
 * identifiers do. ISSPACE() excludes newline, which the tokenizer handles
 * separately from other white space.
 *
 * ISHEX() uses isxdigit() rather than strchr() on a digit string: strchr()
 * also matches the terminating null character, which made a NUL input
 * byte look like a hexadecimal digit.
 *
 * Every macro argument is parenthesized so that expressions can be passed
 * safely. Arguments may still be evaluated more than once, so they must
 * not have side effects.
 */
#define STREQUAL(x,y) (*(x) == *(y) && strcmp((x),(y)) == 0)
#define ISALNUM(c) (isalnum(c) || (c) == '_')
#define ISALPHA(c) (isalpha(c) || (c) == '_')
#define ISSPACE(c) (isspace(c) && (c) != '\n')
#define ISDOT(c) ((c) == '.')
#define ISHEX(c) (isxdigit(c) != 0)
#define ISOCTAL(c) (isdigit(c) && (c) != '8' && (c) != '9')
/*
 * Collect all characters that satisfy one condition.
 *
 * v    - struct vstring that receives the text, written from v->str onward;
 * c    - int lvalue; on entry it holds the first character, which is stored
 *        unconditionally; afterwards it is reused as the INPUT() target;
 * cond - expression, normally written in terms of c, that is re-evaluated
 *        for every character read.
 *
 * Characters are appended with VS_ADDCH() for as long as cond holds; a zero
 * result from VS_ADDCH() is treated as an allocation failure and is fatal.
 * The first character that fails cond is pushed back with UNPUT(); EOF just
 * ends the collection without push-back. The result is null-terminated.
 *
 * The first character and the terminator are stored directly, without
 * VS_ADDCH(). NOTE(review): this presumably relies on the vstring always
 * having room for them -- confirm against vstring.h.
 *
 * The macro expands to a brace-enclosed block, not an expression, and it
 * declares locals named vs and cp that are visible to cond.
 */
#define COLLECT(v,c,cond) { \
register struct vstring *vs = v; \
register char *cp = vs->str; \
*cp++ = c; \
while (INPUT(c) != EOF) { \
if (cond) { \
if (VS_ADDCH(vs, cp, c) == 0) \
fatal("out of memory"); \
} else { \
UNPUT(c); \
break; \
} \
} \
*cp = 0; \
}
/*
 * Ensure that output line information is correct.
 *
 * Calls fix_line_control(p, l) when the recorded output position differs
 * from (p, l). Paths are compared by pointer value, not by string contents.
 * Both arguments are expanded twice. The macro expands to a brace-enclosed
 * block, so a trailing semicolon after it forms an extra empty statement.
 */
#define CHECK_LINE_CONTROL(p,l) { if (out_path != (p) || out_line != (l)) \
fix_line_control((p),(l)); }
/*
 * do_control - parse control line
 *
 * Reads the tokens that make up the remainder of a "#" line and decides
 * what to copy to the output:
 *
 *  - white space directly after the "#" is discarded;
 *  - a number starts a line control ("# number pathname junk"): the whole
 *    line is copied to stdout, and both the input and the output position
 *    (in_line/out_line, in_path/out_path) are set from it;
 *  - with IGNORE_DIRECTIVES defined, a word found in ignore_directives[]
 *    causes the rest of the line to be discarded;
 *  - anything else is passed through unchanged, preceded by "#".
 *
 * Always returns 0; the value carries no information. The function was
 * declared int but used to return without a value, so any caller that did
 * look at the result read garbage.
 *
 * A token's number is copied to a local before the token is handed to
 * tok_flush() or tok_free(), because both release the token for re-use
 * and it must not be inspected afterwards.
 */
static int do_control()
{
    struct token *t;
    int tokno;
    int line;
    char *path;

    /* Make sure that the directive shows up in the right place. */
    CHECK_LINE_CONTROL(in_path, in_line);

    while ((t = tok_get()) != 0) {
        switch (t->tokno) {

        case TOK_WSPACE:
            /* Ignore blanks after "#" token. */
            tok_free(t);
            break;

        case TOK_NUMBER:
            /*
             * Line control is of the form: number pathname junk. Since we
             * have no idea what junk the preprocessor may generate, we copy
             * all line control tokens to stdout.
             */
            put_str("# ");
            line = atoi(t->vstr->str);  /* extract line number */
            tok_flush(t);
            while ((t = tok_get()) != 0 && t->tokno == TOK_WSPACE)
                tok_flush(t);           /* copy white space */

            /*
             * Keep the current path unless a quoted name follows. This
             * also covers end of input right after the number, where path
             * used to be left uninitialized.
             */
            path = in_path;
            if (t != 0) {
                if (t->tokno == '"')
                    path = strsave(t->vstr->str);       /* extract path name */
                for (;;) {              /* copy until newline */
                    tokno = t->tokno;
                    tok_flush(t);
                    if (tokno == '\n' || (t = tok_get()) == 0)
                        break;
                }
            }
            out_line = in_line = line;  /* synchronize */
            out_path = in_path = path;  /* synchronize */
            return (0);

#ifdef IGNORE_DIRECTIVES
        case TOK_WORD:
            /*
             * Optionally ignore other #directives. This is only a partial
             * solution, because the preprocessor will still see them.
             */
            {
                char **cpp;
                char *cp = t->vstr->str;

                for (cpp = ignore_directives; *cpp; cpp++) {
                    if (STREQUAL(cp, *cpp)) {
                        for (;;) {      /* discard until newline */
                            tokno = t->tokno;
                            tok_free(t);
                            if (tokno == '\n' || (t = tok_get()) == 0)
                                break;
                        }
                        return (0);
                    }
                }
            }
            /* FALLTHROUGH */
#endif

        default:
            /* Pass through. */
            put_ch('#');
            for (;;) {                  /* copy until newline */
                tokno = t->tokno;
                tok_flush(t);
                if (tokno == '\n' || (t = tok_get()) == 0)
                    break;
            }
            return (0);

        case 0:
            /*
             * Hit EOF, punt. NOTE(review): the token is not released on
             * this path, exactly as before -- confirm whether a token
             * with number 0 can occur and who owns it.
             */
            put_ch('#');
            return (0);
        }
    }

    /* End of input before anything but white space was seen. */
    return (0);
}
/*
 * backslash_newline - fix up things after reading a backslash-newline pair
 *
 * Invoked from the INPUT() macro once it has consumed a backslash that is
 * immediately followed by a newline. Notes that one more newline has been
 * swallowed (nl_compensate) and returns the next logical input character.
 * That character is fetched through INPUT() again, so a run of consecutive
 * backslash-newline pairs recurses one level per pair.
 */
static int backslash_newline()
{
    register int next;

    nl_compensate += 1;
    INPUT(next);
    return (next);
}
/* tok_get - get next token */
static int last_tokno = '\n';
struct token *tok_get()
{
register struct token *t;
register int c;
int d;
/*
* Get one from the pool and fill it in. The loop is here in case we hit
* a preprocessor control line, which happens in a minority of all cases.
* We update the token input path and line info *after* backslash-newline
* processing or the newline compensation would go wrong.
*/
t = tok_alloc();
for (;;) {
if ((INPUT(c)) == EOF) {
tok_free(t);
return (0);
} else if ((t->line = in_line, t->path = in_path), !isascii(c)) {
t->vstr->str[0] = c;
t->vstr->str[1] = 0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -