📄 preproc.c
字号:
/* -*- mode: c; c-file-style: "bsd" -*- */
/* preproc.c macro preprocessor for the Netwide Assembler
*
* The Netwide Assembler is copyright (C) 1996 Simon Tatham and
* Julian Hall. All rights reserved. The software is
* redistributable under the licence given in the file "Licence"
* distributed in the NASM archive.
*
* initial version 18/iii/97 by Simon Tatham
*/
/* Typical flow of text through preproc
*
* pp_getline gets tokenised lines, either
*
* from a macro expansion
*
* or
* {
* read_line gets raw text from stdmacpos, or predef, or current input file
* tokenise converts to tokens
* }
*
* expand_mmac_params is used to expand %1 etc., unless a macro is being
* defined or a false conditional is being processed
* (%0, %1, %+1, %-1, %%foo
*
* do_directive checks for directives
*
* expand_smacro is used to expand single line macros
*
* expand_mmacro is used to expand multi-line macros
*
* detoken is used to convert the line back to text
*/
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
#include "nasm.h"
#include "nasmlib.h"
typedef struct SMacro SMacro;
typedef struct MMacro MMacro;
typedef struct Context Context;
typedef struct Token Token;
typedef struct Blocks Blocks;
typedef struct Line Line;
typedef struct Include Include;
typedef struct Cond Cond;
typedef struct IncPath IncPath;
/*
* Store the definition of a single-line macro.
*/
struct SMacro {
SMacro *next;
char *name;
int casesense;
int nparam;
int in_progress;
Token *expansion;
};
/*
* Store the definition of a multi-line macro. This is also used to
* store the interiors of `%rep...%endrep' blocks, which are
* effectively self-re-invoking multi-line macros which simply
* don't have a name or bother to appear in the hash tables. %rep
* blocks are signified by having a NULL `name' field.
*
* In a MMacro describing a `%rep' block, the `in_progress' field
* isn't merely boolean, but gives the number of repeats left to
* run.
*
* The `next' field is used for storing MMacros in hash tables; the
* `next_active' field is for stacking them on istk entries.
*
* When a MMacro is being expanded, `params', `iline', `nparam',
* `paramlen', `rotate' and `unique' are local to the invocation.
*/
struct MMacro {
MMacro *next;
char *name;
int casesense;
int nparam_min, nparam_max;
int plus; /* is the last parameter greedy? */
int nolist; /* is this macro listing-inhibited? */
int in_progress;
Token *dlist; /* All defaults as one list */
Token **defaults; /* Parameter default pointers */
int ndefs; /* number of default parameters */
Line *expansion;
MMacro *next_active;
MMacro *rep_nest; /* used for nesting %rep */
Token **params; /* actual parameters */
Token *iline; /* invocation line */
int nparam, rotate, *paramlen;
unsigned long unique;
int lineno; /* Current line number on expansion */
};
/*
* The context stack is composed of a linked list of these.
*/
struct Context {
Context *next;
SMacro *localmac;
char *name;
unsigned long number;
};
/*
* This is the internal form which we break input lines up into.
* Typically stored in linked lists.
*
* Note that `type' serves a double meaning: TOK_SMAC_PARAM is not
* necessarily used as-is, but is intended to denote the number of
* the substituted parameter. So in the definition
*
* %define a(x,y) ( (x) & ~(y) )
*
* the token representing `x' will have its type changed to
* TOK_SMAC_PARAM, but the one representing `y' will be
* TOK_SMAC_PARAM+1.
*
* TOK_INTERNAL_STRING is a dirty hack: it's a single string token
* which doesn't need quotes around it. Used in the pre-include
* mechanism as an alternative to trying to find a sensible type of
* quote to use on the filename we were passed.
*/
struct Token {
Token *next;
char *text;
SMacro *mac; /* associated macro for TOK_SMAC_END */
int type;
};
enum {
TOK_WHITESPACE = 1, TOK_COMMENT, TOK_ID, TOK_PREPROC_ID, TOK_STRING,
TOK_NUMBER, TOK_SMAC_END, TOK_OTHER, TOK_SMAC_PARAM,
TOK_INTERNAL_STRING
};
/*
* Multi-line macro definitions are stored as a linked list of
* these, which is essentially a container to allow several linked
* lists of Tokens.
*
* Note that in this module, linked lists are treated as stacks
* wherever possible. For this reason, Lines are _pushed_ on to the
* `expansion' field in MMacro structures, so that the linked list,
* if walked, would give the macro lines in reverse order; this
* means that we can walk the list when expanding a macro, and thus
* push the lines on to the `expansion' field in _istk_ in reverse
* order (so that when popped back off they are in the right
* order). It may seem cockeyed, and it relies on my design having
* an even number of steps in, but it works...
*
* Some of these structures, rather than being actual lines, are
* markers delimiting the end of the expansion of a given macro.
* This is for use in the cycle-tracking and %rep-handling code.
* Such structures have `finishes' non-NULL, and `first' NULL. All
* others have `finishes' NULL, but `first' may still be NULL if
* the line is blank.
*/
struct Line {
Line *next;
MMacro *finishes;
Token *first;
};
/*
* To handle an arbitrary level of file inclusion, we maintain a
* stack (ie linked list) of these things.
*/
struct Include {
Include *next;
FILE *fp;
Cond *conds;
Line *expansion;
char *fname;
int lineno, lineinc;
MMacro *mstk; /* stack of active macros/reps */
};
/*
* Include search path. This is simply a list of strings which get
* prepended, in turn, to the name of an include file, in an
* attempt to find the file if it's not in the current directory.
*/
struct IncPath {
IncPath *next;
char *path;
};
/*
* Conditional assembly: we maintain a separate stack of these for
* each level of file inclusion. (The only reason we keep the
* stacks separate is to ensure that a stray `%endif' in a file
* included from within the true branch of a `%if' won't terminate
* it and cause confusion: instead, rightly, it'll cause an error.)
*/
struct Cond {
Cond *next;
int state;
};
enum {
/*
* These states are for use just after %if or %elif: IF_TRUE
* means the condition has evaluated to truth so we are
* currently emitting, whereas IF_FALSE means we are not
* currently emitting but will start doing so if a %else comes
* up. In these states, all directives are admissible: %elif,
* %else and %endif. (And of course %if.)
*/
COND_IF_TRUE, COND_IF_FALSE,
/*
* These states come up after a %else: ELSE_TRUE means we're
* emitting, and ELSE_FALSE means we're not. In ELSE_* states,
* any %elif or %else will cause an error.
*/
COND_ELSE_TRUE, COND_ELSE_FALSE,
/*
* This state means that we're not emitting now, and also that
* nothing until %endif will be emitted at all. It's for use in
* two circumstances: (i) when we've had our moment of emission
* and have now started seeing %elifs, and (ii) when the
* condition construct in question is contained within a
* non-emitting branch of a larger condition construct.
*/
COND_NEVER
};
#define emitting(x) ( (x) == COND_IF_TRUE || (x) == COND_ELSE_TRUE )
/*
* These defines are used as the possible return values for do_directive
*/
#define NO_DIRECTIVE_FOUND 0
#define DIRECTIVE_FOUND 1
/*
* Condition codes. Note that we use c_ prefix not C_ because C_ is
* used in nasm.h for the "real" condition codes. At _this_ level,
* we treat CXZ and ECXZ as condition codes, albeit non-invertible
* ones, so we need a different enum...
*/
static const char *conditions[] = {
"a", "ae", "b", "be", "c", "cxz", "e", "ecxz", "g", "ge", "l", "le",
"na", "nae", "nb", "nbe", "nc", "ne", "ng", "nge", "nl", "nle", "no",
"np", "ns", "nz", "o", "p", "pe", "po", "s", "z"
};
enum {
c_A, c_AE, c_B, c_BE, c_C, c_CXZ, c_E, c_ECXZ, c_G, c_GE, c_L, c_LE,
c_NA, c_NAE, c_NB, c_NBE, c_NC, c_NE, c_NG, c_NGE, c_NL, c_NLE, c_NO,
c_NP, c_NS, c_NZ, c_O, c_P, c_PE, c_PO, c_S, c_Z
};
static int inverse_ccs[] = {
c_NA, c_NAE, c_NB, c_NBE, c_NC, -1, c_NE, -1, c_NG, c_NGE, c_NL, c_NLE,
c_A, c_AE, c_B, c_BE, c_C, c_E, c_G, c_GE, c_L, c_LE, c_O, c_P, c_S,
c_Z, c_NO, c_NP, c_PO, c_PE, c_NS, c_NZ
};
/*
* Directive names.
*/
static const char *directives[] = {
"%arg",
"%assign", "%clear", "%define", "%elif", "%elifctx", "%elifdef",
"%elifid", "%elifidn", "%elifidni", "%elifmacro", "%elifnctx",
"%elifndef",
"%elifnid", "%elifnidn", "%elifnidni", "%elifnmacro", "%elifnnum",
"%elifnstr",
"%elifnum", "%elifstr", "%else", "%endif", "%endm", "%endmacro",
"%endrep", "%error", "%exitrep", "%iassign", "%idefine", "%if",
"%ifctx", "%ifdef", "%ifid", "%ifidn", "%ifidni", "%ifmacro",
"%ifnctx",
"%ifndef", "%ifnid", "%ifnidn", "%ifnidni", "%ifnmacro", "%ifnnum",
"%ifnstr", "%ifnum", "%ifstr", "%imacro", "%include",
"%ixdefine", "%line",
"%local",
"%macro", "%pop", "%push", "%rep", "%repl", "%rotate",
"%stacksize",
"%strlen", "%substr", "%undef", "%xdefine"
};
enum {
PP_ARG,
PP_ASSIGN, PP_CLEAR, PP_DEFINE, PP_ELIF, PP_ELIFCTX, PP_ELIFDEF,
PP_ELIFID, PP_ELIFIDN, PP_ELIFIDNI, PP_ELIFMACRO, PP_ELIFNCTX,
PP_ELIFNDEF,
PP_ELIFNID, PP_ELIFNIDN, PP_ELIFNIDNI, PP_ELIFNMACRO, PP_ELIFNNUM,
PP_ELIFNSTR,
PP_ELIFNUM, PP_ELIFSTR, PP_ELSE, PP_ENDIF, PP_ENDM, PP_ENDMACRO,
PP_ENDREP, PP_ERROR, PP_EXITREP, PP_IASSIGN, PP_IDEFINE, PP_IF,
PP_IFCTX, PP_IFDEF, PP_IFID, PP_IFIDN, PP_IFIDNI, PP_IFMACRO,
PP_IFNCTX,
PP_IFNDEF, PP_IFNID, PP_IFNIDN, PP_IFNIDNI, PP_IFNMACRO, PP_IFNNUM,
PP_IFNSTR, PP_IFNUM, PP_IFSTR, PP_IMACRO, PP_INCLUDE,
PP_IXDEFINE, PP_LINE,
PP_LOCAL,
PP_MACRO, PP_POP, PP_PUSH, PP_REP, PP_REPL, PP_ROTATE,
PP_STACKSIZE,
PP_STRLEN, PP_SUBSTR, PP_UNDEF, PP_XDEFINE
};
/* If this is a an IF, ELIF, ELSE or ENDIF keyword */
static int is_condition(int arg)
{
return ((arg >= PP_ELIF) && (arg <= PP_ENDIF)) ||
((arg >= PP_IF) && (arg <= PP_IFSTR));
}
/* For TASM compatibility we need to be able to recognise TASM compatible
* conditional compilation directives. Using the NASM pre-processor does
* not work, so we look for them specifically from the following list and
* then jam in the equivalent NASM directive into the input stream.
*/
#ifndef MAX
# define MAX(a,b) ( ((a) > (b)) ? (a) : (b))
#endif
enum {
TM_ARG, TM_ELIF, TM_ELSE, TM_ENDIF, TM_IF, TM_IFDEF, TM_IFDIFI,
TM_IFNDEF, TM_INCLUDE, TM_LOCAL
};
static const char *tasm_directives[] = {
"arg", "elif", "else", "endif", "if", "ifdef", "ifdifi",
"ifndef", "include", "local"
};
static int StackSize = 4;
static char *StackPointer = "ebp";
static int ArgOffset = 8;
static int LocalOffset = 4;
static Context *cstk;
static Include *istk;
static IncPath *ipath = NULL;
static efunc _error; /* Pointer to client-provided error reporting function */
static evalfunc evaluate;
static int pass; /* HACK: pass 0 = generate dependencies only */
static unsigned long unique; /* unique identifier numbers */
static Line *predef = NULL;
static ListGen *list;
/*
* The number of hash values we use for the macro lookup tables.
* FIXME: We should *really* be able to configure this at run time,
* or even have the hash table automatically expanding when necessary.
*/
#define NHASH 31
/*
* The current set of multi-line macros we have defined.
*/
static MMacro *mmacros[NHASH];
/*
* The current set of single-line macros we have defined.
*/
static SMacro *smacros[NHASH];
/*
* The multi-line macro we are currently defining, or the %rep
* block we are currently reading, if any.
*/
static MMacro *defining;
/*
* The number of macro parameters to allocate space for at a time.
*/
#define PARAM_DELTA 16
/*
* The standard macro set: defined as `static char *stdmac[]'. Also
* gives our position in the macro set, when we're processing it.
*/
#include "macros.c"
static const char **stdmacpos;
/*
* The extra standard macros that come from the object format, if
* any.
*/
static const char **extrastdmac = NULL;
int any_extrastdmac;
/*
* Tokens are allocated in blocks to improve speed
*/
#define TOKEN_BLOCKSIZE 4096
static Token *freeTokens = NULL;
struct Blocks {
Blocks *next;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -