preproc.c

来自「开源的nasm编译器源码,研究编译器原理很有帮助」· C语言代码 · 共 2,635 行 · 第 1/5 页
2,635 行
/* -*- mode: c; c-file-style: "bsd" -*- *//* preproc.c   macro preprocessor for the Netwide Assembler * * The Netwide Assembler is copyright (C) 1996 Simon Tatham and * Julian Hall. All rights reserved. The software is * redistributable under the licence given in the file "Licence" * distributed in the NASM archive. * * initial version 18/iii/97 by Simon Tatham *//* Typical flow of text through preproc * * pp_getline gets tokenised lines, either * *   from a macro expansion * * or *   { *   read_line  gets raw text from stdmacpos, or predef, or current input file *   tokenise   converts to tokens *   } * * expand_mmac_params is used to expand %1 etc., unless a macro is being * defined or a false conditional is being processed * (%0, %1, %+1, %-1, %%foo * * do_directive checks for directives * * expand_smacro is used to expand single line macros * * expand_mmacro is used to expand multi-line macros * * detoken is used to convert the line back to text */#include <stdio.h>#include <stdarg.h>#include <stdlib.h>#include <stddef.h>#include <string.h>#include <ctype.h>#include <limits.h>#include "nasm.h"#include "nasmlib.h"typedef struct SMacro SMacro;typedef struct MMacro MMacro;typedef struct Context Context;typedef struct Token Token;typedef struct Blocks Blocks;typedef struct Line Line;typedef struct Include Include;typedef struct Cond Cond;typedef struct IncPath IncPath;/* * Store the definition of a single-line macro. */struct SMacro{    SMacro *next;    char *name;    int casesense;    int nparam;    int in_progress;    Token *expansion;};/* * Store the definition of a multi-line macro. This is also used to * store the interiors of `%rep...%endrep' blocks, which are * effectively self-re-invoking multi-line macros which simply * don't have a name or bother to appear in the hash tables. %rep * blocks are signified by having a NULL `name' field. 
* * In a MMacro describing a `%rep' block, the `in_progress' field * isn't merely boolean, but gives the number of repeats left to * run. * * The `next' field is used for storing MMacros in hash tables; the * `next_active' field is for stacking them on istk entries. * * When a MMacro is being expanded, `params', `iline', `nparam', * `paramlen', `rotate' and `unique' are local to the invocation. */struct MMacro{    MMacro *next;    char *name;    int casesense;    int nparam_min, nparam_max;    int plus;			/* is the last parameter greedy? */    int nolist;			/* is this macro listing-inhibited? */    int in_progress;    Token *dlist;		/* All defaults as one list */    Token **defaults;		/* Parameter default pointers */    int ndefs;			/* number of default parameters */    Line *expansion;    MMacro *next_active;    MMacro *rep_nest;		/* used for nesting %rep */    Token **params;		/* actual parameters */    Token *iline;		/* invocation line */    int nparam, rotate, *paramlen;    unsigned long unique;    int lineno;			/* Current line number on expansion */};/* * The context stack is composed of a linked list of these. */struct Context{    Context *next;    SMacro *localmac;    char *name;    unsigned long number;};/* * This is the internal form which we break input lines up into. * Typically stored in linked lists. * * Note that `type' serves a double meaning: TOK_SMAC_PARAM is not * necessarily used as-is, but is intended to denote the number of * the substituted parameter. So in the definition * *     %define a(x,y) ( (x) & ~(y) ) *  * the token representing `x' will have its type changed to * TOK_SMAC_PARAM, but the one representing `y' will be * TOK_SMAC_PARAM+1. * * TOK_INTERNAL_STRING is a dirty hack: it's a single string token * which doesn't need quotes around it. Used in the pre-include * mechanism as an alternative to trying to find a sensible type of * quote to use on the filename we were passed. 
*/struct Token{    Token *next;    char *text;    SMacro *mac;		/* associated macro for TOK_SMAC_END */    int type;};enum{    TOK_WHITESPACE = 1, TOK_COMMENT, TOK_ID, TOK_PREPROC_ID, TOK_STRING,    TOK_NUMBER, TOK_SMAC_END, TOK_OTHER, TOK_SMAC_PARAM,    TOK_INTERNAL_STRING};/* * Multi-line macro definitions are stored as a linked list of * these, which is essentially a container to allow several linked * lists of Tokens. *  * Note that in this module, linked lists are treated as stacks * wherever possible. For this reason, Lines are _pushed_ on to the * `expansion' field in MMacro structures, so that the linked list, * if walked, would give the macro lines in reverse order; this * means that we can walk the list when expanding a macro, and thus * push the lines on to the `expansion' field in _istk_ in reverse * order (so that when popped back off they are in the right * order). It may seem cockeyed, and it relies on my design having * an even number of steps in, but it works... * * Some of these structures, rather than being actual lines, are * markers delimiting the end of the expansion of a given macro. * This is for use in the cycle-tracking and %rep-handling code. * Such structures have `finishes' non-NULL, and `first' NULL. All * others have `finishes' NULL, but `first' may still be NULL if * the line is blank. */struct Line{    Line *next;    MMacro *finishes;    Token *first;};/* * To handle an arbitrary level of file inclusion, we maintain a * stack (ie linked list) of these things. */struct Include{    Include *next;    FILE *fp;    Cond *conds;    Line *expansion;    char *fname;    int lineno, lineinc;    MMacro *mstk;		/* stack of active macros/reps */};/* * Include search path. This is simply a list of strings which get * prepended, in turn, to the name of an include file, in an * attempt to find the file if it's not in the current directory. 
*/struct IncPath{    IncPath *next;    char *path;};/* * Conditional assembly: we maintain a separate stack of these for * each level of file inclusion. (The only reason we keep the * stacks separate is to ensure that a stray `%endif' in a file * included from within the true branch of a `%if' won't terminate * it and cause confusion: instead, rightly, it'll cause an error.) */struct Cond{    Cond *next;    int state;};enum{    /*     * These states are for use just after %if or %elif: IF_TRUE     * means the condition has evaluated to truth so we are     * currently emitting, whereas IF_FALSE means we are not     * currently emitting but will start doing so if a %else comes     * up. In these states, all directives are admissible: %elif,     * %else and %endif. (And of course %if.)     */    COND_IF_TRUE, COND_IF_FALSE,    /*     * These states come up after a %else: ELSE_TRUE means we're     * emitting, and ELSE_FALSE means we're not. In ELSE_* states,     * any %elif or %else will cause an error.     */    COND_ELSE_TRUE, COND_ELSE_FALSE,    /*     * This state means that we're not emitting now, and also that     * nothing until %endif will be emitted at all. It's for use in     * two circumstances: (i) when we've had our moment of emission     * and have now started seeing %elifs, and (ii) when the     * condition construct in question is contained within a     * non-emitting branch of a larger condition construct.     */    COND_NEVER};#define emitting(x) ( (x) == COND_IF_TRUE || (x) == COND_ELSE_TRUE )/*  * These defines are used as the possible return values for do_directive */#define NO_DIRECTIVE_FOUND  0#define DIRECTIVE_FOUND	    1/* * Condition codes. Note that we use c_ prefix not C_ because C_ is * used in nasm.h for the "real" condition codes. At _this_ level, * we treat CXZ and ECXZ as condition codes, albeit non-invertible * ones, so we need a different enum... 
*/static const char *conditions[] = {    "a", "ae", "b", "be", "c", "cxz", "e", "ecxz", "g", "ge", "l", "le",    "na", "nae", "nb", "nbe", "nc", "ne", "ng", "nge", "nl", "nle", "no",    "np", "ns", "nz", "o", "p", "pe", "po", "s", "z"};enum{    c_A, c_AE, c_B, c_BE, c_C, c_CXZ, c_E, c_ECXZ, c_G, c_GE, c_L, c_LE,    c_NA, c_NAE, c_NB, c_NBE, c_NC, c_NE, c_NG, c_NGE, c_NL, c_NLE, c_NO,    c_NP, c_NS, c_NZ, c_O, c_P, c_PE, c_PO, c_S, c_Z};static int inverse_ccs[] = {    c_NA, c_NAE, c_NB, c_NBE, c_NC, -1, c_NE, -1, c_NG, c_NGE, c_NL, c_NLE,    c_A, c_AE, c_B, c_BE, c_C, c_E, c_G, c_GE, c_L, c_LE, c_O, c_P, c_S,    c_Z, c_NO, c_NP, c_PO, c_PE, c_NS, c_NZ};/* * Directive names. */static const char *directives[] = {    "%arg",    "%assign", "%clear", "%define", "%elif", "%elifctx", "%elifdef",    "%elifid", "%elifidn", "%elifidni", "%elifmacro", "%elifnctx", "%elifndef",    "%elifnid", "%elifnidn", "%elifnidni", "%elifnmacro", "%elifnnum", "%elifnstr",    "%elifnum", "%elifstr", "%else", "%endif", "%endm", "%endmacro",    "%endrep", "%error", "%exitrep", "%iassign", "%idefine", "%if",    "%ifctx", "%ifdef", "%ifid", "%ifidn", "%ifidni", "%ifmacro", "%ifnctx",    "%ifndef", "%ifnid", "%ifnidn", "%ifnidni", "%ifnmacro", "%ifnnum",    "%ifnstr", "%ifnum", "%ifstr", "%imacro", "%include",    "%ixdefine", "%line",    "%local",    "%macro", "%pop", "%push", "%rep", "%repl", "%rotate",    "%stacksize",    "%strlen", "%substr", "%undef", "%xdefine"};enum{    PP_ARG,    PP_ASSIGN, PP_CLEAR, PP_DEFINE, PP_ELIF, PP_ELIFCTX, PP_ELIFDEF,    PP_ELIFID, PP_ELIFIDN, PP_ELIFIDNI, PP_ELIFMACRO, PP_ELIFNCTX, PP_ELIFNDEF,    PP_ELIFNID, PP_ELIFNIDN, PP_ELIFNIDNI, PP_ELIFNMACRO, PP_ELIFNNUM, PP_ELIFNSTR,    PP_ELIFNUM, PP_ELIFSTR, PP_ELSE, PP_ENDIF, PP_ENDM, PP_ENDMACRO,    PP_ENDREP, PP_ERROR, PP_EXITREP, PP_IASSIGN, PP_IDEFINE, PP_IF,    PP_IFCTX, PP_IFDEF, PP_IFID, PP_IFIDN, PP_IFIDNI, PP_IFMACRO, PP_IFNCTX,    PP_IFNDEF, PP_IFNID, PP_IFNIDN, PP_IFNIDNI, PP_IFNMACRO, PP_IFNNUM,    
PP_IFNSTR, PP_IFNUM, PP_IFSTR, PP_IMACRO, PP_INCLUDE,    PP_IXDEFINE, PP_LINE,    PP_LOCAL,    PP_MACRO, PP_POP, PP_PUSH, PP_REP, PP_REPL, PP_ROTATE,    PP_STACKSIZE,    PP_STRLEN, PP_SUBSTR, PP_UNDEF, PP_XDEFINE};/* If this is a an IF, ELIF, ELSE or ENDIF keyword */static int is_condition(int arg){    return ((arg >= PP_ELIF) && (arg <= PP_ENDIF)) ||	((arg >= PP_IF) && (arg <= PP_IFSTR));}/* For TASM compatibility we need to be able to recognise TASM compatible * conditional compilation directives. Using the NASM pre-processor does * not work, so we look for them specifically from the following list and * then jam in the equivalent NASM directive into the input stream. */#ifndef MAX#       define MAX(a,b) ( ((a) > (b)) ? (a) : (b))#endifenum{    TM_ARG, TM_ELIF, TM_ELSE, TM_ENDIF, TM_IF, TM_IFDEF, TM_IFDIFI,    TM_IFNDEF, TM_INCLUDE, TM_LOCAL};static const char *tasm_directives[] = {    "arg", "elif", "else", "endif", "if", "ifdef", "ifdifi",    "ifndef", "include", "local"};static int StackSize = 4;static char *StackPointer = "ebp";static int ArgOffset = 8;static int LocalOffset = 4;static Context *cstk;static Include *istk;static IncPath *ipath = NULL;static efunc _error;		/* Pointer to client-provided error reporting function */static evalfunc evaluate;static int pass;		/* HACK: pass 0 = generate dependencies only */static unsigned long unique;	/* unique identifier numbers */static Line *predef = NULL;static ListGen *list;/* * The number of hash values we use for the macro lookup tables. * FIXME: We should *really* be able to configure this at run time, * or even have the hash table automatically expanding when necessary. */#define NHASH 31/* * The current set of multi-line macros we have defined. */static MMacro *mmacros[NHASH];/* * The current set of single-line macros we have defined. */static SMacro *smacros[NHASH];/* * The multi-line macro we are currently defining, or the %rep * block we are currently reading, if any. 
*/static MMacro *defining;/* * The number of macro parameters to allocate space for at a time. */#define PARAM_DELTA 16/* * The standard macro set: defined as `static char *stdmac[]'. Also * gives our position in the macro set, when we're processing it. */#include "macros.c"static const char **stdmacpos;/* * The extra standard macros that come from the object format, if * any. */static const char **extrastdmac = NULL;int any_extrastdmac;/* * Tokens are allocated in blocks to improve speed */#define TOKEN_BLOCKSIZE 4096static Token *freeTokens = NULL;struct Blocks {	Blocks *next;	void *chunk;};static Blocks blocks = { NULL, NULL };/* * Forward declarations. */static Token *expand_mmac_params(Token * tline);static Token *expand_smacro(Token * tline);static Token *expand_id(Token * tline);static Context *get_ctx(char *name, int all_contexts);static void make_tok_num(Token * tok, long val);static void error(int severity, const char *fmt, ...);static void *new_Block(size_t size);static void delete_Blocks(void);static Token *new_Token(Token * next, int type, char *text, int txtlen);static Token *delete_Token(Token * t);/* * Macros for safe checking of token pointers, avoid *(NULL) */#define tok_type_(x,t) ((x) && (x)->type == (t))#define skip_white_(x) if (tok_type_((x), TOK_WHITESPACE)) (x)=(x)->next#define tok_is_(x,v) (tok_type_((x), TOK_OTHER) && !strcmp((x)->text,(v)))#define tok_isnt_(x,v) ((x) && ((x)->type!=TOK_OTHER || strcmp((x)->text,(v))))/* Handle TASM specific directives, which do not contain a % in * front of them. We do it here because I could not find any other * place to do it for the moment, and it is a hack (ideally it would * be nice to be able to use the NASM pre-processor to do it). 
*/
/*
 * line:    a NUL-terminated input line in storage that nasm_free() can
 *          release. It is modified temporarily while the first word
 *          is compared against tasm_directives[].
 * returns: `line' itself when the first word is not a TASM directive;
 *          otherwise a newly nasm_malloc()ed string holding the line
 *          from its first word onwards with a `%' in front (or
 *          "%ifdef BOGUS" for `ifdifi'), in which case the original
 *          `line' has been freed and must not be used again.
 */
static char *check_tasm_directive(char *line)
{
    int i, j, k, m, len;
    char *p = line, *oldline, oldchar;

    /*
     * Skip whitespace. isspace() is false for the terminating NUL, so
     * no separate end-of-string test is needed. The cast is required
     * because passing a negative plain char to isspace() is undefined.
     */
    while (isspace((unsigned char)*p))
	p++;

    /* Binary search for the directive name */
    i = -1;
    j = elements(tasm_directives);
    len = 0;
    while (p[len] != 0 && !isspace((unsigned char)p[len]))
	len++;
    if (len)
    {
	/* Temporarily terminate the first word so it can be compared */
	oldchar = p[len];
	p[len] = 0;
	while (j - i > 1)
	{
	    k = (j + i) / 2;
	    m = nasm_stricmp(p, tasm_directives[k]);
	    if (m == 0)
	    {
		/* We have found a directive, so jam a % in front of it
		 * so that NASM will then recognise it as one of its own.
		 */
		p[len] = oldchar;
		oldline = line;
		if (k == TM_IFDIFI)
		{
		    /* NASM does not recognise IFDIFI, so we convert it to
		     * %ifdef BOGUS. This is not used in NASM compatible
		     * code, but does need to parse for the TASM macro
		     * package.
		     *
		     * The replacement text is a fixed size that does not
		     * depend on the length of the input line, so size the
		     * buffer from the replacement itself; sizing it from
		     * the input overflowed for short `ifdifi' lines.
		     */
		    static const char ifdifi_subst[] = "%ifdef BOGUS";

		    line = nasm_malloc(sizeof ifdifi_subst);
		    memcpy(line, ifdifi_subst, sizeof ifdifi_subst);
		}
		else
		{
		    /* '%' + rest of the line + terminating NUL */
		    len = strlen(p);
		    line = nasm_malloc(len + 2);
		    line[0] = '%';
		    memcpy(line + 1, p, len + 1);
		}
		/* p pointed into oldline, so free only after copying */
		nasm_free(oldline);
		return line;
	    }
	    else if (m < 0)
	    {
		j = k;
	    }
	    else
		i = k;
	}
	/* Not a TASM directive: undo the temporary termination */
	p[len] = oldchar;
    }
    return line;
}

/*
 * The pre-preprocessing stage... This function translates line
 * number indications as they emerge from GNU cpp (`# lineno "file"
 * flags') into NASM preprocessor line number indications (`%line
 * lineno file').
 */
static char *prepreproc(char *line)
{
    int lineno, fnlen;
    char *fname, *oldline;

    if (line[0] == '#' && line[1] == ' ')
    {
	oldline = line;
	fname = oldline + 2;
	lineno = atoi(fname);
	fname += strspn(fname, "0123456789 ");
	if (*fname == '"')
preproc.c - 源码说明

本页面展示了「开源的nasm编译器源码,研究编译器原理很有帮助」中的 preproc.c 源码文件，采用 C语言编程语言编写，共 2,635 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与nasm相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?