preproc.c

来自「一个汇编语言编译器源码」· C语言 代码 · 共 2,416 行 · 第 1/5 页

C
2,416
字号

    if (stdmacpos) {
	if (*stdmacpos) {
	    char *ret = nasm_strdup(*stdmacpos++);
	    if (!*stdmacpos && any_extrastdmac) 
	    {
		stdmacpos = extrastdmac;
		any_extrastdmac = FALSE;
		return ret;
	    }
	    /*
	     * Nasty hack: here we push the contents of `predef' on
	     * to the top-level expansion stack, since this is the
	     * most convenient way to implement the pre-include and
	     * pre-define features.
	     */
	    if (!*stdmacpos) 
	    {
		Line *pd, *l;
		Token *head, **tail, *t, *tt;

		for (pd = predef; pd; pd = pd->next) {
		    head = NULL;
		    tail = &head;
		    for (t = pd->first; t; t = t->next) {
			tt = *tail = nasm_malloc(sizeof(Token));
			tt->next = NULL;
			tail = &tt->next;
			tt->type = t->type;
			tt->text = nasm_strdup(t->text);
			tt->mac = t->mac;   /* always NULL here, in fact */
		    }
		    l = nasm_malloc(sizeof(Line));
		    l->next = istk->expansion;
		    l->first = head;
		    l->finishes = FALSE;
		    istk->expansion = l;
		}
	    }
	    return ret;
	} 
	else {
	    stdmacpos = NULL;
	}
    }

    bufsize = BUF_DELTA;
    buffer = nasm_malloc(BUF_DELTA);
    p = buffer;
    while (1) {
	q = fgets(p, bufsize-(p-buffer), istk->fp);
	if (!q)
	    break;
	p += strlen(p);
	if (p > buffer && p[-1] == '\n') {
	    break;
	}
	if (p-buffer > bufsize-10) {
	    long offset = p-buffer;
	    bufsize += BUF_DELTA;
	    buffer = nasm_realloc(buffer, bufsize);
	    p = buffer+offset;	       /* prevent stale-pointer problems */
	}
    }

    if (!q && p == buffer) {
	nasm_free (buffer);
	return NULL;
    }

    src_set_linnum(src_get_linnum() + istk->lineinc);

    /*
     * Play safe: remove CRs as well as LFs, if any of either are
     * present at the end of the line.
     */
    while (--p >= buffer && (*p == '\n' || *p == '\r'))
	*p = '\0';

    /*
     * Handle spurious ^Z, which may be inserted into source files
     * by some file transfer utilities.
     */
    buffer[strcspn(buffer, "\032")] = '\0';

    list->line (LIST_READ, buffer);

    return buffer;
}

/*
 * Tokenise a line of text. This is a very simple process since we
 * don't need to parse the value out of e.g. numeric tokens: we
 * simply split one string into many.
 */
static Token *tokenise (char *line) 
{
    char *p = line;
    int type;
    Token *list = NULL;
    Token *t, **tail = &list;

    while (*line) {
	p = line;
	if (*p == '%' && ( isdigit(p[1]) || 
	      ((p[1] == '-' || p[1] == '+') && isdigit(p[2]))))
	{
	    p++;
	    do {
		p++;
	    } while (isdigit(*p));
	    type = TOK_PREPROC_ID;
	}
	else if (*p == '%' && p[1] == '{') {
	    p += 2;
	    while (*p && *p != '}') {
		p[-1] = *p;
		p++;
	    }
	    p[-1] = '\0';
	    if (*p) p++;
	    type = TOK_PREPROC_ID;
	}
	else if (*p == '%' && (isidchar(p[1]) ||
		  ((p[1] == '!' || p[1] == '%' || p[1] == '$') &&
		  isidchar(p[2]))))
	{
	    p++;
	    do {
		p++;
	    } while (isidchar(*p));
	    type = TOK_PREPROC_ID;
	}
	else if (isidstart(*p) || (*p == '$' && isidstart(p[1]))) {
	    type = TOK_ID;
	    p++;
	    while (*p && isidchar(*p))
		p++;
	}
	else if (*p == '\'' || *p == '"') {
	    /*
	     * A string token.
	     */
	    char c = *p;
	    p++;
	    type = TOK_STRING;
	    while (*p && *p != c)
		p++;
	    if (*p) p++;
	} 
	else if (isnumstart(*p)) {
	    /*
	     * A number token.
	     */
	    type = TOK_NUMBER;
	    p++;
	    while (*p && isnumchar(*p))
		p++;
	} 
	else if (isspace(*p)) {
	    type = TOK_WHITESPACE;
	    p++;
	    while (*p && isspace(*p))
		p++;
	    /*
	     * Whitespace just before end-of-line is discarded by
	     * pretending it's a comment; whitespace just before a
	     * comment gets lumped into the comment.
	     */
	    if (!*p || *p == ';') {
		type = TOK_COMMENT;
		while (*p) p++;
	    }
	} 
	else if (*p == ';') {
	    type = TOK_COMMENT;
	    while (*p) p++;
	} 
	else {
	    /*
	     * Anything else is an operator of some kind. We check
	     * for all the double-character operators (>>, <<, //,
	     * %%, <=, >=, ==, !=, <>, &&, ||, ^^), but anything
	     * else is a single-character operator.
	     */
	    type = TOK_OTHER;
	    if ((p[0] == '>' && p[1] == '>') ||
		(p[0] == '<' && p[1] == '<') ||
		(p[0] == '/' && p[1] == '/') ||
		(p[0] == '%' && p[1] == '%') ||
		(p[0] == '<' && p[1] == '=') ||
		(p[0] == '>' && p[1] == '=') ||
		(p[0] == '=' && p[1] == '=') ||
		(p[0] == '!' && p[1] == '=') ||
		(p[0] == '<' && p[1] == '>') ||
		(p[0] == '&' && p[1] == '&') ||
		(p[0] == '|' && p[1] == '|') ||
		(p[0] == '^' && p[1] == '^'))
	    {
		p++;
	    }
	    p++;
	}
	if (type != TOK_COMMENT) {
	    *tail = t = nasm_malloc (sizeof(Token));
	    tail = &t->next;
	    t->next = NULL;
	    t->type = type;
	    t->text = nasm_malloc(1+p-line);
	    strncpy(t->text, line, p-line);
	    t->text[p-line] = '\0';
	}
	line = p;
    }

    return list;
}

/*
 * Convert a line of tokens back into text.
 *
 * Any TOK_PREPROC_ID token whose text has `!' as its second
 * character is first rewritten in place: the rest of its text is
 * looked up with getenv(), and the token text is replaced by a copy
 * of the value found, or by NULL when the variable is not set.
 * Tokens with NULL text contribute nothing to the output.
 *
 * Returns a newly allocated string holding the concatenated token
 * texts; the caller owns it.
 */
char *detoken (Token *tlist) 
{
    Token *t;
    int len;
    char *line, *p;

    len = 0;
    for (t = tlist; t; t = t->next) {
	/*
	 * The text may already be NULL if an earlier pass over this
	 * list hit an unset variable, so check before indexing it.
	 */
	if (t->type == TOK_PREPROC_ID && t->text && t->text[1] == '!') {
	    char *value = getenv(t->text+2);
	    nasm_free (t->text);
	    if (value)
		t->text = nasm_strdup(value);
	    else
		t->text = NULL;
	}
	if (t->text)
	    len += strlen(t->text);
    }

    /* One buffer large enough for every text plus the terminator. */
    p = line = nasm_malloc(len+1);
    for (t = tlist; t; t = t->next) {
	if (t->text) {
	    strcpy (p, t->text);
	    p += strlen(p);
	}
    }
    *p = '\0';			       /* covers the empty-list case */
    return line;
}

/*
 * A scanner, suitable for use by the expression evaluator, which
 * operates on a line of Tokens. Expects a pointer to a pointer to
 * the first token in the line to be passed in as its private_data
 * field.
 */
static int ppscan(void *private_data, struct tokenval *tokval) 
{
    Token **tlineptr = private_data;
    Token *tline;

    do {
	tline = *tlineptr;
	*tlineptr = tline ? tline->next : NULL;
    } while (tline && (tline->type == TOK_WHITESPACE ||
		       tline->type == TOK_COMMENT));

    if (!tline)
	return tokval->t_type = TOKEN_EOS;

    if (tline->text[0] == '$' && !tline->text[1])
	return tokval->t_type = TOKEN_HERE;
    if (tline->text[0] == '$' && tline->text[1] == '$' && !tline->text[1])
	return tokval->t_type = TOKEN_BASE;

    if (tline->type == TOK_ID) {
	tokval->t_charptr = tline->text;
	if (tline->text[0] == '$') {
	    tokval->t_charptr++;
	    return tokval->t_type = TOKEN_ID;
	}

	/*
	 * This is the only special case we actually need to worry
	 * about in this restricted context.
	 */
	if (!nasm_stricmp(tline->text, "seg"))
	    return tokval->t_type = TOKEN_SEG;

	return tokval->t_type = TOKEN_ID;
    }

    if (tline->type == TOK_NUMBER) {
	int rn_error;

	tokval->t_integer = readnum(tline->text, &rn_error);
	if (rn_error)
	    return tokval->t_type = TOKEN_ERRNUM;
	tokval->t_charptr = NULL;
	return tokval->t_type = TOKEN_NUM;
    }

    if (tline->type == TOK_STRING) {
	int rn_warn;
	char q, *r;
	int l;

	r = tline->text;
	q = *r++;
	l = strlen(r);

	if (l == 0 || r[l-1] != q)
	    return tokval->t_type = TOKEN_ERRNUM;
	tokval->t_integer = readstrnum(r, l-1, &rn_warn);
	if (rn_warn)
	    error(ERR_WARNING|ERR_PASS1,
		  "character constant too long");
	tokval->t_charptr = NULL;
	return tokval->t_type = TOKEN_NUM;
    }

    if (tline->type == TOK_OTHER) {
	if (!strcmp(tline->text, "<<")) return tokval->t_type = TOKEN_SHL;
	if (!strcmp(tline->text, ">>")) return tokval->t_type = TOKEN_SHR;
	if (!strcmp(tline->text, "//")) return tokval->t_type = TOKEN_SDIV;
	if (!strcmp(tline->text, "%%")) return tokval->t_type = TOKEN_SMOD;
	if (!strcmp(tline->text, "==")) return tokval->t_type = TOKEN_EQ;
	if (!strcmp(tline->text, "<>")) return tokval->t_type = TOKEN_NE;
	if (!strcmp(tline->text, "!=")) return tokval->t_type = TOKEN_NE;
	if (!strcmp(tline->text, "<=")) return tokval->t_type = TOKEN_LE;
	if (!strcmp(tline->text, ">=")) return tokval->t_type = TOKEN_GE;
	if (!strcmp(tline->text, "&&")) return tokval->t_type = TOKEN_DBL_AND;
	if (!strcmp(tline->text, "^^")) return tokval->t_type = TOKEN_DBL_XOR;
	if (!strcmp(tline->text, "||")) return tokval->t_type = TOKEN_DBL_OR;
    }

    /*
     * We have no other options: just return the first character of
     * the token text.
     */
    return tokval->t_type = tline->text[0];
}

/*
 * Look up the Context that a `%$'-style name refers to. The name
 * starts with `%$'; every further `$' directly after that selects
 * the context one level further down the stack from the top.
 *
 * Returns NULL if the stack is empty or has too few levels for the
 * number of `$' signs given; in both cases a non-fatal error has
 * _already_ been reported by the time we return.
 */
static Context *get_ctx (char *name) 
{
    Context *level = cstk;
    int depth;

    if (level == NULL) {
	error (ERR_NONFATAL, "`%s': context stack is empty", name);
	return NULL;
    }

    /*
     * name[0..1] is the `%$' introducer, so the extra `$' signs
     * start at index 2. `depth' counts the `$' signs seen so far.
     */
    for (depth = 2; name[depth] == '$'; depth++) {
	level = level->next;
	if (level == NULL) {
	    error (ERR_NONFATAL, "`%s': context stack is only"
		   " %d level%s deep", name, depth-1, (depth==2 ? "" : "s"));
	    return NULL;
	}
    }
    return level;
}

/*
 * Compare a string against the name of an existing macro. When
 * `casesense' is non-zero the comparison is done with strcmp;
 * otherwise it is done with nasm_stricmp. The result of whichever
 * routine was used is returned unchanged.
 */
static int mstrcmp(char *p, char *q, int casesense) 
{
    if (casesense)
	return strcmp(p, q);

    return nasm_stricmp(p, q);
}

/*
 * Open an include file for reading. The name is tried first exactly
 * as given, then with each entry of the include path list prefixed
 * to it in turn, until one of them opens.
 *
 * If this routine returns at all it returns a valid file pointer:
 * when every candidate fails it raises ERR_FATAL instead.
 *
 * While `pass' is zero, every name that opens successfully is also
 * printed to standard output, with a backslash-newline inserted
 * once the running width goes past 62 characters. The running width
 * is kept in a static, so it carries over from one call to the next.
 */
static FILE *inc_fopen(char *file) 
{
    static int namelen = 0;
    char *dir = "";		       /* first attempt: no prefix */
    IncPath *pending = ipath;

    for (;;) {
	char *candidate = nasm_strcat(dir, file);
	FILE *stream = fopen(candidate, "r");

	if (stream && pass == 0) {
	    namelen += strlen(candidate) + 1;
	    if (namelen > 62) {
		printf(" \\\n  ");
		namelen = 2;
	    }
	    printf(" %s", candidate);
	}
	nasm_free (candidate);

	if (stream)
	    return stream;
	if (!pending)
	    break;		       /* include path exhausted */

	dir = pending->path;
	pending = pending->next;
    }

    error (ERR_FATAL,
	   "unable to open include file `%s'", file);
    return NULL;		       /* never reached - placate compilers */
}

/*
 * Decide whether a single-line macro called `name' counts as
 * already defined for a use with `nparam' parameters.
 *
 * A name starting with `%$' is looked up among the local macros of
 * the context it designates (FALSE is returned if that context does
 * not exist); any other name is looked up in the global hash table.
 *
 * A macro matches when its name compares equal and, in addition,
 * either nparam is zero or negative, or the macro takes no
 * parameters, or the macro takes exactly `nparam' parameters. The
 * first match ends the search and TRUE is returned; FALSE is
 * returned if nothing matches.
 *
 * If `defn' is non-NULL and a match is found, *defn receives the
 * matching definition when its parameter count is precisely
 * `nparam' or when nparam is -1, and NULL otherwise. *defn is left
 * alone when no match is found.
 *
 * The name comparison is case-sensitive only when the macro's own
 * casesense flag ANDed with `nocase' is non-zero.
 *
 * Note that this is also called with nparam zero to resolve
 * `ifdef'.
 */
static int smacro_defined (char *name, int nparam, SMacro **defn, int nocase) 
{
    SMacro *cand;
    char *key;

    if (name[0] != '%' || name[1] != '$') {
	cand = smacros[hash(name)];
	key = name;
    } else {
	Context *owner = get_ctx (name);
	if (!owner)
	    return FALSE;	       /* got to return _something_ */
	cand = owner->localmac;
	/* Strip the `%' and then every `$' that follows it. */
	key = name+1;
	key += strspn(key, "$");
    }

    for (; cand; cand = cand->next) {
	if (mstrcmp(cand->name, key, cand->casesense & nocase))
	    continue;		       /* different name */
	if (nparam > 0 && cand->nparam != 0 && nparam != cand->nparam)
	    continue;		       /* incompatible parameter count */

	if (defn) {
	    if (nparam == cand->nparam || nparam == -1)
		*defn = cand;
	    else
		*defn = NULL;
	}
	return TRUE;
    }
    return FALSE;
}

/*
 * Count and mark off the parameters in a multi-line macro call.
 * This is called both from within the multi-line macro expansion
 * code, and also to mark off the default parameters when provided
 * in a %macro definition line.
 */
static void count_mmac_params (Token *t, int *nparam, Token ***params) 
{
    int paramsize, brace;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?