📄 preproc.c

📁 CC386 is a general-purpose 32-bit C compiler. It is not an optimizing compiler but given that the co
💻 C
📖 第 1 页 / 共 5 页
字号:
 * number indications as they emerge from GNU cpp (`# lineno "file"
 * flags') into NASM preprocessor line number indications (`%line
 * lineno file').
 */
static char *
prepreproc(char *line)
{
    /* Only lines of the form "# <lineno> ..." are rewritten. */
    if (line[0] == '#' && line[1] == ' ')
    {
        char *cpp_line = line;
        char *cursor = cpp_line + 2;
        int number = atoi(cursor);
        int namelen;

        /* Step over the line number and any spaces around it. */
        cursor += strspn(cursor, "0123456789 ");
        /* Drop the opening quote of the file name, if there is one. */
        if (*cursor == '"')
            cursor++;
        /* The name runs up to the closing quote (or end of string). */
        namelen = strcspn(cursor, "\"");

        /* Build the replacement in a fresh buffer, then release the input. */
        line = nasm_malloc(20 + namelen);
        sprintf(line, "%%line %d %.*s", number, namelen, cursor);
        nasm_free(cpp_line);
    }

    if (tasm_compatible_mode)
        return check_tasm_directive(line);
    return line;
}

/*
 * The hash function for macro lookups. Some macros have
 * case-insensitive names, so the hash must be invariant under case
 * changes; this is achieved by hashing the uppercased form of every
 * character.
 *
 * `s' is the NUL-terminated name to hash. Each character is
 * weighted by a multiplier taken cyclically from the table below,
 * the products are summed, and the sum is reduced modulo NHASH.
 */
static int
hash(char *s)
{
    unsigned int h = 0;
    int i = 0;
    /*
     * Powers of three, mod 31.
     */
    static const int multipliers[] = {
        1, 3, 9, 27, 19, 26, 16, 17, 20, 29, 25, 13, 8, 24, 10,
        30, 28, 22, 4, 12, 5, 15, 14, 11, 2, 6, 18, 23, 7, 21
    };

    while (*s)
    {
        /*
         * Convert to unsigned char BEFORE calling toupper(): handing
         * a negative plain-char value (any byte >= 0x80 where char
         * is signed) to a <ctype.h> function is undefined behaviour.
         */
        h += multipliers[i] * (unsigned char) toupper((unsigned char) *s);
        s++;
        /* Wrap around once the multiplier table is exhausted. */
        if (++i >= elements(multipliers))
            i = 0;
    }
    h %= NHASH;
    return h;
}

/*
 * Free a linked list of tokens. Every token is handed to
 * delete_Token(), which returns the token that followed it; the
 * walk stops when that successor is a null pointer.
 */
static void
free_tlist(Token * list)
{
    Token *cur;

    for (cur = list; cur; cur = delete_Token(cur))
        ;                       /* all the work happens in the step */
}

/*
 * Free a linked list of lines: for each Line, release its token
 * list and then the Line structure itself.
 */
static void
free_llist(Line * list)
{
    while (list)
    {
        Line *doomed = list;

        /* Advance before the node is released. */
        list = doomed->next;
        free_tlist(doomed->first);
        nasm_free(doomed);
    }
}

/*
 * Free an MMacro together with everything it points to: its name,
 * its `dlist' token list, its `defaults' array, its list of
 * expansion lines, and finally the MMacro structure itself.
 * `m' is dereferenced unconditionally, so it must not be NULL.
 */
static void
free_mmacro(MMacro * m)
{
    nasm_free(m->name);
    /* token list: each token is released through delete_Token() */
    free_tlist(m->dlist);
    nasm_free(m->defaults);
    /* expansion lines: each Line and its tokens are released */
    free_llist(m->expansion);
    /* the structure goes last, once nothing reads from it any more */
    nasm_free(m);
}

/*
 * Pop the context stack. The top Context is unlinked from `cstk',
 * every SMacro on its `localmac' list is destroyed (name, expansion
 * tokens, then the SMacro itself), and finally the context's name
 * and the Context structure are freed. `cstk' is dereferenced
 * unconditionally, so the stack must not be empty on entry.
 */
static void
ctx_pop(void)
{
    Context *top = cstk;
    SMacro *cur, *following;

    cstk = top->next;

    for (cur = top->localmac; cur; cur = following)
    {
        /* Remember the successor before this node is released. */
        following = cur->next;
        nasm_free(cur->name);
        free_tlist(cur->expansion);
        nasm_free(cur);
    }

    nasm_free(top->name);
    nasm_free(top);
}

#define BUF_DELTA 512
/*
 * Read a line from the top file in istk, handling multiple CR/LFs
 * at the end of the line read, and handling spurious ^Zs. Will
 * return lines from the standard macro set if this has not already
 * been done.
 *
 * Returns a freshly allocated string (from nasm_strdup or
 * nasm_malloc) holding one logical line with backslash-newline
 * continuations joined and the line terminator removed, or NULL
 * when the file yields no more data. Lines read from the file
 * advance the source line number and are reported through
 * list->line(); lines taken from the standard macro set do neither.
 */
static char *
read_line(void)
{
    char *buffer, *p, *q;
    int bufsize, continued_count;

    if (stdmacpos)
    {
        if (*stdmacpos)
        {
            char *ret = nasm_strdup(*stdmacpos++);
            if (!*stdmacpos && any_extrastdmac)
            {
                /* Main table exhausted: carry on with the extra one. */
                stdmacpos = extrastdmac;
                any_extrastdmac = FALSE;
                return ret;
            }
            /*
             * Nasty hack: here we push the contents of `predef' on
             * to the top-level expansion stack, since this is the
             * most convenient way to implement the pre-include and
             * pre-define features.
             */
            if (!*stdmacpos)
            {
                Line *pd, *l;
                Token *head, **tail, *t;

                for (pd = predef; pd; pd = pd->next)
                {
                    /* Duplicate the token list of this predef line. */
                    head = NULL;
                    tail = &head;
                    for (t = pd->first; t; t = t->next)
                    {
                        *tail = new_Token(NULL, t->type, t->text, 0);
                        tail = &(*tail)->next;
                    }
                    /* Push the copy on the front of istk->expansion. */
                    l = nasm_malloc(sizeof(Line));
                    l->next = istk->expansion;
                    l->first = head;
                    l->finishes = FALSE;
                    istk->expansion = l;
                }
            }
            return ret;
        }
        else
        {
            stdmacpos = NULL;
        }
    }

    bufsize = BUF_DELTA;
    buffer = nasm_malloc(BUF_DELTA);
    p = buffer;
    continued_count = 0;
    while (1)
    {
        q = fgets(p, bufsize - (p - buffer), istk->fp);
        if (!q)
            break;
        p += strlen(p);
        if (p > buffer && p[-1] == '\n')
        {
            /* Convert backslash-CRLF line continuation sequences into
               nothing at all (for DOS and Windows) */
            if (((p - 2) > buffer) && (p[-3] == '\\') && (p[-2] == '\r'))
            {
                p -= 3;
                *p = 0;
                continued_count++;
            }
            /* Also convert backslash-LF line continuation sequences into
               nothing at all (for Unix) */
            else if (((p - 1) > buffer) && (p[-2] == '\\'))
            {
                p -= 2;
                *p = 0;
                continued_count++;
            }
            else
            {
                /* A genuine end of line: the logical line is complete. */
                break;
            }
        }
        /* Grow the buffer when fewer than ten bytes of room remain. */
        if (p - buffer > bufsize - 10)
        {
            long offset = p - buffer;
            bufsize += BUF_DELTA;
            buffer = nasm_realloc(buffer, bufsize);
            p = buffer + offset;        /* prevent stale-pointer problems */
        }
    }

    /* fgets() failed before anything was stored: nothing to return. */
    if (!q && p == buffer)
    {
        nasm_free(buffer);
        return NULL;
    }

    /* One increment for this line plus one per continuation joined. */
    src_set_linnum(src_get_linnum() + istk->lineinc + (continued_count * istk->lineinc));

    /*
     * Play safe: remove CRs as well as LFs, if any of either are
     * present at the end of the line. The test looks at p[-1] so
     * that `p' never moves below `buffer'; forming a pointer before
     * the start of the allocation is undefined behaviour.
     */
    while (p > buffer && (p[-1] == '\n' || p[-1] == '\r'))
        *--p = '\0';

    /*
     * Handle spurious ^Z, which may be inserted into source files
     * by some file transfer utilities.
     */
    buffer[strcspn(buffer, "\032")] = '\0';

    list->line(LIST_READ, buffer);

    return buffer;
}

/*
 * Tokenise a line of text. This is a very simple process since we
 * don't need to parse the value out of e.g. numeric tokens: we
 * simply split one string into many.
 *
 * `line' is a NUL-terminated, writable string; `%{...}' sequences
 * are rewritten in place (see below). The return value is the head
 * of a newly built Token list, or NULL if the line produced no
 * tokens. Comments, and whitespace that runs up to a comment or to
 * the end of the line, produce no token at all.
 */
static Token *
tokenise(char *line)
{
    char *p = line;
    int type;
    Token *list = NULL;
    Token *t, **tail = &list;

    /*
     * Note on <ctype.h>: isdigit() and isspace() are only defined
     * for EOF and values representable as unsigned char, so every
     * plain-char argument is converted first. Otherwise a byte
     * >= 0x80 would be undefined behaviour where char is signed.
     */
    while (*line)
    {
        p = line;
        if (*p == '%')
        {
            p++;
            /*
             * `%' followed by digits, by a sign and a digit, or by a
             * lone `+' before whitespace / end of line.
             */
            if (isdigit((unsigned char) *p) ||
                ((*p == '-' || *p == '+') && isdigit((unsigned char) p[1])) ||
                ((*p == '+') && (isspace((unsigned char) p[1]) || !p[1])))
            {
                do
                {
                    p++;
                }
                while (isdigit((unsigned char) *p));
                type = TOK_PREPROC_ID;
            }
            else if (*p == '{')
            {
                /*
                 * `%{name}': slide the enclosed text one place to the
                 * left, over the `{', and terminate it with a NUL. The
                 * length handed to new_Token() below still covers the
                 * whole original sequence, so the token text is cut
                 * short by that embedded NUL.
                 */
                p++;
                while (*p && *p != '}')
                {
                    p[-1] = *p;
                    p++;
                }
                p[-1] = '\0';
                if (*p)
                    p++;        /* step over the closing brace */
                type = TOK_PREPROC_ID;
            }
            else if (isidchar(*p) ||
                     ((*p == '!' || *p == '%' || *p == '$') &&
                      isidchar(p[1])))
            {
                do
                {
                    p++;
                }
                while (isidchar(*p));
                type = TOK_PREPROC_ID;
            }
            else
            {
                /* A bare `%', or the two-character operator `%%'. */
                type = TOK_OTHER;
                if (*p == '%')
                    p++;
            }
        }
        else if (isidstart(*p) || (*p == '$' && isidstart(p[1])))
        {
            type = TOK_ID;
            p++;
            while (*p && isidchar(*p))
                p++;
        }
        else if (*p == '\'' || *p == '"')
        {
            /*
             * A string token. It ends at the next occurrence of the
             * quote character that opened it.
             */
            char c = *p;
            p++;
            type = TOK_STRING;
            while (*p && *p != c)
                p++;
            if (*p)
            {
                p++;
            }
            else
            {
                error(ERR_WARNING, "unterminated string");
            }
        }
        else if (isnumstart(*p))
        {
            /*
             * A number token.
             */
            type = TOK_NUMBER;
            p++;
            while (*p && isnumchar(*p))
                p++;
        }
        else if (isspace((unsigned char) *p))
        {
            type = TOK_WHITESPACE;
            p++;
            while (*p && isspace((unsigned char) *p))
                p++;
            /*
             * Whitespace just before end-of-line is discarded by
             * pretending it's a comment; whitespace just before a
             * comment gets lumped into the comment.
             */
            if (!*p || *p == ';')
            {
                type = TOK_COMMENT;
                while (*p)
                    p++;
            }
        }
        else if (*p == ';')
        {
            /* A comment swallows the rest of the line. */
            type = TOK_COMMENT;
            while (*p)
                p++;
        }
        else
        {
            /*
             * Anything else is an operator of some kind. We check
             * for all the double-character operators (>>, <<, //,
             * %%, <=, >=, ==, !=, <>, &&, ||, ^^), but anything
             * else is a single-character operator.
             */
            type = TOK_OTHER;
            if ((p[0] == '>' && p[1] == '>') ||
                (p[0] == '<' && p[1] == '<') ||
                (p[0] == '/' && p[1] == '/') ||
                (p[0] == '<' && p[1] == '=') ||
                (p[0] == '>' && p[1] == '=') ||
                (p[0] == '=' && p[1] == '=') ||
                (p[0] == '!' && p[1] == '=') ||
                (p[0] == '<' && p[1] == '>') ||
                (p[0] == '&' && p[1] == '&') ||
                (p[0] == '|' && p[1] == '|') ||
                (p[0] == '^' && p[1] == '^'))
            {
                p++;
            }
            p++;
        }
        /* Comments are dropped; everything else joins the list. */
        if (type != TOK_COMMENT)
        {
            *tail = t = new_Token(NULL, type, line, p - line);
            tail = &t->next;
        }
        line = p;
    }
    return list;
}

/*
 * Allocate a new managed block of memory of `size' bytes and return
 * a pointer to it. Managed blocks are never released one at a
 * time; delete_Blocks() frees all of them together.
 *
 * The chunk is stored in the last node of the `blocks' list, and a
 * fresh node with no chunk is appended behind it, ready for the
 * next request.
 */
static void *
new_Block(size_t size)
{
    Blocks *last, *spare;

    /* Walk to the final node of the list. */
    for (last = &blocks; last->next; last = last->next)
        ;

    /* Its chunk slot receives the requested memory. */
    last->chunk = nasm_malloc(size);

    /* Append an initialised, chunk-less node for the next caller. */
    spare = nasm_malloc(sizeof(Blocks));
    spare->next = NULL;
    spare->chunk = NULL;
    last->next = spare;

    return last->chunk;
}

/*
 * Delete all managed blocks of memory: every chunk handed out by
 * new_Block() and every dynamically allocated list node is freed,
 * and the static list head is returned to its empty state so that
 * new_Block() may safely be called again afterwards.
 *
 * NOTE(review): any pointer into a freed chunk that is held
 * elsewhere (for instance a token free list carved out of these
 * blocks) becomes invalid here and must be reset by the caller.
 */
static void
delete_Blocks(void)
{
    Blocks *a, *b = &blocks;

    /*
     * keep in mind that the first block, pointed to by blocks
     * is a static and not dynamically allocated, so we don't
     * free it.
     */
    while (b)
    {
        if (b->chunk)
            nasm_free(b->chunk);
        a = b;
        b = b->next;            /* read the link before freeing the node */
        if (a != &blocks)
            nasm_free(a);
    }

    /*
     * Without this reset the static head would keep pointing at the
     * memory just freed, and the next new_Block() would follow
     * blocks.next into it.
     */
    blocks.chunk = NULL;
    blocks.next = NULL;
}

/*
 * Create a new Token and return a pointer to it. The token is
 * taken from the `freeTokens' list, which is refilled with
 * TOKEN_BLOCKSIZE tokens from new_Block() whenever it runs dry.
 *
 * `next' and `type' are stored as given and `mac' is set to NULL.
 * The text is set to NULL when `type' is TOK_WHITESPACE or `text'
 * is NULL; otherwise it is a freshly allocated copy of at most
 * `txtlen' characters of `text', with a `txtlen' of zero meaning
 * "use strlen(text)". The copy is made with strncpy(), so it ends
 * early at a NUL found inside the first `txtlen' characters.
 */
static Token *
new_Token(Token * next, int type, char *text, int txtlen)
{
    Token *tok;

    if (!freeTokens)
    {
        /* Carve a new block into a singly linked chain of tokens. */
        Token *pool = (Token *) new_Block(TOKEN_BLOCKSIZE * sizeof(Token));
        int k = 0;

        while (k < TOKEN_BLOCKSIZE - 1)
        {
            pool[k].next = &pool[k + 1];
            k++;
        }
        pool[k].next = NULL;
        freeTokens = pool;
    }

    /* Detach the first free token and fill it in. */
    tok = freeTokens;
    freeTokens = tok->next;

    tok->next = next;
    tok->mac = NULL;
    tok->type = type;
    tok->text = NULL;

    if (type != TOK_WHITESPACE && text != NULL)
    {
        int len = txtlen;

        if (len == 0)
            len = strlen(text);
        tok->text = nasm_malloc(1 + len);
        strncpy(tok->text, text, len);
        tok->text[len] = '\0';
    }
    return tok;
}

/*
 * Release one Token: free its text, push the token itself on the
 * front of the `freeTokens' list for reuse, and return the token
 * that followed it in its original list.
 */
static Token *
delete_Token(Token * t)
{
    Token *successor = t->next;

    nasm_free(t->text);

    /* Recycle the structure rather than freeing it. */
    t->next = freeTokens;
    freeTokens = t;

    return successor;
}

/*
 * Convert a line of tokens back into text.
 * If expand_locals is not zero, identifiers of the form "%$*xxx"
 * will be transformed into ..@ctxnum.xxx
 */
static char *
detoken(Token * tlist, int expand_locals)
{
    Token *t;
    int len;
    char *line, *p;
💿 文件大小 2267 K
👤 上传用户 jhjfjh22544
📂 所属分类编译器/解释器
🏷️ 相关标签

#compiler #general-purpose #optimizing #given
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -