📄 lexi.c

📁 早期freebsd实现
💻 C
字号:
/* * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 *	The Regents of the University of California.  All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright *    notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright *    notice, this list of conditions and the following disclaimer in the *    documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software *    must display the following acknowledgement: *	This product includes software developed by the University of *	California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors *    may be used to endorse or promote products derived from this software *    without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */#ifndef lintstatic char sccsid[] = "@(#)lexi.c	8.1 (Berkeley) 6/6/93";#endif /* not lint *//* * Here we have the token scanner for indent.  It scans off one token and puts * it in the global variable "token".  It returns a code, indicating the type * of token scanned. */#include <stdio.h>#include <ctype.h>#include <stdlib.h>#include <string.h>#include "indent_globs.h"#include "indent_codes.h"#define alphanum 1#define opchar 3struct templ {    char       *rwd;    int         rwcode;};struct templ specials[100] ={    "switch", 1,    "case", 2,    "break", 0,    "struct", 3,    "union", 3,    "enum", 3,    "default", 2,    "int", 4,    "char", 4,    "float", 4,    "double", 4,    "long", 4,    "short", 4,    "typdef", 4,    "unsigned", 4,    "register", 4,    "static", 4,    "global", 4,    "extern", 4,    "void", 4,    "goto", 0,    "return", 0,    "if", 5,    "while", 5,    "for", 5,    "else", 6,    "do", 6,    "sizeof", 7,    0, 0};char        chartype[128] ={				/* this is used to facilitate the decision of				 * what type (alphanumeric, operator) each				 * character is */    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,    0, 3, 0, 0, 1, 3, 3, 0,    0, 0, 3, 3, 0, 3, 0, 3,    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 0, 0, 3, 3, 3, 3,    0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 3, 1,    0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 3, 0, 3, 0};intlexi(){    int         unary_delim;	/* this is set to 1 if the current token				 * 				 * forces a following operator to be unary */    static int  last_code;	/* the last token type returned */    static int  l_struct;	/* set to 1 if the last token was 'struct' */    int         code;		/* internal code to be returned */    char        qchar;		/* the delimiter character for a string */    e_token = s_token;		/* point to start of place to save token */    unary_delim = false;    ps.col_1 = ps.last_nl;	/* tell world that this token started in				 * column 1 iff the last thing scanned was nl */    ps.last_nl = false;    while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */	ps.col_1 = false;	/* leading blanks imply token is not in column				 * 1 */	if (++buf_ptr >= buf_end)	    fill_buffer();    }    /* Scan an alphanumeric token */    if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {	/*	 * we have a character or number	 */	register char *j;	/* used for searching thru list of				 * 				 * reserved words */	register struct templ *p;	if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {	    int         seendot = 0,	                seenexp = 0;	    if (*buf_ptr == '0' &&		    (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {		*e_token++ = *buf_ptr++;		*e_token++ = *buf_ptr++;		while (isxdigit(*buf_ptr)) {		    CHECK_SIZE_TOKEN;		    *e_token++ = *buf_ptr++;		}	    }	    else		while (1) {		    if (*buf_ptr == '.')			if (seendot)			    break;			else			    seendot++;		    CHECK_SIZE_TOKEN;		    *e_token++ = *buf_ptr++;		    if (!isdigit(*buf_ptr) && *buf_ptr != '.')			if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)			    break;			else {			    seenexp++;			    seendot++;			    CHECK_SIZE_TOKEN;			    *e_token++ = *buf_ptr++;			    if (*buf_ptr == '+' || *buf_ptr == '-')				*e_token++ = *buf_ptr++;			}		}	    if (*buf_ptr == 'L' || *buf_ptr == 'l')		*e_token++ = *buf_ptr++;	}	else	    while (chartype[*buf_ptr] == alphanum) {	/* copy it over */		CHECK_SIZE_TOKEN;		*e_token++ = *buf_ptr++;		if (buf_ptr >= buf_end)		    fill_buffer();	    }	*e_token++ = '\0';	while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */	    if (++buf_ptr >= buf_end)		fill_buffer();	}	ps.its_a_keyword = false;	ps.sizeof_keyword = false;	if (l_struct) {		/* if last token was 'struct', then this token				 * should be treated as a declaration */	    l_struct = false;	    last_code = ident;	    ps.last_u_d = true;	    return (decl);	}	ps.last_u_d = false;	/* Operator after indentifier is binary */	last_code = ident;	/* Remember that this is the code we will				 * return */	/*	 * This loop will check if the token is a keyword.	 */	for (p = specials; (j = p->rwd) != 0; p++) {	    register char *p = s_token;	/* point at scanned token */	    if (*j++ != *p++ || *j++ != *p++)		continue;	/* This test depends on the fact that				 * identifiers are always at least 1 character				 * long (ie. the first two bytes of the				 * identifier are always meaningful) */	    if (p[-1] == 0)		break;		/* If its a one-character identifier */	    while (*p++ == *j)		if (*j++ == 0)		    goto found_keyword;	/* I wish that C had a multi-level					 * break... */	}	if (p->rwd) {		/* we have a keyword */    found_keyword:	    ps.its_a_keyword = true;	    ps.last_u_d = true;	    switch (p->rwcode) {	    case 1:		/* it is a switch */		return (swstmt);	    case 2:		/* a case or default */		return (casestmt);	    case 3:		/* a "struct" */		if (ps.p_l_follow)		    break;	/* inside parens: cast */		l_struct = true;		/*		 * Next time around, we will want to know that we have had a		 * 'struct'		 */	    case 4:		/* one of the declaration keywords */		if (ps.p_l_follow) {		    ps.cast_mask |= 1 << ps.p_l_follow;		    break;	/* inside parens: cast */		}		last_code = decl;		return (decl);	    case 5:		/* if, while, for */		return (sp_paren);	    case 6:		/* do, else */		return (sp_nparen);	    case 7:		ps.sizeof_keyword = true;	    default:		/* all others are treated like any other				 * identifier */		return (ident);	    }			/* end of switch */	}			/* end of if (found_it) */	if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {	    register char *tp = buf_ptr;	    while (tp < buf_end)		if (*tp++ == ')' && (*tp == ';' || *tp == ','))		    goto not_proc;	    strncpy(ps.procname, token, sizeof ps.procname - 1);	    ps.in_parameter_declaration = 1;	    rparen_count = 1;    not_proc:;	}	/*	 * The following hack attempts to guess whether or not the current	 * token is in fact a declaration keyword -- one that has been	 * typedefd	 */	if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')		&& !ps.p_l_follow	        && !ps.block_init		&& (ps.last_token == rparen || ps.last_token == semicolon ||		    ps.last_token == decl ||		    ps.last_token == lbrace || ps.last_token == rbrace)) {	    ps.its_a_keyword = true;	    ps.last_u_d = true;	    last_code = decl;	    return decl;	}	if (last_code == decl)	/* if this is a declared variable, then				 * following sign is unary */	    ps.last_u_d = true;	/* will make "int a -1" work */	last_code = ident;	return (ident);		/* the ident is not in the list */    }				/* end of procesing for alpanum character */    /* Scan a non-alphanumeric token */    *e_token++ = *buf_ptr;		/* if it is only a one-character token, it is				 * moved here */    *e_token = '\0';    if (++buf_ptr >= buf_end)	fill_buffer();    switch (*token) {    case '\n':	unary_delim = ps.last_u_d;	ps.last_nl = true;	/* remember that we just had a newline */	code = (had_eof ? 0 : newline);	/*	 * if data has been exausted, the newline is a dummy, and we should	 * return code to stop	 */	break;    case '\'':			/* start of quoted character */    case '"':			/* start of string */	qchar = *token;	if (troff) {	    e_token[-1] = '`';	    if (qchar == '"')		*e_token++ = '`';	    e_token = chfont(&bodyf, &stringf, e_token);	}	do {			/* copy the string */	    while (1) {		/* move one character or [/<char>]<char> */		if (*buf_ptr == '\n') {		    printf("%d: Unterminated literal\n", line_no);		    goto stop_lit;		}		CHECK_SIZE_TOKEN;	/* Only have to do this once in this loop,					 * since CHECK_SIZE guarantees that there					 * are at least 5 entries left */		*e_token = *buf_ptr++;		if (buf_ptr >= buf_end)		    fill_buffer();		if (*e_token == BACKSLASH) {	/* if escape, copy extra char */		    if (*buf_ptr == '\n')	/* check for escaped newline */			++line_no;		    if (troff) {			*++e_token = BACKSLASH;			if (*buf_ptr == BACKSLASH)			    *++e_token = BACKSLASH;		    }		    *++e_token = *buf_ptr++;		    ++e_token;	/* we must increment this again because we				 * copied two chars */		    if (buf_ptr >= buf_end)			fill_buffer();		}		else		    break;	/* we copied one character */	    }			/* end of while (1) */	} while (*e_token++ != qchar);	if (troff) {	    e_token = chfont(&stringf, &bodyf, e_token - 1);	    if (qchar == '"')		*e_token++ = '\'';	}stop_lit:	code = ident;	break;    case ('('):    case ('['):	unary_delim = true;	code = lparen;	break;    case (')'):    case (']'):	code = rparen;	break;    case '#':	unary_delim = ps.last_u_d;	code = preesc;	break;    case '?':	unary_delim = true;	code = question;	break;    case (':'):	code = colon;	unary_delim = true;	break;    case (';'):	unary_delim = true;	code = semicolon;	break;    case ('{'):	unary_delim = true;	/*	 * if (ps.in_or_st) ps.block_init = 1;	 */	/* ?	code = ps.block_init ? lparen : lbrace; */	code = lbrace;	break;    case ('}'):	unary_delim = true;	/* ?	code = ps.block_init ? rparen : rbrace; */	code = rbrace;	break;    case 014:			/* a form feed */	unary_delim = ps.last_u_d;	ps.last_nl = true;	/* remember this so we can set 'ps.col_1'				 * right */	code = form_feed;	break;    case (','):	unary_delim = true;	code = comma;	break;    case '.':	unary_delim = false;	code = period;	break;    case '-':    case '+':			/* check for -, +, --, ++ */	code = (ps.last_u_d ? unary_op : binary_op);	unary_delim = true;	if (*buf_ptr == token[0]) {	    /* check for doubled character */	    *e_token++ = *buf_ptr++;	    /* buffer overflow will be checked at end of loop */	    if (last_code == ident || last_code == rparen) {		code = (ps.last_u_d ? unary_op : postop);		/* check for following ++ or -- */		unary_delim = false;	    }	}	else if (*buf_ptr == '=')	    /* check for operator += */	    *e_token++ = *buf_ptr++;	else if (*buf_ptr == '>') {	    /* check for operator -> */	    *e_token++ = *buf_ptr++;	    if (!pointer_as_binop) {		unary_delim = false;		code = unary_op;		ps.want_blank = false;	    }	}	break;			/* buffer overflow will be checked at end of				 * switch */    case '=':	if (ps.in_or_st)	    ps.block_init = 1;#ifdef undef	if (chartype[*buf_ptr] == opchar) {	/* we have two char assignment */	    e_token[-1] = *buf_ptr++;	    if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)		*e_token++ = *buf_ptr++;	    *e_token++ = '=';	/* Flip =+ to += */	    *e_token = 0;	}#else	if (*buf_ptr == '=') {/* == */	    *e_token++ = '=';	/* Flip =+ to += */	    buf_ptr++;	    *e_token = 0;	}#endif	code = binary_op;	unary_delim = true;	break;	/* can drop thru!!! */    case '>':    case '<':    case '!':			/* ops like <, <<, <=, !=, etc */	if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {	    *e_token++ = *buf_ptr;	    if (++buf_ptr >= buf_end)		fill_buffer();	}	if (*buf_ptr == '=')	    *e_token++ = *buf_ptr++;	code = (ps.last_u_d ? unary_op : binary_op);	unary_delim = true;	break;    default:	if (token[0] == '/' && *buf_ptr == '*') {	    /* it is start of comment */	    *e_token++ = '*';	    if (++buf_ptr >= buf_end)		fill_buffer();	    code = comment;	    unary_delim = ps.last_u_d;	    break;	}	while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {	    /*	     * handle ||, &&, etc, and also things as in int *****i	     */	    *e_token++ = *buf_ptr;	    if (++buf_ptr >= buf_end)		fill_buffer();	}	code = (ps.last_u_d ? unary_op : binary_op);	unary_delim = true;    }				/* end of switch */    if (code != newline) {	l_struct = false;	last_code = code;    }    if (buf_ptr >= buf_end)	/* check for input buffer empty */	fill_buffer();    ps.last_u_d = unary_delim;    *e_token = '\0';		/* null terminate the token */    return (code);}/* * Add the given keyword to the keyword table, using val as the keyword type */addkey(key, val)    char       *key;{    register struct templ *p = specials;    while (p->rwd)	if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)	    return;	else	    p++;    if (p >= specials + sizeof specials / sizeof specials[0])	return;			/* For now, table overflows are silently				 * ignored */    p->rwd = key;    p->rwcode = val;    p[1].rwd = 0;    p[1].rwcode = 0;    return;}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -