📄 parse.c
字号:
/* parse.c -- parse a C-source file (or a file with something resembling C) *//* This is part of cdg - a C-source Documentation Generator. Copyright (C) 1995, 1996 Peter Knoppers. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.*/#include <stdlib.h>#include <stdio.h>#include <string.h>#include "y.tab.h"#include "cdg.h"/* * maketag: Output a word for the cross reference list. * arguments: * int lineno: Line number in the input file on which the word * occurred. * char * identifier: The word that must be put in the cross reference list. * char type: The way the word is used. Possible values are: * 'T': word is typedef-d * 't': word is a type * 'F': word is a function being defined * 'C': word is a function being called * 'D': word is a variable being defined * 'd': word is arg in extern function declaration * 'R': word is a variable being referenced * 'M': word is a macro being defined * 'm': word is referenced in cpp-command * 'N': word is an enum constant * 'E': word is an enum type * returns: Nothing. * remark: The content of identifier is erased. */void maketag (int lineno, char * identifier, char type){ fprintf (tagfile, "%6d\t%c\t%s\n", lineno, type, identifier); identifier[0] = '\0';}#define ASSUMEDTYPE 999 /* value for token that appears to be a type */#ifndef MAXPARENLEVEL#define MAXPARENLEVEL 100 /* max parentheses nesting depth */#endif#ifndef MAXBRACELEVEL#define MAXBRACELEVEL 100 /* max brace nesting depth */#endifenum /* states of the parser */{ S_GLOBAL, /* in global declaration part */ S_TYPEDEF, /* in typedef, structdef, uniondef, or enum */ S_LOCAL, /* in local declaration part */ S_CODE /* in code part of a function */};/* * parsefile: Parse a C source and identify all words that should go * in the cross reference list. * arguments: None. * returns: Int, nonzero on error, zero if no error. * remark: Recognition of cross-referable words inside macros and * cpp-commands is done in the lexical scanner. * This scanner does not parse any particular standard of C. Instead it parses * anything that looks like C, K&R-C or ANSI-C (but NOT C++). * Using a strict parser causes enormous problems if not all applicable header * files are scanned. Otherwise the parser can not know that some words (such * as FILE) are in fact types that were defined in some header file. * The parser does a good job at guessing whether a word is an identifier or a * type, but there is at least one case where there is no way to determine for * sure. See the comment in the parser for details. * * Another reason for writing a tolerant parser is that code between #ifdef, * #else and #endif should be esamined for the cross reference table. * * The easiest way to seriously confuse this scanner is to feed it an input * file with unbalanced braces or unbalanced parentheses. Code like: * printf ("Hello %s\n", * #ifdef BIG * "big world"); * #else * "small world"); * #endif * has unbalanced parentheses. It should be rewritten into something like * printf ("Hello %s\n", * #ifdef BIG * "big world" * #else * "small world" * #endif * ); * Similar problems can be constricted with braces. */int parsefile (void){ int state = S_GLOBAL; int bracelevel = 0; int parenlevel = 0; int bracketlevel = 0; int token; int prevtoken = 0; int prevprevtoken = 0; int prevprevprevtoken = 0; int lineno = 0; int externflag = 0; int enumflag = 0; int enumbracelevel = 0; /* bracelevel when ENUM was seen */ int typedefbracelevel = 0; /* bracelevel when TYPEDEF was seen */ int RHSflag[MAXPARENLEVEL]; /* this is a stack */ int savedstate[MAXBRACELEVEL]; /* this is another stack */ char previdentifier[MAXIDENTIFIER + 1];/* last seen, not yet output name */ previdentifier[0] = '\0'; while ((token = yylex()) != 0) /* yylex returns 0 on EOF */ { switch (token) { case TYPEDEF: state = S_TYPEDEF; typedefbracelevel = bracelevel; break; case ')': if (--parenlevel < 0) { fprintf (stderr, "Line %d: unbalanced parentheses\n", yylineno); parenlevel = 0; } if (previdentifier[0] != 0) if ((prevprevprevtoken == '(') && (prevprevtoken == IDENTIFIER) && (prevtoken == '*')) maketag (lineno, previdentifier, 't'); /* type cast */ else if ((prevprevtoken == '(') && (prevprevprevtoken != IF) && (prevprevprevtoken != WHILE)) break; /* this _may_ be a type cast */ else if (externflag != 0) /* arg in extern function decl */ maketag (lineno, previdentifier, 'd'); else if (((prevprevtoken == STRUCT) || (prevprevtoken == UNION)) && (prevtoken == IDENTIFIER)) maketag (lineno, previdentifier, 't'); /* type cast */ else if (state == S_GLOBAL) /* arg in function def */ maketag (lineno, previdentifier, 'D'); else /* arg in function call */ maketag (lineno, previdentifier, 'R'); break; case ENUM: enumflag++; enumbracelevel = bracelevel; break; case EXTERN: externflag = 1; /* FALL THROUGH */ case STATIC: case AUTO: case REGISTER: case CHAR: case SHORT: case INT: case LONG: case SIGNED: case UNSIGNED: case FLOAT: case DOUBLE: case CONST: case VOLATILE: case VOID: case STRUCT: case UNION: case CONSTANT: case STRING_LITERAL: case SIZEOF: case ELIPSIS: if (previdentifier[0] != 0) if (prevtoken == ')') maketag (lineno, previdentifier, 't'); else maketag (lineno, previdentifier, 'R'); break; case MUL_ASSIGN: case DIV_ASSIGN: case MOD_ASSIGN: case ADD_ASSIGN: case SUB_ASSIGN: case LEFT_ASSIGN: case RIGHT_ASSIGN: case XOR_ASSIGN: case OR_ASSIGN: case AND_ASSIGN: case PTR_OP: case '.': case INC_OP: case DEC_OP: case LEFT_OP: case RIGHT_OP: case LE_OP: case GE_OP: case EQ_OP: case NE_OP: case AND_OP: case OR_OP: case '+': case '/': case '%': case ':': case '&': case '!': case '~': case '<': case '>': case '|': case '?': case '-': case '^': if (prevprevtoken == ';') state = S_CODE; RHSflag[parenlevel] = 1; if (previdentifier[0] != '\0') /* * There is one case that we cannot be sure of: * In '(' IDENTIFIER ')' '-' either * - the IDENTIFIER is a cast and the '-' is a unary minus * or * - the parentheses are superfluous and the '-' is a binary * minus * We assume the first case because it is more plausible. */ if ((prevprevprevtoken == '(') && (prevprevtoken == IDENTIFIER) && (prevtoken == ')') && (token == '-')) maketag (lineno, previdentifier, 't'); /* assume unary - */ else maketag (lineno, previdentifier, 'R'); break; case '*': if (((prevprevtoken == STRUCT) || (prevprevtoken == UNION)) && (prevtoken == IDENTIFIER)) /* type cast */ if (previdentifier[0] != '\0') /* should always succeed (?) */ maketag (lineno, previdentifier, 't'); break; case IDENTIFIER: if (previdentifier[0] != '\0') { maketag (lineno, previdentifier, 't'); prevtoken = ASSUMEDTYPE; } strncpy (previdentifier, identifierstring, MAXIDENTIFIER); lineno = yylineno + totalinsertednewlines; previdentifier[MAXIDENTIFIER] = '\0'; break; case ';': if (enumflag != 0) { if (bracelevel == enumbracelevel) { if (previdentifier[0] != '\0') /* enum type */ maketag (lineno, previdentifier, 'T'); enumflag = 0; } else if (previdentifier[0] != '\0') /* in enum list */ maketag (lineno, previdentifier, 'N'); } if (state == S_TYPEDEF) { if (previdentifier[0] != '\0') if (bracelevel > 0) maketag (lineno, previdentifier, 'D'); else maketag (lineno, previdentifier, 'T'); if (bracelevel == typedefbracelevel) { if (bracelevel == 0) state = S_GLOBAL; else state = S_LOCAL; } } else if (previdentifier[0] != '\0') if (externflag != 0) maketag (lineno, previdentifier, 'E'); else if (((state == S_GLOBAL) || (state == S_LOCAL)) && (RHSflag[parenlevel] == 0)) maketag (lineno, previdentifier, 'D'); else /* state == S_CODE */ maketag (lineno, previdentifier, 'R'); externflag = 0; RHSflag[parenlevel] = 0; break; case '=': RHSflag[parenlevel] = 1; if (prevprevtoken == ';') state = S_CODE; if (previdentifier[0] != '\0') if (((state == S_GLOBAL) || (state == S_LOCAL)) && (externflag == 0) && (prevprevtoken != '{')) maketag (lineno, previdentifier, 'D'); else if (enumflag != 0) /* enum const */ maketag (lineno, previdentifier, 'N'); else if (externflag != 0) maketag (lineno, previdentifier, 'E'); else maketag (lineno, previdentifier, 'R'); previdentifier [0] = '\0'; break; case ',': if (previdentifier[0] != '\0') if (enumflag != 0) /* enum const */ maketag (lineno, previdentifier, 'N'); else if (((state == S_GLOBAL) || (state == S_LOCAL)) && (externflag == 0) && (prevprevtoken != '{') && (prevprevtoken != ')')) maketag (lineno, previdentifier, 'D'); else if (externflag != 0) maketag (lineno, previdentifier, 'E'); else maketag (lineno, previdentifier, 'R'); previdentifier [0] = '\0'; RHSflag[parenlevel] = 0; break; case '{': savedstate[bracelevel] = state; if (++bracelevel >= MAXBRACELEVEL) { fprintf (stderr, "Braces nested too deeply, recompile %s%d.\n", "with MAXBRACELEVEL > ", MAXBRACELEVEL); exit (1); } if ((prevprevtoken == STRUCT) || (prevprevtoken == UNION) || (prevprevtoken == TYPEDEF) || (prevprevtoken == ENUM)) { state = S_TYPEDEF; if (previdentifier[0] != '\0') maketag (lineno, previdentifier, 'T'); } else { state = S_LOCAL; if (previdentifier[0] != '\0') maketag (lineno, previdentifier, 'R'); } break; case '}': if (--bracelevel < 0) { fprintf (stderr, "Line %d: unbalanced braces\n", yylineno); bracelevel = 0; } state = savedstate[bracelevel]; if (previdentifier[0] != '\0') if (enumflag > 0) /* end of enum list */ maketag (lineno, previdentifier, 'N'); else /* struct or union initializer */ maketag (lineno, previdentifier, 'R'); break; case '[': bracketlevel++; if (previdentifier[0] != '\0') switch (prevprevtoken) { case EXTERN: case STATIC: case AUTO: case REGISTER: case CHAR: case SHORT: case INT: case LONG: case SIGNED: case UNSIGNED: case FLOAT: case DOUBLE: case ASSUMEDTYPE: if (previdentifier[0] != '\0') if (externflag == 0) maketag (lineno, previdentifier, 'D'); else maketag (lineno, previdentifier, 'E'); break; case ';': state = S_CODE; /* FALL THROUGH */ default: if (previdentifier[0] != '\0') maketag (lineno, previdentifier, 'R'); break; } break; case ']': if (--bracketlevel < 0) { fprintf (stderr, "Line %d: unbalanced brackets\n", yylineno); bracketlevel = 0; } if (previdentifier[0] != '\0') maketag (lineno, previdentifier, 'R'); break; case '(': if (++parenlevel >= MAXPARENLEVEL) { fprintf (stderr, "Parentheses nested too deeply, %s%d.\n", "recompile with MAXPARENLEVEL > ", MAXPARENLEVEL); exit (1); } RHSflag[parenlevel] = 0; if (previdentifier[0] != '\0') if (bracelevel == 0) maketag (lineno, previdentifier, 'F'); else switch (prevprevtoken) { case EXTERN: case STATIC: case AUTO: case REGISTER: case CHAR: case SHORT: case INT: case LONG: case SIGNED: case UNSIGNED: case FLOAT: case DOUBLE: maketag (lineno, previdentifier, 'F'); break; default: /* should check if prevtoken is typedef */ if (bracelevel == 0) fprintf (stderr, "Line %d: function call at bracelevel 0\n", yylineno); else if (state == S_LOCAL) state = S_CODE; maketag (lineno, previdentifier, 'C'); break; } break; case CASE: case DEFAULT: case IF: case ELSE: case SWITCH: case WHILE: case DO: case FOR: case GOTO: case CONTINUE: case BREAK: case RETURN: state = S_CODE; if (previdentifier[0] != '\0') maketag (lineno, previdentifier, 'R'); break; default: fprintf(stderr, "line %d: unknown token: %d (\'%c\')\n", yylineno, token, token); } /* * Shift token in prev...token list */ if (prevtoken != '*') /* drop '*' from prev... list */ { prevprevprevtoken = prevprevtoken; prevprevtoken = prevtoken; } prevtoken = token; } if ((previdentifier[0] != '\0') || (parenlevel != 0) || (bracelevel != 0) || (bracketlevel != 0)) fprintf (stderr, "Unexpected EOF\n"); return (0);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -