📄 scan.c
字号:
/*---------------------------------------------------------------------- File : scan.c Contents: scanner (lexical analysis of a character stream) Author : Christian Borgelt History : 16.01.1996 file created 21.02.1996 identifier recognition made more flexible 17.03.1996 keyword tokens removed 15.04.1996 duplicate state removed from sc_next 29.07.1997 < and > declared active (for decision trees) 08.09.1997 escape sequences in strings made possible 11.09.1997 single characters stored also in scn->value 08.02.1998 recover and error message functions added 09.02.1998 bug in state S_NUMPT concerning "-." removed 13.02.1998 token T_RGT ('->') added 04.03.1998 returned tokens changed for some states 17.04.1998 token T_LFT ('<-') added 27.05.1998 token T_CMP (two char comparison operator) added 31.05.1998 token conversion to number removed 08.02.1999 reading from standard input made possible 29.04.1999 quoted string parsing improved 13.11.1999 token string length stored in scn->len 23.11.2000 functions sc_fmtlen and sc_format added 15.07.2001 scanner made an object, state definitions added 16.07.2001 characters with code > 127 made printable look ahead functionality added (sc_back)----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <stdarg.h>#include <assert.h>#include "scan.h"#ifdef STORAGE#include "storage.h"#endif/*---------------------------------------------------------------------- Preprocessor Definitions----------------------------------------------------------------------*/#ifdef SC_SCAN/* --- character classes --- */#define C_ILLEGAL 0 /* illegal character */#define C_SPACE 1 /* white space, e.g. ' ' '\t' '\n' */#define C_LETTER 2 /* letter or underscore '_' */#define C_DIGIT 3 /* digit */#define C_POINT 4 /* point, '.' */#define C_SIGN 5 /* sign, '+' or '-' */#define C_SLASH 6 /* slash, '/' */#define C_QUOTE 7 /* quote, e.g. '"' '`' */#define C_CMPOP 8 /* comparison operator, e.g. '<' */#define C_ACTIVE 9 /* active characters, e.g. ',' '(' *//* --- scanner states --- */#define S_SPACE 0 /* skipping white space */#define S_ID 1 /* reading identifier */#define S_NUMDIG 2 /* reading number, digit */#define S_NUMPT 3 /* reading number, decimal point */#define S_FRAC 4 /* reading number, digit and point */#define S_EXPIND 5 /* reading exponent, indicator */#define S_EXPSGN 6 /* reading exponent, sign */#define S_EXPDIG 7 /* reading exponent, digit */#define S_SIGN 8 /* sign read */#define S_CMPOP 9 /* reading comparison operator */#define S_STRING 10 /* reading quoted string */#define S_ESC 11 /* reading escaped character */#define S_OCT1 12 /* reading octal number, 1 digit */#define S_OCT2 13 /* reading octal number, 2 digits */#define S_HEX1 14 /* reading hexad. number, 1 digit */#define S_HEX2 15 /* reading hexad. number, 2 digits */#define S_SLASH 16 /* slash read */#define S_CPPCOM 17 /* reading C++ comment */#define S_CCOM1 18 /* reading C comment */#define S_CCOM2 19 /* reading C comment, possible end */#define S_CCOM3 20 /* reading C comment, possible start *//* --- functions --- */#define UNGETC(s,c) do { if ((c) == EOF) break; \ if ((c) == '\n') (s)->line--; \ ungetc(c, (s)->file); } while (0)/* --- additional error codes --- */#define E_UNKNOWN (-11) /* unknown error */#define MSGOFFSET (-16) /* offset for add. error messages *//* --- texts --- */#ifdef GERMAN /* deutsche Texte */#define FILETXT "Datei"#define LINETXT "Zeile"#else /* English texts */#define FILETXT "file"#define LINETXT "line"#endif /* #ifdef GERMAN .. #else .. */#endif /* #ifdef SC_SCAN *//*---------------------------------------------------------------------- Constants----------------------------------------------------------------------*/static const char _scftab[256] = { /* scanable form classes */ /* NUL SOH STX ETX EOT ENQ ACK BEL *//* 00 */ 2, 2, 2, 2, 2, 2, 2, 'a', /* BS HT LF VT FF CR SO SI */ 'b', 't', 'n', 'v', 'f', 'r', 2, 2, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB *//* 10 */ 2, 2, 2, 2, 2, 2, 2, 2, /* CAN EM SUB ESC FS GS RS US */ 2, 2, 2, 2, 2, 2, 2, 2, /* ' ' '!' '"' '#' '$' '%' '&' ''' *//* 20 */ 1, 1, '"', 1, 1, 1, 1, 1, /* '(' ')' '*' '+' ',' '-' '.' '/' */ 1, 1, 1, 0, 1, 0, 0, 1, /* '0' '1' '2' '3' '4' '5' '6' '7' *//* 30 */ 0, 0, 0, 0, 0, 0, 0, 0, /* '8' '9' ':' ';' '<' '=' '>' '?' */ 0, 0, 1, 1, 1, 1, 1, 1, /* '@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' *//* 40 */ 1, 0, 0, 0, 0, 0, 0, 0, /* 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' */ 0, 0, 0, 0, 0, 0, 0, 0, /* 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' *//* 50 */ 0, 0, 0, 0, 0, 0, 0, 0, /* 'X' 'Y' 'Z' '[' '\' ']' '^' '_' */ 0, 0, 0, 1, '\\', 1, 1, 0, /* '`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' *//* 60 */ 1, 0, 0, 0, 0, 0, 0, 0, /* 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' */ 0, 0, 0, 0, 0, 0, 0, 0, /* 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' *//* 70 */ 0, 0, 0, 0, 0, 0, 0, 0, /* 'x' 'y' 'z' '{' '|' '}' '~' DEL */ 0, 0, 0, 1, 1, 1, 1, 2,/* 80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,/* 90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,/* a0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,/* b0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,/* c0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,/* d0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,/* e0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,/* f0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };#ifdef SC_SCANstatic const char _ccltab[256] = { /* character classes */ /* NUL SOH STX ETX EOT ENQ ACK BEL *//* 00 */ 0, 0, 0, 0, 0, 0, 0, 0, /* BS HT LF VT FF CR SO SI */ 0, 1, 1, 1, 1, 1, 0, 0, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB *//* 10 */ 0, 0, 0, 0, 0, 0, 0, 0, /* CAN EM SUB ESC FS GS RS US */ 0, 0, 0, 0, 0, 0, 0, 0, /* ' ' '!' '"' '#' '$' '%' '&' ''' *//* 20 */ 1, 8, 7, 9, 9, 9, 9, 7, /* '(' ')' '*' '+' ',' '-' '.' '/' */ 9, 9, 9, 5, 9, 5, 4, 6, /* '0' '1' '2' '3' '4' '5' '6' '7' *//* 30 */ 3, 3, 3, 3, 3, 3, 3, 3, /* '8' '9' ':' ';' '<' '=' '>' '?' */ 3, 3, 9, 9, 8, 8, 8, 9, /* '@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' *//* 40 */ 0, 2, 2, 2, 2, 2, 2, 2, /* 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' */ 2, 2, 2, 2, 2, 2, 2, 2, /* 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' *//* 50 */ 2, 2, 2, 2, 2, 2, 2, 2, /* 'X' 'Y' 'Z' '[' '\' ']' '^' '_' */ 2, 2, 2, 9, 9, 9, 9, 2, /* '`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' *//* 60 */ 7, 2, 2, 2, 2, 2, 2, 2, /* 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' */ 2, 2, 2, 2, 2, 2, 2, 2, /* 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' *//* 70 */ 2, 2, 2, 2, 2, 2, 2, 2, /* 'x' 'y' 'z' '{' '|' '}' '~' DEL */ 2, 2, 2, 9, 9, 9, 9, 0,/* 80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,/* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,/* a0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,/* b0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,/* c0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,/* d0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,/* e0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,/* f0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };#ifdef GERMAN /* deutsche Texte */static const char *_errmsgs[] = { /* error messages */ /* E_NONE 0 */ "kein Fehler", /* E_NOMEM -1 */ "nicht genug Speicher", /* E_FOPEN -2 */ "謋fnen fehlgeschlagen", /* E_FREAD -3 */ "Lesefehler", /* E_FWRITE -4 */ "Schreibfehler", /* E_ILLCHR -5 */ "ung黮tiges Zeichen '%c' (0x%02x)", /* E_BUFOVF -6 */ "Puffer黚erlauf", /* E_UNTSTR -7 */ "unbeendete Zeichenkette", /* E_UNTCOM -8 */ "unerwartetes Dateiende in Kommentar " "(Anfang in Zeile %d)", /* E_STATE -9 */ "ung黮tiger Scannerzustand", /* E_GARBAGE -10 */ "ung黮tiger Text am Dateiende", /* E_UNKNOWN -11 */ "unbekannter Fehler"};#else /* English texts */static const char *_errmsgs[] = { /* error messages */ /* E_NONE 0 */ "no error", /* E_NOMEM -1 */ "not enough memory", /* E_FOPEN -2 */ "file open failed", /* E_FREAD -3 */ "file read failed", /* E_FWRITE -4 */ "file write failed", /* E_ILLCHR -5 */ "illegal character '%c' (0x%02x)", /* E_BUFOVF -6 */ "scan buffer overflow", /* E_UNTSTR -7 */ "unterminated string", /* E_UNTCOM -8 */ "unexpected end of file in comment " "started on line %d", /* E_STATE -9 */ "illegal scanner state", /* E_GARBAGE -10 */ "garbage at end of file", /* E_UNKNOWN -11 */ "unknown error"};#endif /* #ifdef GERMAN .. #else .. */#endif /* #ifdef SC_SCAN *//*---------------------------------------------------------------------- Auxiliary Functions----------------------------------------------------------------------*/#ifdef SC_SCANstatic int _swap (SCAN *scan){ /* --- swap token information */ int t; /* swap buffer */ if (scan->value == scan->buf[0]) scan->value = scan->buf[1]; else scan->value = scan->buf[0]; t = scan->plen; scan->plen = scan->len; scan->len = t; t = scan->pline; scan->pline = scan->line; scan->line = t; t = scan->ptoken; scan->ptoken = scan->token; scan->token = t; return t; /* return the new token */} /* _swap() */#endif/*---------------------------------------------------------------------- Main Functions----------------------------------------------------------------------*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -