📄 scanner.java
字号:
/*----------------------------------------------------------------------
  File    : Scanner.java
  Contents: scanner (lexical analysis of a character stream)
  Author  : Christian Borgelt
  History : 10.05.2004 file created from file scan.c
            11.05.2004 function to put back token renamed to pushBack
            20.05.2004 line number reporting added
            25.05.2004 pushBack added as a synonym for ungetToken
            06.07.2004 bug in getc concerning EOF fixed
            21.02.2005 some utility functions added
            01.03.2005 optional newline token added
----------------------------------------------------------------------*/
package util;

import java.io.InputStream;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.BufferedReader;
import java.io.IOException;

/*--------------------------------------------------------------------
  Table-driven scanner: characters are mapped to character classes via
  ccltab and fed into a state automaton (see nextToken). The result of
  a scan step is published in the public fields ttype, value and line.
--------------------------------------------------------------------*/
public class Scanner {
/*--------------------------------------------------------------------*/

  public static final int EOF   = -1;   /* end of file (character level) */

  /* --- tokens --- */
  /* (values start at 256; single characters are returned as tokens */
  /* using their own character code, see nextToken) */
  public static final int T_EOF = 256;  /* end of file */
  public static final int T_NUM = 257;  /* number (floating point) */
  public static final int T_ID  = 258;  /* identifier or string */
  public static final int T_RGT = 259;  /* right arrow '->' */
  public static final int T_LFT = 260;  /* left  arrow '<-' */
  public static final int T_CMP = 261;  /* two char. comparison */

  /* --- character classes (entries of ccltab) --- */
  private static final int C_ILLEGAL = 0;  /* illegal character */
  private static final int C_SPACE   = 1;  /* white space, e.g. ' ' */
  private static final int C_LETTER  = 2;  /* letter or underscore '_' */
  private static final int C_DIGIT   = 3;  /* digit */
  private static final int C_POINT   = 4;  /* point, '.' */
  private static final int C_SIGN    = 5;  /* sign, '+' or '-' */
  private static final int C_SLASH   = 6;  /* slash, '/' */
  private static final int C_QUOTE   = 7;  /* quote, e.g. '"' '`' */
  private static final int C_CMPOP   = 8;  /* comparison operator, */
                                           /* '!', '<', '=', '>' */
  private static final int C_ACTIVE  = 9;  /* active characters, e.g. ',' */

  /* --- scanner states --- */
  private static final int S_SPACE   =  0; /* skipping white space */
  private static final int S_ID      =  1; /* reading identifier */
  private static final int S_NUMDIG  =  2; /* number, digit */
  private static final int S_NUMPT   =  3; /* number, decimal point */
  private static final int S_FRAC    =  4; /* number, digit and point */
  private static final int S_EXPIND  =  5; /* exponent, indicator */
  private static final int S_EXPSGN  =  6; /* exponent, sign */
  private static final int S_EXPDIG  =  7; /* exponent, digit */
  private static final int S_SIGN    =  8; /* sign read */
  private static final int S_CMPOP   =  9; /* comparison operator */
  private static final int S_STRING  = 10; /* quoted string */
  private static final int S_ESC     = 11; /* escaped character */
  private static final int S_OCT1    = 12; /* octal number, 1 digit */
  private static final int S_OCT2    = 13; /* octal number, 2 digits */
  private static final int S_HEX1    = 14; /* hexad. number, 1 digit */
  private static final int S_HEX2    = 15; /* hexad. number, 2 digits */
  private static final int S_SLASH   = 16; /* slash read */
  private static final int S_CPPCOM  = 17; /* C++ comment */
  private static final int S_CCOM1   = 18; /* C comment */
  private static final int S_CCOM2   = 19; /* C comment, poss. end */
  private static final int S_CCOM3   = 20; /* C comment, poss. start */

  /* --- character class table --- */
  /* One entry per character code 0x00..0xff (256 entries); each entry */
  /* is one of the C_... constants above. Codes 0x80..0xff are all */
  /* C_ILLEGAL. */
  private static final char ccltab[] = {
        /* NUL  SOH  STX  ETX  EOT  ENQ  ACK  BEL */
/* 00 */    0,   0,   0,   0,   0,   0,   0,   0,
        /*  BS   HT   LF   VT   FF   CR   SO   SI */
            0,   1,   1,   1,   1,   1,   0,   0,
        /* DLE  DC1  DC2  DC3  DC4  NAK  SYN  ETB */
/* 10 */    0,   0,   0,   0,   0,   0,   0,   0,
        /* CAN   EM  SUB  ESC   FS   GS   RS   US */
            0,   0,   0,   0,   0,   0,   0,   0,
        /* ' '  '!'  '"'  '#'  '$'  '%'  '&'  ''' */
/* 20 */    1,   8,   7,   9,   9,   9,   9,   7,
        /* '('  ')'  '*'  '+'  ','  '-'  '.'  '/' */
            9,   9,   9,   5,   9,   5,   4,   6,
        /* '0'  '1'  '2'  '3'  '4'  '5'  '6'  '7' */
/* 30 */    3,   3,   3,   3,   3,   3,   3,   3,
        /* '8'  '9'  ':'  ';'  '<'  '='  '>'  '?' */
            3,   3,   9,   9,   8,   8,   8,   9,
        /* '@'  'A'  'B'  'C'  'D'  'E'  'F'  'G' */
/* 40 */    0,   2,   2,   2,   2,   2,   2,   2,
        /* 'H'  'I'  'J'  'K'  'L'  'M'  'N'  'O' */
            2,   2,   2,   2,   2,   2,   2,   2,
        /* 'P'  'Q'  'R'  'S'  'T'  'U'  'V'  'W' */
/* 50 */    2,   2,   2,   2,   2,   2,   2,   2,
        /* 'X'  'Y'  'Z'  '['  '\'  ']'  '^'  '_' */
            2,   2,   2,   9,   9,   9,   9,   2,
        /* '`'  'a'  'b'  'c'  'd'  'e'  'f'  'g' */
/* 60 */    7,   2,   2,   2,   2,   2,   2,   2,
        /* 'h'  'i'  'j'  'k'  'l'  'm'  'n'  'o' */
            2,   2,   2,   2,   2,   2,   2,   2,
        /* 'p'  'q'  'r'  's'  't'  'u'  'v'  'w' */
/* 70 */    2,   2,   2,   2,   2,   2,   2,   2,
        /* 'x'  'y'  'z'  '{'  '|'  '}'  '~'  DEL */
            2,   2,   2,   9,   9,   9,   9,   0,
/* 80 */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 90 */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* a0 */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* b0 */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* c0 */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* d0 */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* e0 */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* f0 */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };

  /* --- instance variables --- */
  private BufferedReader reader; /* reader for input file */
                                 /* (null when scanning a string) */
  private String         curr;   /* current input line */
                                 /* (null once the input is exhausted) */
  private int            pos;    /* current position in input line */
  private int            cbuf;   /* buffer for unget character */
                                 /* (negative: buffer is empty) */
  private boolean        back;   /* flag for unget token */
  private boolean        nltok;  /* flag for newline tokens */
  public  int            ttype;  /* current token type */
  public  String         value;  /* current token value */
  public  int            line;   /* current line number */

  /*------------------------------------------------------------------
    Create a scanner that reads its input from the string s.
    No reader is attached, so getc() reports EOF once the end of s is
    reached. The string is used as given (no newline is appended).
    If s is null, getc() reports EOF immediately.
  ------------------------------------------------------------------*/
  public Scanner (String s)
  {                             /* --- create a scanner for a string */
    this.reader = null;         /* clear the file reader */
    this.curr   = s;            /* store the line to scan */
    this.cbuf   = -1;           /* there is no buffered character */
    this.back   = false;        /* and no buffered token */
    this.nltok  = false;        /* newline '\n' is not a token */
    this.line   = 1;            /* initialize the line number */
  }  /* Scanner() */

  /*------------------------------------------------------------------
    Create a scanner that reads its input line by line from stream f.
    The first line is read immediately, hence the IOException.
    NOTE(review): InputStreamReader(f) is created without an explicit
    charset, i.e. it decodes with the default charset -- confirm that
    this is intended for the files to be scanned.
  ------------------------------------------------------------------*/
  public Scanner (InputStream f) throws IOException
  {                             /* --- create a scanner for a file */
    this.reader = new BufferedReader(new InputStreamReader(f));
    this.curr   = this.reader.readLine();
    /* readLine() yields the line without its terminator, */
    /* so a '\n' is re-appended to every line that was read */
    if (this.curr != null) this.curr += '\n';
    this.cbuf   = -1;           /* read the first line of the file */
    this.back   = false;        /* the buffers are both empty */
    this.nltok  = false;        /* newline '\n' is not a token */
    this.line   = 1;            /* initialize the line number */
  }  /* Scanner() */

  /*------------------------------------------------------------------
    Return the current line number formatted as " (line N)",
    e.g. for appending to an error message (as done in nextToken).
  ------------------------------------------------------------------*/
  public String lno ()
  { return " (line " +this.line +")"; }

  /*------------------------------------------------------------------
    Switch reporting of newline characters as tokens on or off.
    With flag == true, nextToken() returns '\n' as a token of its own
    instead of skipping it as white space.
  ------------------------------------------------------------------*/
  public void nlToken (boolean flag)
  { this.nltok = flag; }

  /*------------------------------------------------------------------
    Get the next character of the input.
    Returns the character put back with ungetc() if there is one,
    otherwise the next character of the current line; when the current
    line is used up, further lines are fetched from the reader (if
    any). Returns EOF if no more input is available.
    The line counter is advanced only when a '\n' is taken from the
    current line, not when a character comes from the unget buffer.
  ------------------------------------------------------------------*/
  private int getc () throws IOException
  {                             /* --- get the next character */
    int c;                      /* next character */

    if (this.cbuf >= 0) {       /* check for a buffered character */
      c = (char)this.cbuf; this.cbuf = -1; return c; }
    if (this.curr == null) return EOF;   /* input already exhausted */
    while (this.pos >= this.curr.length()) {
      if (this.reader == null) return EOF;  /* string input: no more */
      this.curr = this.reader.readLine();
      if (this.curr == null) return EOF;    /* end of stream reached */
      this.curr += '\n';        /* if the current line is processed, */
      this.pos = 0;             /* read a new line and reset position */
    }                           /* (loop also skips empty lines) */
    c = this.curr.charAt(this.pos++);
    if (c == '\n') this.line++; /* count a new line and */
    return c;                   /* return the next character */
  }  /* getc() */

  /*------------------------------------------------------------------
    Put back one character so that the next getc() returns it again.
    Only one character can be held. Putting back EOF (-1) stores the
    "empty" marker, i.e. nothing is buffered and the next getc()
    evaluates the input state anew.
  ------------------------------------------------------------------*/
  private void ungetc (int c)
  { this.cbuf = c; }

  /*------------------------------------------------------------------
    Get the next token from the input.
    Sets this.ttype (token type) and this.value (token text) and
    returns the token type. If a token was pushed back (this.back),
    that token is returned again without reading any input.
    Throws an IOException if an illegal character is encountered
    (or if reading the input fails).
  ------------------------------------------------------------------*/
  public int nextToken () throws IOException
  {                             /* --- get next token */
    int c, ccl;                 /* character and character class */
    int quote = 0;              /* quote at the start of a string */
    int ec    = 0;              /* escaped character */
    int state = 0;              /* state of automaton (S_SPACE) */
    int level = 0;              /* comment nesting level */
    StringBuffer s;             /* buffer for token value */

    if (this.back) {            /* check for a returned token */
      this.back = false; return this.ttype; }
    s = new StringBuffer();     /* get a buffer for the token value */
    while (true) {              /* read loop */
      c   = this.getc();        /* get character and character class */
      /* NOTE(review): ccltab has 256 entries, but getc() can deliver */
      /* any char value; a character code above 255 would raise an */
      /* ArrayIndexOutOfBoundsException here -- consider a range check */
      ccl = (c < 0) ? -1 : ccltab[c];
      switch (state) {          /* evaluate state of automaton */

        case S_SPACE:           /* --- skip white space */
          switch (ccl) {        /* evaluate character category */
            case C_SPACE : if ((c == '\n') && this.nltok) {
                             s.append((char)c);
                             this.value = s.toString();
                             return this.ttype = c;
                           }    /* if newline is a token, return it, */
                           break;          /* otherwise do nothing */
            case C_LETTER: s.append((char)c); state = S_ID;     break;
            case C_DIGIT : s.append((char)c); state = S_NUMDIG; break;
            case C_POINT : s.append((char)c); state = S_NUMPT;  break;
            case C_SIGN  : s.append((char)c); state = S_SIGN;   break;
            case C_CMPOP : s.append((char)c); state = S_CMPOP;  break;
            /* the opening quote is remembered, but not stored */
            case C_QUOTE : quote = c;         state = S_STRING; break;
            case C_SLASH :                    state = S_SLASH;  break;
            /* an active character is a token by itself, */
            /* its character code serves as the token type */
            case C_ACTIVE: s.append((char)c);
                           this.value = s.toString();
                           return this.ttype = c;
            /* ccl is -1 (== EOF) if getc() reported end of input */
            case EOF     : this.value = "<eof>";
                           return this.ttype = T_EOF;
            /* remaining class: C_ILLEGAL */
            default      : s.append((char)c);
                           this.value = s.toString();
                           throw new IOException("illegal character '"
                                       +(char)c +"'" +this.lno());
          }
          break;

        case S_ID:              /* --- identifier (letter read) */
          if ((ccl == C_LETTER) /* if another letter */
          ||  (ccl == C_DIGIT)  /* or a digit */
          ||  (ccl == C_POINT)  /* or a decimal point */
          ||  (ccl == C_SIGN)){ /* or a sign follows */
            s.append((char)c); break; }    /* buffer character */
          this.ungetc(c);       /* put back last character */
          this.value = s.toString();
          return this.ttype = T_ID;        /* return 'identifier' */

        case S_NUMDIG:          /* --- number (digit read) */
          s.append((char)c);    /* buffer character (unconditionally; */
                                /* removed again below if not part */
                                /* of the token) */
          if (ccl == C_DIGIT)   /* if another digit follows, */
            break;              /* do nothing */
          if (ccl == C_POINT){  /* if a decimal point follows, */
            state = S_FRAC; break; }       /* go to 'fraction' state */
          if ((c == 'e')        /* if an exponent indicator follows */
          ||  (c == 'E')) {     /* (lower- or uppercase), */
            state = S_EXPIND; break; }     /* go to 'exponent' state */
          if ((ccl == C_LETTER) /* if a letter */
          ||  (ccl == C_SIGN)){ /* or a sign follows, */
            state = S_ID; break;/* go to 'identifier' state */
          }                     /* otherwise */
          this.ungetc(c);       /* put back last character */
          /* drop the character that was buffered above */
          this.value = s.toString().substring(0,s.length()-1);
          return this.ttype = T_NUM;       /* return 'number' */

        case S_NUMPT:           /* --- number (point read) */
          s.append((char)c);    /* buffer character */
          if (ccl == C_DIGIT) { /* if a digit follows, */
            state = S_FRAC; break; }       /* go to 'fraction' state */
          if ((ccl == C_LETTER) /* if a letter */
          ||  (ccl == C_POINT)  /* or a decimal point */
          ||  (ccl == C_SIGN)){ /* or a sign follows */
            state = S_ID; break;/* go to 'identifier' state */
          }                     /* otherwise */
          this.ungetc(c);       /* put back last character, */
          this.value = s.toString().substring(0,s.length()-1);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -