⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 scanner.java

📁 数据挖掘中
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/*----------------------------------------------------------------------  File    : Scanner.java  Contents: scanner (lexical analysis of a character stream)  Author  : Christian Borgelt  History : 10.05.2004 file created from file scan.c            11.05.2004 function to put back token renamed to pushBack            20.05.2004 line number reporting added            25.05.2004 pushBack added as a synonym for ungetToken            06.07.2004 bug in getc concerning EOF fixed            21.02.2005 some utility functions added            01.03.2005 optional newline token added----------------------------------------------------------------------*/package util;import java.io.InputStream;import java.io.FileInputStream;import java.io.InputStreamReader;import java.io.BufferedReader;import java.io.IOException;/*--------------------------------------------------------------------*/public class Scanner {/*--------------------------------------------------------------------*/  public  static final int EOF    =   -1; /* end of file */  /* --- tokens --- */  public  static final int T_EOF  =  256; /* end of file */  public  static final int T_NUM  =  257; /* number (floating point) */  public  static final int T_ID   =  258; /* identifier or string */  public  static final int T_RGT  =  259; /* right arrow '->' */  public  static final int T_LFT  =  260; /* left  arrow '<-' */  public  static final int T_CMP  =  261; /* two char. comparison */  /* --- character classes --- */  private static final int C_ILLEGAL = 0; /* illegal character */  private static final int C_SPACE   = 1; /* white space, e.g. ' ' */  private static final int C_LETTER  = 2; /* letter or underscore '_' */  private static final int C_DIGIT   = 3; /* digit */  private static final int C_POINT   = 4; /* point, '.' */  private static final int C_SIGN    = 5; /* sign,  '+' or '-' */  private static final int C_SLASH   = 6; /* slash, '/' */  private static final int C_QUOTE   = 7; /* quote, e.g. '"' '`' */  private static final int C_CMPOP   = 8; /* comparison operator, '<' */  private static final int C_ACTIVE  = 9; /* active characters, ',' */  /* --- scanner states --- */  private static final int S_SPACE  =  0; /* skipping white space */  private static final int S_ID     =  1; /* reading identifier */  private static final int S_NUMDIG =  2; /* number, digit */  private static final int S_NUMPT  =  3; /* number, decimal point */  private static final int S_FRAC   =  4; /* number, digit and point */  private static final int S_EXPIND =  5; /* exponent, indicator */  private static final int S_EXPSGN =  6; /* exponent, sign */  private static final int S_EXPDIG =  7; /* exponent, digit */  private static final int S_SIGN   =  8; /* sign read */  private static final int S_CMPOP  =  9; /* comparison operator */  private static final int S_STRING = 10; /* quoted string */  private static final int S_ESC    = 11; /* escaped character */  private static final int S_OCT1   = 12; /* octal  number, 1 digit */  private static final int S_OCT2   = 13; /* octal  number, 2 digits */  private static final int S_HEX1   = 14; /* hexad. number, 1 digit */  private static final int S_HEX2   = 15; /* hexad. number, 2 digits */  private static final int S_SLASH  = 16; /* slash read */  private static final int S_CPPCOM = 17; /* C++ comment */  private static final int S_CCOM1  = 18; /* C comment */  private static final int S_CCOM2  = 19; /* C comment, poss. end */  private static final int S_CCOM3  = 20; /* C comment, poss. start */  /* --- character classes --- */  private static final char ccltab[] = {          /* NUL  SOH  STX  ETX  EOT  ENQ  ACK  BEL */  /* 00 */    0,   0,   0,   0,   0,   0,   0,   0,          /*  BS   HT   LF   VT   FF   CR   SO   SI */              0,   1,   1,   1,   1,   1,   0,   0,          /* DLE  DC1  DC2  DC3  DC4  NAK  SYN  ETB */  /* 10 */    0,   0,   0,   0,   0,   0,   0,   0,          /* CAN   EM  SUB  ESC   FS   GS   RS   US */              0,   0,   0,   0,   0,   0,   0,   0,          /* ' '  '!'  '"'  '#'  '$'  '%'  '&'  ''' */  /* 20 */    1,   8,   7,   9,   9,   9,   9,   7,          /* '('  ')'  '*'  '+'  ','  '-'  '.'  '/' */              9,   9,   9,   5,   9,   5,   4,   6,          /* '0'  '1'  '2'  '3'  '4'  '5'  '6'  '7' */  /* 30 */    3,   3,   3,   3,   3,   3,   3,   3,          /* '8'  '9'  ':'  ';'  '<'  '='  '>'  '?' */              3,   3,   9,   9,   8,   8,   8,   9,          /* '@'  'A'  'B'  'C'  'D'  'E'  'F'  'G' */  /* 40 */    0,   2,   2,   2,   2,   2,   2,   2,          /* 'H'  'I'  'J'  'K'  'L'  'M'  'N'  'O' */              2,   2,   2,   2,   2,   2,   2,   2,          /* 'P'  'Q'  'R'  'S'  'T'  'U'  'V'  'W' */  /* 50 */    2,   2,   2,   2,   2,   2,   2,   2,          /* 'X'  'Y'  'Z'  '['  '\'  ']'  '^'  '_' */              2,   2,   2,   9,   9,   9,   9,   2,          /* '`'  'a'  'b'  'c'  'd'  'e'  'f'  'g' */  /* 60 */    7,   2,   2,   2,   2,   2,   2,   2,          /* 'h'  'i'  'j'  'k'  'l'  'm'  'n'  'o' */              2,   2,   2,   2,   2,   2,   2,   2,          /* 'p'  'q'  'r'  's'  't'  'u'  'v'  'w' */  /* 70 */    2,   2,   2,   2,   2,   2,   2,   2,          /* 'x'  'y'  'z'  '{'  '|'  '}'  '~'  DEL */              2,   2,   2,   9,   9,   9,   9,   0,  /* 80 */    0,   0,   0,   0,   0,   0,   0,   0,              0,   0,   0,   0,   0,   0,   0,   0,  /* 90 */    0,   0,   0,   0,   0,   0,   0,   0,              0,   0,   0,   0,   0,   0,   0,   0,  /* a0 */    0,   0,   0,   0,   0,   0,   0,   0,              0,   0,   0,   0,   0,   0,   0,   0,  /* b0 */    0,   0,   0,   0,   0,   0,   0,   0,              0,   0,   0,   0,   0,   0,   0,   0,  /* c0 */    0,   0,   0,   0,   0,   0,   0,   0,              0,   0,   0,   0,   0,   0,   0,   0,  /* d0 */    0,   0,   0,   0,   0,   0,   0,   0,              0,   0,   0,   0,   0,   0,   0,   0,  /* e0 */    0,   0,   0,   0,   0,   0,   0,   0,              0,   0,   0,   0,   0,   0,   0,   0,  /* f0 */    0,   0,   0,   0,   0,   0,   0,   0,              0,   0,   0,   0,   0,   0,   0,   0 };  /* --- instance variables --- */  private BufferedReader reader;  /* reader for input file */  private String         curr;    /* current input line */  private int            pos;     /* current position in input line */  private int            cbuf;    /* buffer for unget character */  private boolean        back;    /* flag for unget token */  private boolean        nltok;   /* for for newline tokens */  public  int            ttype;   /* current token type */  public  String         value;   /* current token value */  public  int            line;    /* current line number */  /*------------------------------------------------------------------*/  public Scanner (String s)  {                             /* --- create a scanner for a string */    this.reader = null;         /* clear the file reader */    this.curr   = s;            /* store the line to scan */    this.cbuf   = -1;           /* there is no buffered character */    this.back   = false;        /* and no buffered token */    this.nltok  = false;        /* newline '\n' is not a token */    this.line   = 1;            /* initialize the line number */  }  /* Scanner() */  /*------------------------------------------------------------------*/  public Scanner (InputStream f) throws IOException  {                             /* --- create a scanner for a file */    this.reader = new BufferedReader(new InputStreamReader(f));    this.curr   = this.reader.readLine();    if (this.curr != null) this.curr += '\n';    this.cbuf   = -1;           /* read the first line of the file */    this.back   = false;        /* the buffers are both empty */    this.nltok  = false;        /* newline '\n' is not a token */    this.line   = 1;            /* initialize the line number */  }  /* Scanner() */  /*------------------------------------------------------------------*/  public String lno () { return " (line " +this.line +")"; }  /*------------------------------------------------------------------*/  public void nlToken (boolean flag) { this.nltok = flag; }  /*------------------------------------------------------------------*/  private int getc () throws IOException  {                             /* --- get the next character */    int c;                      /* next character */    if (this.cbuf >= 0) {       /* check for a buffered character */      c = (char)this.cbuf; this.cbuf = -1; return c; }    if (this.curr == null) return EOF;    while (this.pos >= this.curr.length()) {      if (this.reader == null) return EOF;      this.curr = this.reader.readLine();      if (this.curr   == null) return EOF;      this.curr += '\n';        /* if the current line is processed, */      this.pos   = 0;           /* read a new line and reset position */    }    c = this.curr.charAt(this.pos++);    if (c == '\n') this.line++; /* count a new line and */    return c;                   /* return the next character */  }  /* getc() */  /*------------------------------------------------------------------*/  private void ungetc (int c) { this.cbuf = c; }  /*------------------------------------------------------------------*/  public int nextToken () throws IOException  {                             /* --- get next token */    int  c, ccl;                /* character and character class */    int  quote = 0;             /* quote at the start of a string */    int  ec    = 0;             /* escaped character */    int  state = 0;             /* state of automaton */    int  level = 0;             /* comment nesting level */    StringBuffer s;             /* buffer for token value */    if (this.back) {            /* check for a returned token */      this.back = false; return this.ttype; }    s = new StringBuffer();     /* get a buffer for the token value */    while (true) {              /* read loop */      c   = this.getc();        /* get character and character class */      ccl = (c < 0) ? -1 : ccltab[c];      switch (state) {          /* evaluate state of automaton */        case S_SPACE:           /* --- skip white space */          switch (ccl) {        /* evaluate character category */            case C_SPACE : if ((c == '\n') && this.nltok) {                             s.append((char)c);                             this.value = s.toString();                             return this.ttype = c;                           }    /* if newline is a token, return it, */                           break;            /* otherwise do nothing */            case C_LETTER: s.append((char)c); state = S_ID;     break;            case C_DIGIT : s.append((char)c); state = S_NUMDIG; break;            case C_POINT : s.append((char)c); state = S_NUMPT;  break;            case C_SIGN  : s.append((char)c); state = S_SIGN;   break;            case C_CMPOP : s.append((char)c); state = S_CMPOP;  break;            case C_QUOTE : quote = c;         state = S_STRING; break;            case C_SLASH :                    state = S_SLASH;  break;            case C_ACTIVE: s.append((char)c);                           this.value = s.toString();                           return this.ttype = c;            case EOF     : this.value = "<eof>";                           return this.ttype = T_EOF;            default      : s.append((char)c);                           this.value = s.toString();                           throw new IOException("illegal character '"                             +(char)c +"'" +this.lno());          } break;        case S_ID:              /* --- identifier (letter read) */          if ((ccl == C_LETTER) /* if another letter */          ||  (ccl == C_DIGIT)  /* or a digit */          ||  (ccl == C_POINT)  /* or a decimal point */          ||  (ccl == C_SIGN)){ /* or a sign follows */            s.append((char)c); break; } /* buffer character */          this.ungetc(c);       /* put back last character */          this.value = s.toString();          return this.ttype = T_ID;  /* return 'identifier' */        case S_NUMDIG:          /* --- number (digit read) */          s.append((char)c);    /* buffer character */          if  (ccl == C_DIGIT)  /* if another digit follows, */            break;              /* do nothing */          if  (ccl == C_POINT){ /* if a decimal point follows, */            state = S_FRAC;   break; } /* go to 'fraction' state */          if ((c == 'e')        /* if an exponent indicator follows */          ||  (c == 'E')) {     /* (lower- or uppercase), */            state = S_EXPIND; break; } /* go to 'exponent' state */          if ((ccl == C_LETTER) /* if a letter */          ||  (ccl == C_SIGN)){ /* or a sign follows, */            state = S_ID; break;/* go to 'identifier' state */          }                     /* otherwise */          this.ungetc(c);       /* put back last character */          this.value = s.toString().substring(0,s.length()-1);          return this.ttype = T_NUM;   /* return 'number' */        case S_NUMPT:           /* --- number (point read) */          s.append((char)c);    /* buffer character */          if  (ccl == C_DIGIT) {       /* if a digit follows, */            state = S_FRAC; break; }   /* go to 'fraction' state */          if ((ccl == C_LETTER) /* if a letter */          ||  (ccl == C_POINT)  /* or a decimal point */          ||  (ccl == C_SIGN)){ /* or a sign follows */            state = S_ID; break;/* go to 'identifier' state */          }                     /* otherwise */          this.ungetc(c);       /* put back last character, */          this.value = s.toString().substring(0,s.length()-1);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -