⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 standardtokenizer.java

📁 一套java版本的搜索引擎源码
💻 JAVA
字号:
/* Generated By:JavaCC: Do not edit this line. StandardTokenizer.java */
package org.apache.lucene.analysis.standard;

import java.io.*;

/** A grammar-based tokenizer constructed with JavaCC.
 *
 * <p> This should be a good tokenizer for most European-language documents:
 *
 * <ul>
 *   <li>Splits words at punctuation characters, removing punctuation. However, a
 *     dot that's not followed by whitespace is considered part of a token.
 *   <li>Splits words at hyphens, unless there's a number in the token, in which case
 *     the whole token is interpreted as a product number and is not split.
 *   <li>Recognizes email addresses and internet hostnames as one token.
 * </ul>
 *
 * <p>Many applications have specific tokenizer needs.  If this tokenizer does
 * not suit your application, please consider copying this source code
 * directory to your project and maintaining your own grammar-based tokenizer.
 */
public class StandardTokenizer extends org.apache.lucene.analysis.Tokenizer implements StandardTokenizerConstants {

  /** Constructs a tokenizer for this Reader.
   *  Wraps the Reader in a FastCharStream (presumably the JavaCC CharStream
   *  adapter for java.io.Reader — defined elsewhere in this package) and
   *  delegates to the CharStream constructor; also stores the Reader in the
   *  inherited Tokenizer.input field. */
  public StandardTokenizer(Reader reader) {
    this(new FastCharStream(reader));
    this.input = reader;
  }

  /** Returns the next token in the stream, or null at EOS.
   * <p>The returned token's type is set to an element of {@link
   * StandardTokenizerConstants#tokenImage}.
   */
  final public org.apache.lucene.analysis.Token next() throws ParseException, IOException {
    Token token = null;
    // Dispatch on the kind of the lookahead token; jj_ntk caches it, -1 means
    // "not yet computed" so jj_ntk() is called to fetch and cache it.
    switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
    // Token-kind constants (ALPHANUM ... CJ) come from StandardTokenizerConstants.
    case ALPHANUM:
      token = jj_consume_token(ALPHANUM);
      break;
    case APOSTROPHE:
      token = jj_consume_token(APOSTROPHE);
      break;
    case ACRONYM:
      token = jj_consume_token(ACRONYM);
      break;
    case COMPANY:
      token = jj_consume_token(COMPANY);
      break;
    case EMAIL:
      token = jj_consume_token(EMAIL);
      break;
    case HOST:
      token = jj_consume_token(HOST);
      break;
    case NUM:
      token = jj_consume_token(NUM);
      break;
    case CJ:
      token = jj_consume_token(CJ);
      break;
    case 0:
      // Kind 0 is the EOF token in JavaCC-generated parsers.
      token = jj_consume_token(0);
      break;
    default:
      // Unexpected token kind: record the generation counter for error
      // reporting, then force a mismatch (-1 never matches) so that
      // jj_consume_token throws via generateParseException().
      jj_la1[0] = jj_gen;
      jj_consume_token(-1);
      throw new ParseException();
    }
    if (token.kind == EOF) {
      // {if (true) return ...;} is JavaCC's generated idiom for user-action
      // returns; it keeps the trailing "missing return" guard reachable
      // as far as the compiler is concerned.
      {if (true) return null;}
    } else {
      // Wrap the JavaCC token in a Lucene analysis Token. beginColumn and
      // endColumn are passed as the start/end positions — presumably the
      // character offsets expected by the Lucene Token constructor
      // (constructor signature not visible here; confirm against
      // org.apache.lucene.analysis.Token).
      {if (true) return
        new org.apache.lucene.analysis.Token(token.image,
                                      token.beginColumn,token.endColumn,
                                      tokenImage[token.kind]);}
    }
    // Unreachable in practice; generated safety net required by the compiler.
    throw new Error("Missing return statement in function");
  }

  /** Generated Token Manager that produces raw JavaCC tokens from the CharStream. */
  public StandardTokenizerTokenManager token_source;
  /** Current token, and next-token scratch reference used by jj_ntk(). */
  public Token token, jj_nt;
  // Cached kind of the lookahead token; -1 means not yet fetched.
  private int jj_ntk;
  // Generation counter, incremented on every successfully consumed token;
  // used by generateParseException() to tell which lookahead checks are stale.
  private int jj_gen;
  // Per-choice-point error info: jj_la1[i] holds the jj_gen value at which
  // choice point i last failed.
  final private int[] jj_la1 = new int[1];
  // Bitmask of token kinds expected at choice point 0 (one 32-bit word).
  static private int[] jj_la1_0;
  static {
      jj_la1_0();
   }
   private static void jj_la1_0() {
      jj_la1_0 = new int[] {0x10ff,};
   }

  /** Constructor with generated Token Manager; initializes lookahead state. */
  public StandardTokenizer(CharStream stream) {
    token_source = new StandardTokenizerTokenManager(stream);
    token = new Token();
    jj_ntk = -1;
    jj_gen = 0;
    for (int i = 0; i < 1; i++) jj_la1[i] = -1;
  }

  /** Reinitialise this tokenizer to read from a new CharStream, reusing the
   *  existing token manager and resetting all lookahead/error state. */
  public void ReInit(CharStream stream) {
    token_source.ReInit(stream);
    token = new Token();
    jj_ntk = -1;
    jj_gen = 0;
    for (int i = 0; i < 1; i++) jj_la1[i] = -1;
  }

  /** Constructor with an externally supplied Token Manager. */
  public StandardTokenizer(StandardTokenizerTokenManager tm) {
    token_source = tm;
    token = new Token();
    jj_ntk = -1;
    jj_gen = 0;
    for (int i = 0; i < 1; i++) jj_la1[i] = -1;
  }

  /** Reinitialise with a new Token Manager, resetting all parser state. */
  public void ReInit(StandardTokenizerTokenManager tm) {
    token_source = tm;
    token = new Token();
    jj_ntk = -1;
    jj_gen = 0;
    for (int i = 0; i < 1; i++) jj_la1[i] = -1;
  }

  /** Consume the next token, which must be of the given kind.
   *  Tokens form a linked list via Token.next, so previously fetched tokens
   *  are reused before asking the token manager for a new one. On a kind
   *  mismatch the current token is restored and a ParseException is thrown. */
  final private Token jj_consume_token(int kind) throws ParseException {
    Token oldToken;
    if ((oldToken = token).next != null) token = token.next;
    else token = token.next = token_source.getNextToken();
    jj_ntk = -1;  // lookahead cache is now stale
    if (token.kind == kind) {
      jj_gen++;
      return token;
    }
    // Mismatch: roll back so error reporting sees the token before the failure.
    token = oldToken;
    jj_kind = kind;
    throw generateParseException();
  }

  /** Advance to and return the next token unconditionally (no kind check). */
  final public Token getNextToken() {
    if (token.next != null) token = token.next;
    else token = token.next = token_source.getNextToken();
    jj_ntk = -1;
    jj_gen++;
    return token;
  }

  /** Return the token `index` positions ahead of the current one (0 = current),
   *  fetching and chaining new tokens from the token manager as needed. */
  final public Token getToken(int index) {
    Token t = token;
    for (int i = 0; i < index; i++) {
      if (t.next != null) t = t.next;
      else t = t.next = token_source.getNextToken();
    }
    return t;
  }

  /** Compute (and cache in jj_ntk) the kind of the lookahead token,
   *  fetching it from the token manager if it has not been read yet. */
  final private int jj_ntk() {
    if ((jj_nt=token.next) == null)
      return (jj_ntk = (token.next=token_source.getNextToken()).kind);
    else
      return (jj_ntk = jj_nt.kind);
  }

  // Accumulates int[] entries of expected token kinds for error messages.
  private java.util.Vector jj_expentries = new java.util.Vector();
  private int[] jj_expentry;
  // Kind that jj_consume_token failed to match; -1 when no pending failure.
  private int jj_kind = -1;

  /** Build a ParseException describing the current token and the set of
   *  token kinds that would have been legal, derived from jj_kind and the
   *  jj_la1_0 expectation bitmask for choice points that failed at the
   *  current generation (jj_la1[i] == jj_gen). */
  public ParseException generateParseException() {
    jj_expentries.removeAllElements();
    boolean[] la1tokens = new boolean[16];  // one flag per token kind (16 kinds in this grammar)
    for (int i = 0; i < 16; i++) {
      la1tokens[i] = false;
    }
    if (jj_kind >= 0) {
      la1tokens[jj_kind] = true;
      jj_kind = -1;
    }
    // Expand the bitmask of each stale choice point into per-kind flags.
    for (int i = 0; i < 1; i++) {
      if (jj_la1[i] == jj_gen) {
        for (int j = 0; j < 32; j++) {
          if ((jj_la1_0[i] & (1<<j)) != 0) {
            la1tokens[j] = true;
          }
        }
      }
    }
    // Each expected alternative is a (length-1) sequence of token kinds.
    for (int i = 0; i < 16; i++) {
      if (la1tokens[i]) {
        jj_expentry = new int[1];
        jj_expentry[0] = i;
        jj_expentries.addElement(jj_expentry);
      }
    }
    int[][] exptokseq = new int[jj_expentries.size()][];
    for (int i = 0; i < jj_expentries.size(); i++) {
      exptokseq[i] = (int[])jj_expentries.elementAt(i);
    }
    return new ParseException(token, exptokseq, tokenImage);
  }

  /** Enable tracing — a no-op in this non-debug generated parser. */
  final public void enable_tracing() {
  }

  /** Disable tracing — a no-op in this non-debug generated parser. */
  final public void disable_tracing() {
  }

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -