/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* Generated By:JavaCC: Do not edit this line. StandardTokenizer.java */
using System;
namespace Lucene.Net.Analysis.Standard
{
/// <summary>A grammar-based tokenizer constructed with JavaCC.
///
/// <p> This should be a good tokenizer for most European-language documents:
///
/// <ul>
/// <li>Splits words at punctuation characters, removing punctuation. However, a
/// dot that's not followed by whitespace is considered part of a token.
/// <li>Splits words at hyphens, unless there's a number in the token, in which case
/// the whole token is interpreted as a product number and is not split.
/// <li>Recognizes email addresses and internet hostnames as one token.
/// </ul>
///
/// <p>Many applications have specific tokenizer needs. If this tokenizer does
/// not suit your application, please consider copying this source code
/// directory to your project and maintaining your own grammar-based tokenizer.
/// </summary>
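/// <example>
/// A minimal usage sketch (illustrative addition, not part of the generated
/// file; it assumes the era's Token API with TermText() and Type()):
/// <code>
/// StandardTokenizer tokenizer = new StandardTokenizer(
/// new System.IO.StringReader("visit lucene.apache.org or mail dev@lucene.apache.org"));
/// for (Lucene.Net.Analysis.Token t = tokenizer.Next(); t != null; t = tokenizer.Next())
/// {
/// System.Console.WriteLine(t.TermText() + " : " + t.Type());
/// }
/// tokenizer.Close();
/// </code>
/// </example>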
public class StandardTokenizer : Lucene.Net.Analysis.Tokenizer
{
/// <summary>Constructs a tokenizer for this Reader. </summary>
public StandardTokenizer(System.IO.TextReader reader) : this(new FastCharStream(reader))
{
this.input = reader;
}
/// <summary>Returns the next token in the stream, or null at end of stream.
/// <p>The returned token's type is set to an element of
/// <see cref="StandardTokenizerConstants.tokenImage"/>.
/// </summary>
public override Lucene.Net.Analysis.Token Next()
{
Token token = null;
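// Dispatch on the kind of the next token: Jj_ntk() peeks ahead via the
// token manager and caches the result in jj_ntk (-1 means "not yet peeked").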
switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
{
case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ALPHANUM:
token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ALPHANUM);
break;
case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.APOSTROPHE:
token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.APOSTROPHE);
break;
case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ACRONYM:
token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ACRONYM);
break;
case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.COMPANY:
token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.COMPANY);
break;
case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.EMAIL:
token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.EMAIL);
break;
case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.HOST:
token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.HOST);
break;
case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.NUM:
token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.NUM);
break;
case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.CJ:
token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.CJ);
break;
case 0: // kind 0 is EOF in JavaCC-generated token managers
token = Jj_consume_token(0);
break;
default:
jj_la1[0] = jj_gen;
Jj_consume_token(-1); // deliberately fails, recording the error position
throw new ParseException();
}
if (token.kind == Lucene.Net.Analysis.Standard.StandardTokenizerConstants.EOF)
{
return null;
}
return new Lucene.Net.Analysis.Token(token.image, token.beginColumn, token.endColumn, Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage[token.kind]);
}
/// <summary>By default, closes the input Reader. </summary>
public override void Close()
{
token_source.Close();
base.Close();
}
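// Generated parser state: the token manager that produces raw tokens, the
// current/lookahead token chain, and JavaCC's error-reporting bookkeeping.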
public StandardTokenizerTokenManager token_source;
public Token token, jj_nt;
private int jj_ntk;
private int jj_gen;
private int[] jj_la1 = new int[1];
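// Per-production lookahead bitmask: bit k set means token kind k is legal.
// The value 0x10ff appears to cover the kinds handled by the switch in Next().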
private static int[] jj_la1_0_Renamed_Field;
private static void jj_la1_0()
{
jj_la1_0_Renamed_Field = new int[]{0x10ff};
}
public StandardTokenizer(CharStream stream)
{
token_source = new StandardTokenizerTokenManager(stream);
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 1; i++)
jj_la1[i] = -1;
}
public virtual void ReInit(CharStream stream)
{
token_source.ReInit(stream);
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 1; i++)
jj_la1[i] = -1;
}
public StandardTokenizer(StandardTokenizerTokenManager tm)
{
token_source = tm;
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 1; i++)
jj_la1[i] = -1;
}
public virtual void ReInit(StandardTokenizerTokenManager tm)
{
token_source = tm;
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 1; i++)
jj_la1[i] = -1;
}
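// Consumes one token of the expected kind, pulling from the token manager
// on demand; on a kind mismatch the stream position is restored and a
// ParseException is raised via GenerateParseException().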
private Token Jj_consume_token(int kind)
{
Token oldToken;
if ((oldToken = token).next != null)
token = token.next;
else
token = token.next = token_source.GetNextToken();
jj_ntk = -1;
if (token.kind == kind)
{
jj_gen++;
return token;
}
token = oldToken;
jj_kind = kind;
throw GenerateParseException();
}
public Token GetNextToken()
{
if (token.next != null)
token = token.next;
else
token = token.next = token_source.GetNextToken();
jj_ntk = -1;
jj_gen++;
return token;
}
public Token GetToken(int index)
{
Token t = token;
for (int i = 0; i < index; i++)
{
if (t.next != null)
t = t.next;
else
t = t.next = token_source.GetNextToken();
}
return t;
}
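// Peeks at the kind of the next token without consuming it, fetching and
// linking a new token from the token manager if none is buffered yet.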
private int Jj_ntk()
{
if ((jj_nt = token.next) == null)
return (jj_ntk = (token.next = token_source.GetNextToken()).kind);
else
return (jj_ntk = jj_nt.kind);
}
private System.Collections.ArrayList jj_expentries = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
private int[] jj_expentry;
private int jj_kind = -1;
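// Rebuilds the set of token kinds that were legal at the failure point
// (from jj_kind and the jj_la1 bitmasks) and packages it, together with
// the offending token, into a ParseException.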
public virtual ParseException GenerateParseException()
{
jj_expentries.Clear();
bool[] la1tokens = new bool[16];
for (int i = 0; i < 16; i++)
{
la1tokens[i] = false;
}
if (jj_kind >= 0)
{
la1tokens[jj_kind] = true;
jj_kind = -1;
}
for (int i = 0; i < 1; i++)
{
if (jj_la1[i] == jj_gen)
{
for (int j = 0; j < 32; j++)
{
if ((jj_la1_0_Renamed_Field[i] & (1 << j)) != 0)
{
la1tokens[j] = true;
}
}
}
}
for (int i = 0; i < 16; i++)
{
if (la1tokens[i])
{
jj_expentry = new int[1];
jj_expentry[0] = i;
jj_expentries.Add(jj_expentry);
}
}
int[][] exptokseq = new int[jj_expentries.Count][];
for (int i = 0; i < jj_expentries.Count; i++)
{
exptokseq[i] = (int[]) jj_expentries[i];
}
return new ParseException(token, exptokseq, Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage);
}
public void Enable_tracing()
{
}
public void Disable_tracing()
{
}
static StandardTokenizer()
{
jj_la1_0();
}
}
}