⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ngramtokenizertest.java

📁 一套java版本的搜索引擎源码
💻 JAVA
字号:
package org.apache.lucene.analysis.ngram;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.analysis.Token;

import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import junit.framework.TestCase;

/**
 * Tests {@link NGramTokenizer} for correctness.
 *
 * <p>Every test tokenizes the fixed input string {@code "abcde"} and compares
 * the string form of each produced {@link Token} against an expected list.
 * Raw collection types are used on purpose: the file does not rely on any
 * Java 5 language features.
 *
 * @author Otis Gospodnetic
 */
public class NGramTokenizerTest extends TestCase {
    /** Fresh reader over the sample text, recreated before every test. */
    private StringReader input;

    public void setUp() {
        input = new StringReader("abcde");
    }

    /** Size arguments (2, 1) must be rejected with an IllegalArgumentException. */
    public void testInvalidInput() throws Exception {
        assertConstructorRejects(2, 1);
    }

    /** Size arguments (0, 1) must be rejected with an IllegalArgumentException. */
    public void testInvalidInput2() throws Exception {
        assertConstructorRejects(0, 1);
    }

    /** Size arguments (1, 1) yield the five single-character grams in input order. */
    public void testUnigrams() throws Exception {
        List tokens = collectTokens(new NGramTokenizer(input, 1, 1));

        assertEquals(5, tokens.size());
        List exp = Arrays.asList(new String[] {
            "(a,0,1)", "(b,1,2)", "(c,2,3)", "(d,3,4)", "(e,4,5)"
        });
        assertEquals(exp, tokens);
    }

    /** Size arguments (2, 2) yield the four two-character grams in input order. */
    public void testBigrams() throws Exception {
        List tokens = collectTokens(new NGramTokenizer(input, 2, 2));

        assertEquals(4, tokens.size());
        List exp = Arrays.asList(new String[] {
            "(ab,0,2)", "(bc,1,3)", "(cd,2,4)", "(de,3,5)"
        });
        assertEquals(exp, tokens);
    }

    /**
     * Size arguments (1, 3) yield all grams of length 1, then length 2, then
     * length 3; within each length the grams appear in input order.
     */
    public void testNgrams() throws Exception {
        List tokens = collectTokens(new NGramTokenizer(input, 1, 3));

        assertEquals(12, tokens.size());
        List exp = Arrays.asList(new String[] {
            "(a,0,1)", "(b,1,2)", "(c,2,3)", "(d,3,4)", "(e,4,5)",
            "(ab,0,2)", "(bc,1,3)", "(cd,2,4)", "(de,3,5)",
            "(abc,0,3)", "(bcd,1,4)", "(cde,2,5)"
        });
        assertEquals(exp, tokens);
    }

    /** Size arguments (6, 7) exceed the five-character input, so no tokens are produced. */
    public void testOversizedNgrams() throws Exception {
        List tokens = collectTokens(new NGramTokenizer(input, 6, 7));

        assertTrue(tokens.isEmpty());
    }

    /**
     * Drains the tokenizer by calling {@code next()} until it returns
     * {@code null}, recording each token's {@code toString()} value.
     *
     * @param tokenizer the tokenizer to exhaust
     * @return the string forms of all tokens, in the order they were produced
     * @throws Exception if the tokenizer fails while reading its input
     */
    private List collectTokens(NGramTokenizer tokenizer) throws Exception {
        List collected = new ArrayList();
        for (Token token = tokenizer.next(); token != null; token = tokenizer.next()) {
            collected.add(token.toString());
        }
        return collected;
    }

    /**
     * Asserts that constructing an {@link NGramTokenizer} over the test input
     * with the given size arguments throws {@link IllegalArgumentException}.
     *
     * @param minGram second constructor argument
     * @param maxGram third constructor argument
     * @throws Exception if construction fails with any other exception
     */
    private void assertConstructorRejects(int minGram, int maxGram) throws Exception {
        try {
            new NGramTokenizer(input, minGram, maxGram);
        } catch (IllegalArgumentException expected) {
            // This is the outcome under test; nothing more to check.
            return;
        }
        fail("Expected IllegalArgumentException for gram sizes ("
                + minGram + ", " + maxGram + ")");
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -