📄 testphrasequery.java

📁 Lucene a java open-source SearchEngine Framework
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.*;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

import java.io.IOException;
import java.io.Reader;

/**
 * Tests {@link PhraseQuery}.
 *
 * @see TestPositionIncrement
 * @author Erik Hatcher
 */
public class TestPhraseQuery extends LuceneTestCase {

  /** threshold for comparing floats */
  public static final float SCORE_COMP_THRESH = 1e-6f;

  /** Searcher over the index built in {@link #setUp()}. */
  private IndexSearcher searcher;

  /** Fresh, empty phrase query created for each test in {@link #setUp()}. */
  private PhraseQuery query;

  /** In-memory directory holding the fixture index. */
  private RAMDirectory directory;

  /**
   * Builds the fixture index in a {@link RAMDirectory}, opens a searcher on
   * it and creates an empty {@link PhraseQuery}.
   *
   * The analyzer tokenizes on whitespace and reports a position increment
   * gap of 100. Three documents are added: one with the fields "field",
   * "repeated" (added twice) and "palindrome", and two identical documents
   * with only the "nonexist" field.
   */
  public void setUp() throws Exception {
    super.setUp();
    directory = new RAMDirectory();
    Analyzer analyzer = new Analyzer() {
      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new WhitespaceTokenizer(reader);
      }

      public int getPositionIncrementGap(String fieldName) {
        return 100;
      }
    };
    IndexWriter writer = new IndexWriter(directory, analyzer, true);

    Document doc = new Document();
    doc.add(new Field("field", "one two three four five", Field.Store.YES, Field.Index.TOKENIZED));
    doc.add(new Field("repeated", "this is a repeated field - first part", Field.Store.YES, Field.Index.TOKENIZED));
    Fieldable repeatedField = new Field("repeated", "second part of a repeated field", Field.Store.YES, Field.Index.TOKENIZED);
    doc.add(repeatedField);
    doc.add(new Field("palindrome", "one two three two one", Field.Store.YES, Field.Index.TOKENIZED));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new Field("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.TOKENIZED));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new Field("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.TOKENIZED));
    writer.addDocument(doc);

    writer.optimize();
    writer.close();

    searcher = new IndexSearcher(directory);
    query = new PhraseQuery();
  }

  /** Closes the searcher and the directory opened by {@link #setUp()}. */
  public void tearDown() throws Exception {
    super.tearDown();
    searcher.close();
    directory.close();
  }

  /** "one" ... "five" with slop 2 is expected to produce no hits. */
  public void testNotCloseEnough() throws Exception {
    query.setSlop(2);
    query.add(new Term("field", "one"));
    query.add(new Term("field", "five"));
    Hits hits = searcher.search(query);
    assertEquals(0, hits.length());
    QueryUtils.check(query,searcher);
  }

  /** "one" ... "five" with slop 3 is expected to produce exactly one hit. */
  public void testBarelyCloseEnough() throws Exception {
    query.setSlop(3);
    query.add(new Term("field", "one"));
    query.add(new Term("field", "five"));
    Hits hits = searcher.search(query);
    assertEquals(1, hits.length());
    QueryUtils.check(query,searcher);
  }

  /**
   * Ensures slop of 0 works for exact matches, but not reversed
   */
  public void testExact() throws Exception {
    // slop is zero by default
    query.add(new Term("field", "four"));
    query.add(new Term("field", "five"));
    Hits hits = searcher.search(query);
    assertEquals("exact match", 1, hits.length());
    QueryUtils.check(query,searcher);

    query = new PhraseQuery();
    query.add(new Term("field", "two"));
    query.add(new Term("field", "one"));
    hits = searcher.search(query);
    assertEquals("reverse not exact", 0, hits.length());
    QueryUtils.check(query,searcher);
  }

  /** Checks slop 1 for an in-order phrase and for the same terms reversed. */
  public void testSlop1() throws Exception {
    // Ensures slop of 1 works with terms in order.
    query.setSlop(1);
    query.add(new Term("field", "one"));
    query.add(new Term("field", "two"));
    Hits hits = searcher.search(query);
    assertEquals("in order", 1, hits.length());
    QueryUtils.check(query,searcher);

    // Ensures slop of 1 does not work for phrases out of order;
    // must be at least 2.
    query = new PhraseQuery();
    query.setSlop(1);
    query.add(new Term("field", "two"));
    query.add(new Term("field", "one"));
    hits = searcher.search(query);
    assertEquals("reversed, slop not 2 or more", 0, hits.length());
    QueryUtils.check(query,searcher);
  }

  /**
   * As long as slop is at least 2, terms can be reversed
   */
  public void testOrderDoesntMatter() throws Exception {
    query.setSlop(2); // must be at least two for reverse order match
    query.add(new Term("field", "two"));
    query.add(new Term("field", "one"));
    Hits hits = searcher.search(query);
    assertEquals("just sloppy enough", 1, hits.length());
    QueryUtils.check(query,searcher);

    query = new PhraseQuery();
    query.setSlop(2);
    query.add(new Term("field", "three"));
    query.add(new Term("field", "one"));
    hits = searcher.search(query);
    assertEquals("not sloppy enough", 0, hits.length());
    QueryUtils.check(query,searcher);
  }

  /**
   * slop is the total number of positional moves allowed
   * to line up a phrase
   */
  public void testMulipleTerms() throws Exception {
    query.setSlop(2);
    query.add(new Term("field", "one"));
    query.add(new Term("field", "three"));
    query.add(new Term("field", "five"));
    Hits hits = searcher.search(query);
    assertEquals("two total moves", 1, hits.length());
    QueryUtils.check(query,searcher);

    query = new PhraseQuery();
    query.setSlop(5); // it takes six moves to match this phrase
    query.add(new Term("field", "five"));
    query.add(new Term("field", "three"));
    query.add(new Term("field", "one"));
    hits = searcher.search(query);
    assertEquals("slop of 5 not close enough", 0, hits.length());
    QueryUtils.check(query,searcher);

    query.setSlop(6);
    hits = searcher.search(query);
    assertEquals("slop of 6 just right", 1, hits.length());
    QueryUtils.check(query,searcher);
  }

  /**
   * Indexes "the stop words are here" with a {@link StopAnalyzer} into a
   * private in-memory index and runs two exact phrase queries against it.
   *
   * Uses its own local directory, searcher and query, which shadow the
   * fixture fields of the same names.
   */
  public void testPhraseQueryWithStopAnalyzer() throws Exception {
    RAMDirectory directory = new RAMDirectory();
    StopAnalyzer stopAnalyzer = new StopAnalyzer();
    IndexWriter writer = new IndexWriter(directory, stopAnalyzer, true);
    Document doc = new Document();
    doc.add(new Field("field", "the stop words are here", Field.Store.YES, Field.Index.TOKENIZED));
    writer.addDocument(doc);
    writer.close();

    IndexSearcher searcher = new IndexSearcher(directory);
    try {
      // valid exact phrase query
      PhraseQuery query = new PhraseQuery();
      query.add(new Term("field","stop"));
      query.add(new Term("field","words"));
      Hits hits = searcher.search(query);
      assertEquals(1, hits.length());
      QueryUtils.check(query,searcher);

      // currently StopAnalyzer does not leave "holes", so this matches.
      query = new PhraseQuery();
      query.add(new Term("field", "words"));
      query.add(new Term("field", "here"));
      hits = searcher.search(query);
      assertEquals(1, hits.length());
      QueryUtils.check(query,searcher);
    } finally {
      // close the local searcher even when an assertion above fails
      searcher.close();
    }
  }

  /**
   * Runs a phrase query on its own and then as a MUST clause next to a
   * MUST term query inside a {@link BooleanQuery}.
   */
  public void testPhraseQueryInConjunctionScorer() throws Exception {
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);

    Document doc = new Document();
    doc.add(new Field("source", "marketing info", Field.Store.YES, Field.Index.TOKENIZED));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new Field("contents", "foobar", Field.Store.YES, Field.Index.TOKENIZED));
    doc.add(new Field("source", "marketing info", Field.Store.YES, Field.Index.TOKENIZED));
    writer.addDocument(doc);

    writer.optimize();
    writer.close();

    IndexSearcher searcher = new IndexSearcher(directory);

    PhraseQuery phraseQuery = new PhraseQuery();
    phraseQuery.add(new Term("source", "marketing"));
    phraseQuery.add(new Term("source", "info"));
    Hits hits = searcher.search(phraseQuery);
    assertEquals(2, hits.length());
    QueryUtils.check(phraseQuery,searcher);

    TermQuery termQuery = new TermQuery(new Term("contents","foobar"));
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.add(termQuery, BooleanClause.Occur.MUST);
    booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST);
    hits = searcher.search(booleanQuery);
    assertEquals(1, hits.length());
    QueryUtils.check(termQuery,searcher);

    searcher.close();

    writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
    doc = new Document();
    doc.add(new Field("contents", "map entry woo", Field.Store.YES, Field.Index.TOKENIZED));
    // NOTE(review): this listing is page 1 of 2; the rest of this method and
    // of the class lies beyond this excerpt and is intentionally not reconstructed.
12 下一页
💿 文件大小 5390 K
👤 上传用户 rickie936
📂 所属分类 Java编程
🏷️ 相关标签

#SearchEngine #open-source #Framework #Lucene
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -