📄 testphrasequery.java
字号:
package org.apache.lucene.search;/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */import org.apache.lucene.util.LuceneTestCase;import org.apache.lucene.analysis.*;import org.apache.lucene.document.*;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.Term;import org.apache.lucene.store.Directory;import org.apache.lucene.store.RAMDirectory;import java.io.IOException;import java.io.Reader;/** * Tests {@link PhraseQuery}. * * @see TestPositionIncrement * @author Erik Hatcher */public class TestPhraseQuery extends LuceneTestCase { /** threshold for comparing floats */ public static final float SCORE_COMP_THRESH = 1e-6f; private IndexSearcher searcher; private PhraseQuery query; private RAMDirectory directory; public void setUp() throws Exception { super.setUp(); directory = new RAMDirectory(); Analyzer analyzer = new Analyzer() { public TokenStream tokenStream(String fieldName, Reader reader) { return new WhitespaceTokenizer(reader); } public int getPositionIncrementGap(String fieldName) { return 100; } }; IndexWriter writer = new IndexWriter(directory, analyzer, true); Document doc = new Document(); doc.add(new Field("field", "one two three four five", Field.Store.YES, Field.Index.TOKENIZED)); doc.add(new Field("repeated", "this is a repeated field - first part", Field.Store.YES, Field.Index.TOKENIZED)); Fieldable repeatedField = new Field("repeated", "second part of a repeated field", Field.Store.YES, Field.Index.TOKENIZED); doc.add(repeatedField); doc.add(new Field("palindrome", "one two three two one", Field.Store.YES, Field.Index.TOKENIZED)); writer.addDocument(doc); doc = new Document(); doc.add(new Field("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.TOKENIZED)); writer.addDocument(doc); doc = new Document(); doc.add(new Field("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.TOKENIZED)); writer.addDocument(doc); writer.optimize(); writer.close(); searcher = new IndexSearcher(directory); query = new PhraseQuery(); } public void tearDown() throws Exception { super.tearDown(); searcher.close(); directory.close(); } public void testNotCloseEnough() throws Exception { query.setSlop(2); query.add(new Term("field", "one")); query.add(new Term("field", "five")); Hits hits = searcher.search(query); assertEquals(0, hits.length()); QueryUtils.check(query,searcher); } public void testBarelyCloseEnough() throws Exception { query.setSlop(3); query.add(new Term("field", "one")); query.add(new Term("field", "five")); Hits hits = searcher.search(query); assertEquals(1, hits.length()); QueryUtils.check(query,searcher); } /** * Ensures slop of 0 works for exact matches, but not reversed */ public void testExact() throws Exception { // slop is zero by default query.add(new Term("field", "four")); query.add(new Term("field", "five")); Hits hits = searcher.search(query); assertEquals("exact match", 1, hits.length()); QueryUtils.check(query,searcher); query = new PhraseQuery(); query.add(new Term("field", "two")); query.add(new Term("field", "one")); hits = searcher.search(query); assertEquals("reverse not exact", 0, hits.length()); QueryUtils.check(query,searcher); } public void testSlop1() throws Exception { // Ensures slop of 1 works with terms in order. query.setSlop(1); query.add(new Term("field", "one")); query.add(new Term("field", "two")); Hits hits = searcher.search(query); assertEquals("in order", 1, hits.length()); QueryUtils.check(query,searcher); // Ensures slop of 1 does not work for phrases out of order; // must be at least 2. query = new PhraseQuery(); query.setSlop(1); query.add(new Term("field", "two")); query.add(new Term("field", "one")); hits = searcher.search(query); assertEquals("reversed, slop not 2 or more", 0, hits.length()); QueryUtils.check(query,searcher); } /** * As long as slop is at least 2, terms can be reversed */ public void testOrderDoesntMatter() throws Exception { query.setSlop(2); // must be at least two for reverse order match query.add(new Term("field", "two")); query.add(new Term("field", "one")); Hits hits = searcher.search(query); assertEquals("just sloppy enough", 1, hits.length()); QueryUtils.check(query,searcher); query = new PhraseQuery(); query.setSlop(2); query.add(new Term("field", "three")); query.add(new Term("field", "one")); hits = searcher.search(query); assertEquals("not sloppy enough", 0, hits.length()); QueryUtils.check(query,searcher); } /** * slop is the total number of positional moves allowed * to line up a phrase */ public void testMulipleTerms() throws Exception { query.setSlop(2); query.add(new Term("field", "one")); query.add(new Term("field", "three")); query.add(new Term("field", "five")); Hits hits = searcher.search(query); assertEquals("two total moves", 1, hits.length()); QueryUtils.check(query,searcher); query = new PhraseQuery(); query.setSlop(5); // it takes six moves to match this phrase query.add(new Term("field", "five")); query.add(new Term("field", "three")); query.add(new Term("field", "one")); hits = searcher.search(query); assertEquals("slop of 5 not close enough", 0, hits.length()); QueryUtils.check(query,searcher); query.setSlop(6); hits = searcher.search(query); assertEquals("slop of 6 just right", 1, hits.length()); QueryUtils.check(query,searcher); } public void testPhraseQueryWithStopAnalyzer() throws Exception { RAMDirectory directory = new RAMDirectory(); StopAnalyzer stopAnalyzer = new StopAnalyzer(); IndexWriter writer = new IndexWriter(directory, stopAnalyzer, true); Document doc = new Document(); doc.add(new Field("field", "the stop words are here", Field.Store.YES, Field.Index.TOKENIZED)); writer.addDocument(doc); writer.close(); IndexSearcher searcher = new IndexSearcher(directory); // valid exact phrase query PhraseQuery query = new PhraseQuery(); query.add(new Term("field","stop")); query.add(new Term("field","words")); Hits hits = searcher.search(query); assertEquals(1, hits.length()); QueryUtils.check(query,searcher); // currently StopAnalyzer does not leave "holes", so this matches. query = new PhraseQuery(); query.add(new Term("field", "words")); query.add(new Term("field", "here")); hits = searcher.search(query); assertEquals(1, hits.length()); QueryUtils.check(query,searcher); searcher.close(); } public void testPhraseQueryInConjunctionScorer() throws Exception { RAMDirectory directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true); Document doc = new Document(); doc.add(new Field("source", "marketing info", Field.Store.YES, Field.Index.TOKENIZED)); writer.addDocument(doc); doc = new Document(); doc.add(new Field("contents", "foobar", Field.Store.YES, Field.Index.TOKENIZED)); doc.add(new Field("source", "marketing info", Field.Store.YES, Field.Index.TOKENIZED)); writer.addDocument(doc); writer.optimize(); writer.close(); IndexSearcher searcher = new IndexSearcher(directory); PhraseQuery phraseQuery = new PhraseQuery(); phraseQuery.add(new Term("source", "marketing")); phraseQuery.add(new Term("source", "info")); Hits hits = searcher.search(phraseQuery); assertEquals(2, hits.length()); QueryUtils.check(phraseQuery,searcher); TermQuery termQuery = new TermQuery(new Term("contents","foobar")); BooleanQuery booleanQuery = new BooleanQuery(); booleanQuery.add(termQuery, BooleanClause.Occur.MUST); booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST); hits = searcher.search(booleanQuery); assertEquals(1, hits.length()); QueryUtils.check(termQuery,searcher); searcher.close(); writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true); doc = new Document(); doc.add(new Field("contents", "map entry woo", Field.Store.YES, Field.Index.TOKENIZED));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -