// TestBoostingTermQuery.java (scraped code-viewer page header removed)
package org.apache.lucene.search.payloads;
/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import junit.framework.TestCase;
import org.apache.lucene.analysis.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Payload;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.English;

import java.io.IOException;
import java.io.Reader;

/**
 * Tests {@link BoostingTermQuery}, which scores documents using per-term payloads.
 * <p>
 * A custom {@link BoostingSimilarity} forces every conventional scoring factor
 * (tf, idf, norms, coord, queryNorm) to 1 so the final score of each hit equals
 * the payload value(s) alone. The index contains 1000 docs; "field" carries a
 * constant payload of 1 on every token, while "multiField" alternates payloads
 * 2 and 4 (even/odd token positions) and repeats the document text twice.
 * <p>
 * NOTE(review): written against a pre-2.9 Lucene API (deprecated
 * {@code Token next()}, {@code Field.Index.TOKENIZED}, JUnit 3 {@code TestCase}).
 */
public class TestBoostingTermQuery extends TestCase {
  private IndexSearcher searcher;
  private BoostingSimilarity similarity = new BoostingSimilarity();
  // Payload attached to every token of "field": a single byte with value 1.
  private byte[] payloadField = new byte[]{1};
  // Payloads for "multiField": value 2 on even-numbered tokens, 4 on odd.
  // Since all other similarity factors are 1, a doc with one 2-token and one
  // 4-token... actually one match each scores 2 + 4 = 6 at the payload level;
  // the expected scores below (2 and 3) come from BoostingTermQuery's
  // averaging of payload scores — see the assertions in testMultipleMatchesPerDoc.
  private byte[] payloadMultiField1 = new byte[]{2};
  private byte[] payloadMultiField2 = new byte[]{4};

  public TestBoostingTermQuery(String s) {
    super(s);
  }

  /**
   * Analyzer that lower-cases tokens and then decorates each one with a
   * payload via {@link PayloadFilter}.
   */
  private class PayloadAnalyzer extends Analyzer {
    public TokenStream tokenStream(String fieldName, Reader reader) {
      TokenStream result = new LowerCaseTokenizer(reader);
      result = new PayloadFilter(result, fieldName);
      return result;
    }
  }

  /**
   * TokenFilter that attaches a payload to every token:
   * <ul>
   *   <li>"field": constant payload {1} on each token;</li>
   *   <li>"multiField": alternating payloads {2} and {4}, starting with {2}
   *       for the first token seen (numSeen is even);</li>
   *   <li>any other field: tokens pass through without a payload.</li>
   * </ul>
   */
  private class PayloadFilter extends TokenFilter {
    String fieldName;
    // Counts tokens seen on "multiField" to alternate between the two payloads.
    int numSeen = 0;

    public PayloadFilter(TokenStream input, String fieldName) {
      super(input);
      this.fieldName = fieldName;
    }

    public Token next() throws IOException {
      Token result = input.next();
      if (result != null) {
        if (fieldName.equals("field")) {
          result.setPayload(new Payload(payloadField));
        } else if (fieldName.equals("multiField")) {
          // Alternate: even token index -> {2}, odd -> {4}.
          if (numSeen % 2 == 0) {
            result.setPayload(new Payload(payloadMultiField1));
          } else {
            result.setPayload(new Payload(payloadMultiField2));
          }
          numSeen++;
        }
      }
      return result;
    }
  }

  /**
   * Builds an in-memory index of 1000 docs. Each doc i stores English.intToEnglish(i)
   * once in "field" and twice (space-separated) in "multiField", so every term
   * matches twice per document in "multiField". Both writer and searcher use
   * the payload-based {@link BoostingSimilarity}.
   */
  protected void setUp() throws IOException {
    RAMDirectory directory = new RAMDirectory();
    PayloadAnalyzer analyzer = new PayloadAnalyzer();
    IndexWriter writer = new IndexWriter(directory, analyzer, true);
    writer.setSimilarity(similarity);
    //writer.infoStream = System.out;
    for (int i = 0; i < 1000; i++) {
      Document doc = new Document();
      doc.add(new Field("field", English.intToEnglish(i), Field.Store.YES, Field.Index.TOKENIZED));
      doc.add(new Field("multiField", English.intToEnglish(i) + " " + English.intToEnglish(i), Field.Store.YES, Field.Index.TOKENIZED));
      writer.addDocument(doc);
    }
    //writer.optimize();
    writer.close();
    searcher = new IndexSearcher(directory);
    searcher.setSimilarity(similarity);
  }

  protected void tearDown() {
  }

  /**
   * Single-match case: "seventy" occurs once per matching doc in "field",
   * whose payload is always 1, and all other factors are 1 — so every one of
   * the 100 hits (70, 71..79, 170, ... — the docs whose English form contains
   * "seventy") must score exactly 1.
   */
  public void test() throws IOException {
    BoostingTermQuery query = new BoostingTermQuery(new Term("field", "seventy"));
    TopDocs hits = searcher.search(query, null, 100);
    assertTrue("hits is null and it shouldn't be", hits != null);
    assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
    //they should all have the exact same score, because they all contain seventy once, and we set
    //all the other similarity factors to be 1
    assertTrue(hits.getMaxScore() + " does not equal: " + 1, hits.getMaxScore() == 1);
    for (int i = 0; i < hits.scoreDocs.length; i++) {
      ScoreDoc doc = hits.scoreDocs[i];
      assertTrue(doc.score + " does not equal: " + 1, doc.score == 1);
    }
    CheckHits.checkExplanations(query, "field", searcher, true);
    // BoostingTermQuery is also a span query; its spans must be plain term spans.
    Spans spans = query.getSpans(searcher.getIndexReader());
    assertTrue("spans is null and it shouldn't be", spans != null);
    assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
    /*float score = hits.score(0);
    for (int i =1; i < hits.length(); i++) {
      assertTrue("scores are not equal and they should be", score == hits.score(i));
    }*/
  }

  /**
   * Multi-match case: "multiField" repeats the text, so "seventy" matches twice
   * per hit (200 span matches over the 100 hits). With alternating payloads of
   * 2 and 4, the expected per-doc scores are 2 or 3 depending on where the two
   * occurrences land in the even/odd payload alternation; exactly the 10 docs
   * 70 + i*100 (doc % 10 == 0 among the hits) score 3, the rest score 2.
   */
  public void testMultipleMatchesPerDoc() throws Exception {
    BoostingTermQuery query = new BoostingTermQuery(new Term("multiField", "seventy"));
    TopDocs hits = searcher.search(query, null, 100);
    assertTrue("hits is null and it shouldn't be", hits != null);
    assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
    //they should all have the exact same score, because they all contain seventy once, and we set
    //all the other similarity factors to be 1
    //System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash);
    assertTrue(hits.getMaxScore() + " does not equal: " + 3, hits.getMaxScore() == 3);
    //there should be exactly 10 items that score a 3, all the rest should score a 2
    //The 10 items are: 70 + i*100 where i in [0-9]
    int numTens = 0;
    for (int i = 0; i < hits.scoreDocs.length; i++) {
      ScoreDoc doc = hits.scoreDocs[i];
      if (doc.doc % 10 == 0) {
        numTens++;
        assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
      } else {
        assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
      }
    }
    assertTrue(numTens + " does not equal: " + 10, numTens == 10);
    // NOTE(review): "field" here is only the label passed to CheckHits for
    // messages; the query itself targets "multiField". Confirm against
    // CheckHits.checkExplanations' parameter semantics.
    CheckHits.checkExplanations(query, "field", searcher, true);
    Spans spans = query.getSpans(searcher.getIndexReader());
    assertTrue("spans is null and it shouldn't be", spans != null);
    assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
    //should be two matches per document
    int count = 0;
    //100 hits times 2 matches per hit, we should have 200 in count
    while (spans.next()) {
      count++;
    }
    assertTrue(count + " does not equal: " + 200, count == 200);
  }

  /** A term absent from the index ("junk") must return zero hits. */
  public void testNoMatch() throws Exception {
    BoostingTermQuery query = new BoostingTermQuery(new Term("field", "junk"));
    TopDocs hits = searcher.search(query, null, 100);
    assertTrue("hits is null and it shouldn't be", hits != null);
    assertTrue("hits Size: " + hits.totalHits + " is not: " + 0, hits.totalHits == 0);
  }

  /**
   * Similarity that scores a term occurrence purely by its payload byte and
   * neutralizes every other scoring factor by returning 1, so test scores are
   * a direct function of the payloads installed by {@link PayloadFilter}.
   */
  class BoostingSimilarity extends DefaultSimilarity {
    // TODO: Remove warning after API has been finalized
    public float scorePayload(byte[] payload, int offset, int length) {
      // Payloads in this test are a single byte (value 1, 2, or 4), so the
      // offset/length arguments are safely ignored and the first byte is the score.
      return payload[0];
    }

    //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    //Make everything else 1 so we see the effect of the payload
    //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    public float lengthNorm(String fieldName, int numTerms) {
      return 1;
    }

    public float queryNorm(float sumOfSquaredWeights) {
      return 1;
    }

    public float sloppyFreq(int distance) {
      return 1;
    }

    public float coord(int overlap, int maxOverlap) {
      return 1;
    }

    public float idf(int docFreq, int numDocs) {
      return 1;
    }

    public float tf(float freq) {
      return 1;
    }
  }
}
// (scraped code-viewer page footer removed: keyboard-shortcut help menu, not part of the source file)