// TestBoostingTermQuery.java (scraped code-viewer page header removed)
package org.apache.lucene.search.payloads;
/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import junit.framework.TestCase;
import org.apache.lucene.analysis.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Payload;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.English;

import java.io.IOException;
import java.io.Reader;

/**
 * Tests {@link BoostingTermQuery}, which scores documents using per-term payloads.
 * <p>
 * A custom {@link BoostingSimilarity} forces every conventional scoring factor
 * (tf, idf, norms, coord, queryNorm) to 1 so the final score of each hit equals
 * the payload value(s) alone. The index contains 1000 docs; "field" carries a
 * constant payload of 1 on every token, while "multiField" alternates payloads
 * 2 and 4 (even/odd token positions) and repeats the document text twice.
 * <p>
 * NOTE(review): written against a pre-2.9 Lucene API (deprecated
 * {@code Token next()}, {@code Field.Index.TOKENIZED}, JUnit 3 {@code TestCase}).
 */
public class TestBoostingTermQuery extends TestCase {
  private IndexSearcher searcher;
  private BoostingSimilarity similarity = new BoostingSimilarity();
  // Payload attached to every token of "field": a single byte with value 1.
  private byte[] payloadField = new byte[]{1};
  // Payloads for "multiField": value 2 on even-numbered tokens, 4 on odd.
  // Since all other similarity factors are 1, a doc with one 2-token and one
  // 4-token... actually one match each scores 2 + 4 = 6 at the payload level;
  // the expected scores below (2 and 3) come from BoostingTermQuery's
  // averaging of payload scores — see the assertions in testMultipleMatchesPerDoc.
  private byte[] payloadMultiField1 = new byte[]{2};
  private byte[] payloadMultiField2 = new byte[]{4};

  public TestBoostingTermQuery(String s) {
    super(s);
  }

  /**
   * Analyzer that lower-cases tokens and then decorates each one with a
   * payload via {@link PayloadFilter}.
   */
  private class PayloadAnalyzer extends Analyzer {
    public TokenStream tokenStream(String fieldName, Reader reader) {
      TokenStream result = new LowerCaseTokenizer(reader);
      result = new PayloadFilter(result, fieldName);
      return result;
    }
  }

  /**
   * TokenFilter that attaches a payload to every token:
   * <ul>
   *   <li>"field": constant payload {1} on each token;</li>
   *   <li>"multiField": alternating payloads {2} and {4}, starting with {2}
   *       for the first token seen (numSeen is even);</li>
   *   <li>any other field: tokens pass through without a payload.</li>
   * </ul>
   */
  private class PayloadFilter extends TokenFilter {
    String fieldName;
    // Counts tokens seen on "multiField" to alternate between the two payloads.
    int numSeen = 0;

    public PayloadFilter(TokenStream input, String fieldName) {
      super(input);
      this.fieldName = fieldName;
    }

    public Token next() throws IOException {
      Token result = input.next();
      if (result != null) {
        if (fieldName.equals("field")) {
          result.setPayload(new Payload(payloadField));
        } else if (fieldName.equals("multiField")) {
          // Alternate: even token index -> {2}, odd -> {4}.
          if (numSeen % 2 == 0) {
            result.setPayload(new Payload(payloadMultiField1));
          } else {
            result.setPayload(new Payload(payloadMultiField2));
          }
          numSeen++;
        }
      }
      return result;
    }
  }

  /**
   * Builds an in-memory index of 1000 docs. Each doc i stores English.intToEnglish(i)
   * once in "field" and twice (space-separated) in "multiField", so every term
   * matches twice per document in "multiField". Both writer and searcher use
   * the payload-based {@link BoostingSimilarity}.
   */
  protected void setUp() throws IOException {
    RAMDirectory directory = new RAMDirectory();
    PayloadAnalyzer analyzer = new PayloadAnalyzer();
    IndexWriter writer = new IndexWriter(directory, analyzer, true);
    writer.setSimilarity(similarity);
    //writer.infoStream = System.out;
    for (int i = 0; i < 1000; i++) {
      Document doc = new Document();
      doc.add(new Field("field", English.intToEnglish(i), Field.Store.YES, Field.Index.TOKENIZED));
      doc.add(new Field("multiField", English.intToEnglish(i) + " " + English.intToEnglish(i), Field.Store.YES, Field.Index.TOKENIZED));
      writer.addDocument(doc);
    }
    //writer.optimize();
    writer.close();
    searcher = new IndexSearcher(directory);
    searcher.setSimilarity(similarity);
  }

  protected void tearDown() {
  }

  /**
   * Single-match case: "seventy" occurs once per matching doc in "field",
   * whose payload is always 1, and all other factors are 1 — so every one of
   * the 100 hits (70, 71..79, 170, ... — the docs whose English form contains
   * "seventy") must score exactly 1.
   */
  public void test() throws IOException {
    BoostingTermQuery query = new BoostingTermQuery(new Term("field", "seventy"));
    TopDocs hits = searcher.search(query, null, 100);
    assertTrue("hits is null and it shouldn't be", hits != null);
    assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
    //they should all have the exact same score, because they all contain seventy once, and we set
    //all the other similarity factors to be 1
    assertTrue(hits.getMaxScore() + " does not equal: " + 1, hits.getMaxScore() == 1);
    for (int i = 0; i < hits.scoreDocs.length; i++) {
      ScoreDoc doc = hits.scoreDocs[i];
      assertTrue(doc.score + " does not equal: " + 1, doc.score == 1);
    }
    CheckHits.checkExplanations(query, "field", searcher, true);
    // BoostingTermQuery is also a span query; its spans must be plain term spans.
    Spans spans = query.getSpans(searcher.getIndexReader());
    assertTrue("spans is null and it shouldn't be", spans != null);
    assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
    /*float score = hits.score(0);
    for (int i =1; i < hits.length(); i++) {
      assertTrue("scores are not equal and they should be", score == hits.score(i));
    }*/
  }

  /**
   * Multi-match case: "multiField" repeats the text, so "seventy" matches twice
   * per hit (200 span matches over the 100 hits). With alternating payloads of
   * 2 and 4, the expected per-doc scores are 2 or 3 depending on where the two
   * occurrences land in the even/odd payload alternation; exactly the 10 docs
   * 70 + i*100 (doc % 10 == 0 among the hits) score 3, the rest score 2.
   */
  public void testMultipleMatchesPerDoc() throws Exception {
    BoostingTermQuery query = new BoostingTermQuery(new Term("multiField", "seventy"));
    TopDocs hits = searcher.search(query, null, 100);
    assertTrue("hits is null and it shouldn't be", hits != null);
    assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
    //they should all have the exact same score, because they all contain seventy once, and we set
    //all the other similarity factors to be 1
    //System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash);
    assertTrue(hits.getMaxScore() + " does not equal: " + 3, hits.getMaxScore() == 3);
    //there should be exactly 10 items that score a 3, all the rest should score a 2
    //The 10 items are: 70 + i*100 where i in [0-9]
    int numTens = 0;
    for (int i = 0; i < hits.scoreDocs.length; i++) {
      ScoreDoc doc = hits.scoreDocs[i];
      if (doc.doc % 10 == 0) {
        numTens++;
        assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
      } else {
        assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
      }
    }
    assertTrue(numTens + " does not equal: " + 10, numTens == 10);
    // NOTE(review): "field" here is only the label passed to CheckHits for
    // messages; the query itself targets "multiField". Confirm against
    // CheckHits.checkExplanations' parameter semantics.
    CheckHits.checkExplanations(query, "field", searcher, true);
    Spans spans = query.getSpans(searcher.getIndexReader());
    assertTrue("spans is null and it shouldn't be", spans != null);
    assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
    //should be two matches per document
    int count = 0;
    //100 hits times 2 matches per hit, we should have 200 in count
    while (spans.next()) {
      count++;
    }
    assertTrue(count + " does not equal: " + 200, count == 200);
  }

  /** A term absent from the index ("junk") must return zero hits. */
  public void testNoMatch() throws Exception {
    BoostingTermQuery query = new BoostingTermQuery(new Term("field", "junk"));
    TopDocs hits = searcher.search(query, null, 100);
    assertTrue("hits is null and it shouldn't be", hits != null);
    assertTrue("hits Size: " + hits.totalHits + " is not: " + 0, hits.totalHits == 0);
  }

  /**
   * Similarity that scores a term occurrence purely by its payload byte and
   * neutralizes every other scoring factor by returning 1, so test scores are
   * a direct function of the payloads installed by {@link PayloadFilter}.
   */
  class BoostingSimilarity extends DefaultSimilarity {
    // TODO: Remove warning after API has been finalized
    public float scorePayload(byte[] payload, int offset, int length) {
      // Payloads in this test are a single byte (value 1, 2, or 4), so the
      // offset/length arguments are safely ignored and the first byte is the score.
      return payload[0];
    }

    //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    //Make everything else 1 so we see the effect of the payload
    //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    public float lengthNorm(String fieldName, int numTerms) {
      return 1;
    }

    public float queryNorm(float sumOfSquaredWeights) {
      return 1;
    }

    public float sloppyFreq(int distance) {
      return 1;
    }

    public float coord(int overlap, int maxOverlap) {
      return 1;
    }

    public float idf(int docFreq, int numDocs) {
      return 1;
    }

    public float tf(float freq) {
      return 1;
    }
  }
}
// (scraped code-viewer page footer removed: keyboard-shortcut help menu, not part of the source file)