// testtermvectors.java
// (code-viewer banner text removed; it was not part of the original source file)
package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.English;

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.SortedSet;

/**
 * Tests storage and retrieval of term vectors (term frequencies, positions,
 * and offsets) through the pre-3.0 Lucene APIs ({@link Hits},
 * {@code Field.Index.TOKENIZED}, {@code RAMDirectory}).
 *
 * NOTE(review): this copy of the file is truncated — testKnownSetOfDocuments()
 * and the helper it calls (setupDoc) are cut off at the end; the class closing
 * brace is not visible here.
 */
public class TestTermVectors extends LuceneTestCase {
  private IndexSearcher searcher;
  // In-memory index shared by all test methods; populated once in setUp().
  private RAMDirectory directory = new RAMDirectory();

  public TestTermVectors(String s) {
    super(s);
  }

  /**
   * Indexes 1000 documents; document i stores the English spelling of i
   * (via English.intToEnglish) in a single field named "field".
   *
   * The term-vector flavor of each document is chosen from i:
   *   i % 6 == 0          -> positions AND offsets
   *   i even (not % 3)    -> positions only
   *   i % 3 == 0 (odd)    -> offsets only
   *   otherwise           -> term vector with frequencies only
   * The position/offset test methods below rely on exactly this mapping.
   */
  public void setUp() throws Exception {
    super.setUp();
    // 'true' = create a fresh index, overwriting any existing one.
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true);
    //writer.setUseCompoundFile(true);
    //writer.infoStream = System.out;
    for (int i = 0; i < 1000; i++) {
      Document doc = new Document();
      Field.TermVector termVector;
      int mod3 = i % 3;
      int mod2 = i % 2;
      if (mod2 == 0 && mod3 == 0) {
        termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
      } else if (mod2 == 0) {
        termVector = Field.TermVector.WITH_POSITIONS;
      } else if (mod3 == 0) {
        termVector = Field.TermVector.WITH_OFFSETS;
      } else {
        termVector = Field.TermVector.YES;
      }
      doc.add(new Field("field", English.intToEnglish(i),
                        Field.Store.YES, Field.Index.TOKENIZED, termVector));
      writer.addDocument(doc);
    }
    writer.close();
    searcher = new IndexSearcher(directory);
  }

  /** Sanity check: setUp() produced a usable searcher. */
  public void test() {
    assertTrue(searcher != null);
  }

  /**
   * Every document matching "seventy" (the assertion expects 100 of the
   * 1000 docs) must expose exactly one term-frequency vector — every
   * indexed document stored some flavor of term vector in setUp().
   */
  public void testTermVectors() {
    Query query = new TermQuery(new Term("field", "seventy"));
    try {
      Hits hits = searcher.search(query);
      assertEquals(100, hits.length());
      for (int i = 0; i < hits.length(); i++) {
        // Direct field access to searcher.reader works because this test
        // lives in the same package (org.apache.lucene.search).
        TermFreqVector[] vector = searcher.reader.getTermFreqVectors(hits.id(i));
        assertTrue(vector != null);
        assertTrue(vector.length == 1);
      }
    } catch (IOException e) {
      // NOTE(review): swallowing the exception loses the failure cause;
      // assertTrue(false) fails the test but without the IOException details.
      assertTrue(false);
    }
  }

  /**
   * "zero" matches only document 0. Checks that positions are present
   * exactly when docId is even, and offsets exactly when docId % 3 == 0,
   * mirroring the flavor mapping established in setUp().
   */
  public void testTermPositionVectors() {
    Query query = new TermQuery(new Term("field", "zero"));
    try {
      Hits hits = searcher.search(query);
      assertEquals(1, hits.length());
      for (int i = 0; i < hits.length(); i++) {
        TermFreqVector[] vector = searcher.reader.getTermFreqVectors(hits.id(i));
        assertTrue(vector != null);
        assertTrue(vector.length == 1);
        // shouldBePosVector / shouldBeOffVector re-derive the flavor chosen
        // for this docId in setUp(). Each assert below encodes the
        // implication "flavor expected => vector is a TermPositionVector".
        boolean shouldBePosVector = (hits.id(i) % 2 == 0) ? true : false;
        assertTrue((shouldBePosVector == false)
            || (shouldBePosVector == true && (vector[0] instanceof TermPositionVector == true)));
        boolean shouldBeOffVector = (hits.id(i) % 3 == 0) ? true : false;
        assertTrue((shouldBeOffVector == false)
            || (shouldBeOffVector == true && (vector[0] instanceof TermPositionVector == true)));
        if (shouldBePosVector || shouldBeOffVector) {
          TermPositionVector posVec = (TermPositionVector) vector[0];
          String[] terms = posVec.getTerms();
          assertTrue(terms != null && terms.length > 0);
          for (int j = 0; j < terms.length; j++) {
            int[] positions = posVec.getTermPositions(j);
            TermVectorOffsetInfo[] offsets = posVec.getOffsets(j);
            // Positions/offsets must be present iff that flavor was stored.
            if (shouldBePosVector) {
              assertTrue(positions != null);
              assertTrue(positions.length > 0);
            } else
              assertTrue(positions == null);
            if (shouldBeOffVector) {
              assertTrue(offsets != null);
              assertTrue(offsets.length > 0);
            } else
              assertTrue(offsets == null);
          }
        } else {
          // A freq-only vector must NOT be castable to TermPositionVector;
          // the ClassCastException is the expected (passing) path here.
          try {
            TermPositionVector posVec = (TermPositionVector) vector[0];
            assertTrue(false);
          } catch (ClassCastException ignore) {
            TermFreqVector freqVec = vector[0];
            String[] terms = freqVec.getTerms();
            assertTrue(terms != null && terms.length > 0);
          }
        }
      }
    } catch (IOException e) {
      assertTrue(false);
    }
  }

  /**
   * "fifty" matches 100 docs; each must expose its single term vector.
   * NOTE(review): the commented-out assertTrue() suggests an offset check
   * was planned but never written — this test currently only repeats the
   * vector-presence check of testTermVectors().
   */
  public void testTermOffsetVectors() {
    Query query = new TermQuery(new Term("field", "fifty"));
    try {
      Hits hits = searcher.search(query);
      assertEquals(100, hits.length());
      for (int i = 0; i < hits.length(); i++) {
        TermFreqVector[] vector = searcher.reader.getTermFreqVectors(hits.id(i));
        assertTrue(vector != null);
        assertTrue(vector.length == 1);
        //assertTrue();
      }
    } catch (IOException e) {
      assertTrue(false);
    }
  }

  /**
   * Builds a small known corpus and (in the portion not visible here)
   * verifies exact term frequencies against the expectations in test4Map.
   * NOTE(review): the method body is truncated in this copy of the file.
   */
  public void testKnownSetOfDocuments() {
    String test1 = "eating chocolate in a computer lab"; //6 terms
    String test2 = "computer in a computer lab"; //5 terms
    String test3 = "a chocolate lab grows old"; //5 terms
    String test4 = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; //13 terms
    Map test4Map = new HashMap();
    test4Map.put("chocolate", new Integer(3));
    test4Map.put("lab", new Integer(2));
    test4Map.put("eating", new Integer(1));
    test4Map.put("computer", new Integer(1));
    test4Map.put("with", new Integer(1));
    test4Map.put("a", new Integer(1));
    test4Map.put("colored", new Integer(1));
    test4Map.put("in", new Integer(1));
    test4Map.put("an", new Integer(1));
    // NOTE(review): duplicate key — "computer" was already put above with the
    // same value; this second put is redundant (it overwrites with an equal value).
    test4Map.put("computer", new Integer(1));
    test4Map.put("old", new Integer(1));
    Document testDoc1 = new Document();
    // setupDoc is defined later in this file (not visible in this chunk).
    setupDoc(testDoc1, test1);
    Document testDoc2 = new Document();
    setupDoc(testDoc2, test2);
    Document testDoc3 = new Document();
    setupDoc(testDoc3, test3);
    Document testDoc4 = new Document();
    // NOTE(review): source truncated here — the remainder of this method and
    // the class closing brace are missing from the viewed copy.
// (code-viewer keyboard-shortcut help text removed; it was UI chrome from the
// site this file was copied from, not part of the original source file)