TestIndexWriter.java
    hits = searcher.search(new TermQuery(searchTerm));
    assertEquals(27, hits.length());
    searcher.close();

    IndexReader reader = IndexReader.open(dir);
    reader.close();

    dir.close();
  }

  public void testHighFreqTerm() throws IOException {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.setRAMBufferSizeMB(0.01);
    writer.setMaxFieldLength(100000000);

    // Massive doc that has 128 K a's
    StringBuffer b = new StringBuffer(1024*1024);
    for(int i=0;i<4096;i++) {
      b.append(" a a a a a a a a");
      b.append(" a a a a a a a a");
      b.append(" a a a a a a a a");
      b.append(" a a a a a a a a");
    }
    Document doc = new Document();
    doc.add(new Field("field", b.toString(), Field.Store.YES,
                      Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir);
    assertEquals(1, reader.maxDoc());
    assertEquals(1, reader.numDocs());
    Term t = new Term("field", "a");
    assertEquals(1, reader.docFreq(t));
    TermDocs td = reader.termDocs(t);
    td.next();
    assertEquals(128*1024, td.freq());
    reader.close();
    dir.close();
  }

  // Make sure that a Directory implementation that does
  // not use LockFactory at all (ie overrides makeLock and
  // implements its own private locking) works OK.  This
  // was raised on java-dev as loss of backwards
  // compatibility.
  public void testNullLockFactory() throws IOException {

    final class MyRAMDirectory extends RAMDirectory {
      private LockFactory myLockFactory;
      MyRAMDirectory() {
        lockFactory = null;
        myLockFactory = new SingleInstanceLockFactory();
      }
      public Lock makeLock(String name) {
        return myLockFactory.makeLock(name);
      }
    }

    Directory dir = new MyRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    for (int i = 0; i < 100; i++) {
      addDoc(writer);
    }
    writer.close();
    Term searchTerm = new Term("content", "aaa");
    IndexSearcher searcher = new IndexSearcher(dir);
    Hits hits = searcher.search(new TermQuery(searchTerm));
    assertEquals("did not get right number of hits", 100, hits.length());
    writer.close();

    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.close();

    dir.close();
  }

  public void testFlushWithNoMerging() throws IOException {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.setMaxBufferedDocs(2);
    Document doc = new Document();
    doc.add(new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED,
                      Field.TermVector.WITH_POSITIONS_OFFSETS));
    for(int i=0;i<19;i++)
      writer.addDocument(doc);
    writer.flush(false, true);
    writer.close();
    SegmentInfos sis = new SegmentInfos();
    sis.read(dir);
    // Since we flushed w/o allowing merging we should now
    // have 10 segments
    assertEquals(10, sis.size());
  }

  // Make sure we can flush segment w/ norms, then add
  // empty doc (no norms) and flush
  public void testEmptyDocAfterFlushingRealDoc() throws IOException {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    Document doc = new Document();
    doc.add(new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED,
                      Field.TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(doc);
    writer.flush();
    writer.addDocument(new Document());
    writer.close();
    IndexReader reader = IndexReader.open(dir);
    assertEquals(2, reader.numDocs());
  }

  // Test calling optimize(false) whereby optimize is kicked
  // off but we don't wait for it to finish (but
  // writer.close() does wait)
  public void testBackgroundOptimize() throws IOException {
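    // Rough shape of the behavior under test, per the comment above:
    //   writer.optimize(false);  // kicks off merging and returns immediately;
    //                            // merges run on ConcurrentMergeScheduler threads
    //   writer.close();          // waits for the background optimize to finish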
    Directory dir = new MockRAMDirectory();
    for(int pass=0;pass<2;pass++) {
      IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
      writer.setMergeScheduler(new ConcurrentMergeScheduler());
      Document doc = new Document();
      doc.add(new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED,
                        Field.TermVector.WITH_POSITIONS_OFFSETS));
      writer.setMaxBufferedDocs(2);
      writer.setMergeFactor(101);
      for(int i=0;i<200;i++)
        writer.addDocument(doc);
      writer.optimize(false);

      if (0 == pass) {
        writer.close();
        IndexReader reader = IndexReader.open(dir);
        assertTrue(reader.isOptimized());
        reader.close();
      } else {
        // Get another segment to flush so we can verify it is
        // NOT included in the optimization
        writer.addDocument(doc);
        writer.addDocument(doc);
        writer.close();

        IndexReader reader = IndexReader.open(dir);
        assertTrue(!reader.isOptimized());
        reader.close();

        SegmentInfos infos = new SegmentInfos();
        infos.read(dir);
        assertEquals(2, infos.size());
      }
    }
    dir.close();
  }

  private void rmDir(File dir) {
    File[] files = dir.listFiles();
    if (files != null) {
      for (int i = 0; i < files.length; i++) {
        files[i].delete();
      }
    }
    dir.delete();
  }

  /**
   * Test that no NullPointerException will be raised,
   * when adding one document with a single, empty field
   * and term vectors enabled.
   * @throws IOException
   */
  public void testBadSegment() throws IOException {
    MockRAMDirectory dir = new MockRAMDirectory();
    IndexWriter ir = new IndexWriter(dir, new StandardAnalyzer(), true);

    Document document = new Document();
    document.add(new Field("tvtest", "", Field.Store.NO, Field.Index.TOKENIZED,
                           Field.TermVector.YES));
    ir.addDocument(document);
    ir.close();
    dir.close();
  }

  // LUCENE-1008
  public void testNoTermVectorAfterTermVector() throws IOException {
    MockRAMDirectory dir = new MockRAMDirectory();
    IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), true);
    Document document = new Document();
    document.add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.TOKENIZED,
                           Field.TermVector.YES));
    iw.addDocument(document);
    document = new Document();
    document.add(new Field("tvtest", "x y z", Field.Store.NO, Field.Index.TOKENIZED,
                           Field.TermVector.NO));
    iw.addDocument(document);
    // Make first segment
    iw.flush();

    document.add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.TOKENIZED,
                           Field.TermVector.YES));
    iw.addDocument(document);
    // Make 2nd segment
    iw.flush();

    iw.optimize();
    iw.close();
    dir.close();
  }

  // LUCENE-1010
  public void testNoTermVectorAfterTermVectorMerge() throws IOException {
    MockRAMDirectory dir = new MockRAMDirectory();
    IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), true);
    Document document = new Document();
    document.add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.TOKENIZED,
                           Field.TermVector.YES));
    iw.addDocument(document);
    iw.flush();

    document = new Document();
    document.add(new Field("tvtest", "x y z", Field.Store.NO, Field.Index.TOKENIZED,
                           Field.TermVector.NO));
    iw.addDocument(document);
    // Make first segment
    iw.flush();
    iw.optimize();

    document.add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.TOKENIZED,
                           Field.TermVector.YES));
    iw.addDocument(document);
    // Make 2nd segment
    iw.flush();
    iw.optimize();

    iw.close();
    dir.close();
  }

  // LUCENE-1036
  public void testMaxThreadPriority() throws IOException {
    int pri = Thread.currentThread().getPriority();
    try {
      MockRAMDirectory dir = new MockRAMDirectory();
      IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), true);
      Document document = new Document();
      document.add(new Field("tvtest", "a b c", Field.Store.NO,
                             Field.Index.TOKENIZED, Field.TermVector.YES));
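      // The small buffer/merge settings below force merges while this thread
      // runs at Thread.MAX_PRIORITY; LUCENE-1036 concerns the merge scheduler
      // deriving its merge-thread priority from the caller's (presumably
      // caller+1, which must be clamped to the legal range rather than throw).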
      iw.setMaxBufferedDocs(2);
      iw.setMergeFactor(2);
      Thread.currentThread().setPriority(Thread.MAX_PRIORITY);
      for(int i=0;i<4;i++)
        iw.addDocument(document);
      iw.close();
    } finally {
      Thread.currentThread().setPriority(pri);
    }
  }

  // Just intercepts all merges & verifies that we are never
  // merging a segment with >= 20 (maxMergeDocs) docs
  private class MyMergeScheduler extends MergeScheduler {
    synchronized public void merge(IndexWriter writer)
      throws CorruptIndexException, IOException {
      while(true) {
        MergePolicy.OneMerge merge = writer.getNextMerge();
        if (merge == null)
          break;
        for(int i=0;i<merge.segments.size();i++)
          assertTrue(merge.segments.info(i).docCount < 20);
        writer.merge(merge);
      }
    }
    public void close() {}
  }

  // LUCENE-1013
  public void testSetMaxMergeDocs() throws IOException {
    MockRAMDirectory dir = new MockRAMDirectory();
    IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), true);
    iw.setMergeScheduler(new MyMergeScheduler());
    iw.setMaxMergeDocs(20);
    iw.setMaxBufferedDocs(2);
    iw.setMergeFactor(2);
    Document document = new Document();
    document.add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.TOKENIZED,
                           Field.TermVector.YES));
    for(int i=0;i<177;i++)
      iw.addDocument(document);
    iw.close();
  }

  // LUCENE-1072
  public void testExceptionFromTokenStream() throws IOException {
    RAMDirectory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new Analyzer() {
      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new TokenFilter(new StandardTokenizer(reader)) {
          private int count = 0;
          public Token next() throws IOException {
            if (count++ == 5) {
              throw new IOException();
            }
            return input.next();
          }
        };
      }
    }, true);

    Document doc = new Document();
    String contents = "aa bb cc dd ee ff gg hh ii jj kk";
    doc.add(new Field("content", contents, Field.Store.NO, Field.Index.TOKENIZED));
    try {
      writer.addDocument(doc);
      fail("did not hit expected exception");
    } catch (Exception e) {
    }

    // Make sure we can add another normal document
    doc = new Document();
    doc.add(new Field("content", "aa bb cc dd", Field.Store.NO, Field.Index.TOKENIZED));
    writer.addDocument(doc);

    // Make sure we can add another normal document
    doc = new Document();
    doc.add(new Field("content", "aa bb cc dd", Field.Store.NO, Field.Index.TOKENIZED));
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir);
    final Term t = new Term("content", "aa");
    assertEquals(3, reader.docFreq(t));

    // Make sure the doc that hit the exception was marked
    // as deleted:
    TermDocs tdocs = reader.termDocs(t);
    int count = 0;
    while(tdocs.next()) {
      count++;
    }
    assertEquals(2, count);

    assertEquals(0, reader.docFreq(new Term("content", "gg")));
    reader.close();
    dir.close();
  }

  private static class FailOnlyOnFlush extends MockRAMDirectory.Failure {
    boolean doFail = false;
    int count;

    public void setDoFail() {
      this.doFail = true;
    }
    public void clearDoFail() {
      this.doFail = false;
    }

    public void eval(MockRAMDirectory dir) throws IOException {
      if (doFail) {
        StackTraceElement[] trace = new Exception().getStackTrace();
        for (int i = 0; i < trace.length; i++) {
          if ("org.apach
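
  // How a Failure like the one above is typically hooked up by the test that
  // uses it (a sketch, assuming MockRAMDirectory's failOn() API):
  //   FailOnlyOnFlush failure = new FailOnlyOnFlush();
  //   dir.failOn(failure);
  //   failure.setDoFail();  // later flushes then hit the injected IOException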