📄 TestIndexWriter.java
    hits = searcher.search(new TermQuery(searchTerm));
    assertEquals("didn't see changes after close", 218, hits.length());
    searcher.close();
    dir.close();
  }

  /*
   * Verify that a writer with "commit on close" indeed
   * cleans up the temp segments created after opening
   * that are not referenced by the starting segments
   * file.  We check this by using MockRAMDirectory to
   * measure max temp disk space used.
   */
  public void testCommitOnCloseDiskUsage() throws IOException {
    MockRAMDirectory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    for(int j=0;j<30;j++) {
      addDocWithIndex(writer, j);
    }
    writer.close();
    dir.resetMaxUsedSizeInBytes();

    long startDiskUsage = dir.getMaxUsedSizeInBytes();
    writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false);
    for(int j=0;j<1470;j++) {
      addDocWithIndex(writer, j);
    }
    long midDiskUsage = dir.getMaxUsedSizeInBytes();
    dir.resetMaxUsedSizeInBytes();
    writer.optimize();
    writer.close();
    long endDiskUsage = dir.getMaxUsedSizeInBytes();

    // Ending index is 50X as large as starting index; due
    // to 2X disk usage normally we allow 100X max
    // transient usage.  If something is wrong w/ deleter
    // and it doesn't delete intermediate segments then it
    // will exceed this 100X:
    // System.out.println("start " + startDiskUsage + "; mid " + midDiskUsage + "; end " + endDiskUsage);
    assertTrue("writer used too much space while adding documents when autoCommit=false",
               midDiskUsage < 100*startDiskUsage);
    assertTrue("writer used too much space after close when autoCommit=false",
               endDiskUsage < 100*startDiskUsage);
  }

  /*
   * Verify that calling optimize when writer is open for
   * "commit on close" works correctly both for abort()
   * and close().
   */
  public void testCommitOnCloseOptimize() throws IOException {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.setMaxBufferedDocs(10);
    for(int j=0;j<17;j++) {
      addDocWithIndex(writer, j);
    }
    writer.close();

    writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false);
    writer.optimize();

    // Open a reader before closing (committing) the writer:
    IndexReader reader = IndexReader.open(dir);

    // Reader should see index as unoptimized at this
    // point:
    assertFalse("Reader incorrectly sees that the index is optimized", reader.isOptimized());
    reader.close();

    // Abort the writer:
    writer.abort();
    assertNoUnreferencedFiles(dir, "aborted writer after optimize");

    // Open a reader after aborting writer:
    reader = IndexReader.open(dir);

    // Reader should still see index as unoptimized:
    assertFalse("Reader incorrectly sees that the index is optimized", reader.isOptimized());
    reader.close();

    writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false);
    writer.optimize();
    writer.close();
    assertNoUnreferencedFiles(dir, "aborted writer after optimize");

    // Open a reader after closing writer:
    reader = IndexReader.open(dir);

    // Reader should now see index as optimized:
    assertTrue("Reader incorrectly sees that the index is unoptimized", reader.isOptimized());
    reader.close();
  }

  public void testIndexNoDocuments() throws IOException {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.flush();
    writer.close();

    IndexReader reader = IndexReader.open(dir);
    assertEquals(0, reader.maxDoc());
    assertEquals(0, reader.numDocs());
    reader.close();

    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
    writer.flush();
    writer.close();

    reader = IndexReader.open(dir);
    assertEquals(0, reader.maxDoc());
    assertEquals(0, reader.numDocs());
    reader.close();
  }

  public void testManyFields() throws IOException {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.setMaxBufferedDocs(10);
    for(int j=0;j<100;j++) {
      Document doc = new Document();
      doc.add(new Field("a"+j, "aaa" + j, Field.Store.YES, Field.Index.TOKENIZED));
      doc.add(new Field("b"+j, "aaa" + j, Field.Store.YES, Field.Index.TOKENIZED));
      doc.add(new Field("c"+j, "aaa" + j, Field.Store.YES, Field.Index.TOKENIZED));
      doc.add(new Field("d"+j, "aaa", Field.Store.YES, Field.Index.TOKENIZED));
      doc.add(new Field("e"+j, "aaa", Field.Store.YES, Field.Index.TOKENIZED));
      doc.add(new Field("f"+j, "aaa", Field.Store.YES, Field.Index.TOKENIZED));
      writer.addDocument(doc);
    }
    writer.close();

    IndexReader reader = IndexReader.open(dir);
    assertEquals(100, reader.maxDoc());
    assertEquals(100, reader.numDocs());
    for(int j=0;j<100;j++) {
      assertEquals(1, reader.docFreq(new Term("a"+j, "aaa"+j)));
      assertEquals(1, reader.docFreq(new Term("b"+j, "aaa"+j)));
      assertEquals(1, reader.docFreq(new Term("c"+j, "aaa"+j)));
      assertEquals(1, reader.docFreq(new Term("d"+j, "aaa")));
      assertEquals(1, reader.docFreq(new Term("e"+j, "aaa")));
      assertEquals(1, reader.docFreq(new Term("f"+j, "aaa")));
    }
    reader.close();
    dir.close();
  }

  public void testSmallRAMBuffer() throws IOException {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.setRAMBufferSizeMB(0.000001);
    int lastNumFile = dir.list().length;
    for(int j=0;j<9;j++) {
      Document doc = new Document();
      doc.add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.TOKENIZED));
      writer.addDocument(doc);
      int numFile = dir.list().length;
      // Verify that with a tiny RAM buffer we see new
      // segment after every doc
      assertTrue(numFile > lastNumFile);
      lastNumFile = numFile;
    }
    writer.close();
    dir.close();
  }

  // Make sure it's OK to change RAM buffer size and
  // maxBufferedDocs in a write session
  public void testChangingRAMBuffer() throws IOException {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.setMaxBufferedDocs(10);
    writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);

    long lastGen = -1;
    for(int j=1;j<52;j++) {
      Document doc = new Document();
      doc.add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.TOKENIZED));
      writer.addDocument(doc);
      _TestUtil.syncConcurrentMerges(writer);
      long gen = SegmentInfos.generationFromSegmentsFileName(SegmentInfos.getCurrentSegmentFileName(dir.list()));
      if (j == 1)
        lastGen = gen;
      else if (j < 10)
        // No new files should be created
        assertEquals(gen, lastGen);
      else if (10 == j) {
        assertTrue(gen > lastGen);
        lastGen = gen;
        writer.setRAMBufferSizeMB(0.000001);
        writer.setMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH);
      } else if (j < 20) {
        assertTrue(gen > lastGen);
        lastGen = gen;
      } else if (20 == j) {
        writer.setRAMBufferSizeMB(16);
        writer.setMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH);
        lastGen = gen;
      } else if (j < 30) {
        assertEquals(gen, lastGen);
      } else if (30 == j) {
        writer.setRAMBufferSizeMB(0.000001);
        writer.setMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH);
      } else if (j < 40) {
        assertTrue(gen > lastGen);
        lastGen = gen;
      } else if (40 == j) {
        writer.setMaxBufferedDocs(10);
        writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
        lastGen = gen;
      } else if (j < 50) {
        assertEquals(gen, lastGen);
        writer.setMaxBufferedDocs(10);
        writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
      } else if (50 == j) {
        assertTrue(gen > lastGen);
      }
    }
    writer.close();
    dir.close();
  }

  public void testChangingRAMBuffer2() throws IOException {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.setMaxBufferedDocs(10);
    writer.setMaxBufferedDeleteTerms(10);
    writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);

    for(int j=1;j<52;j++) {
      Document doc = new Document();
      doc.add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.TOKENIZED));
      writer.addDocument(doc);
    }

    long lastGen = -1;
    for(int j=1;j<52;j++) {
      writer.deleteDocuments(new Term("field", "aaa" + j));
      _TestUtil.syncConcurrentMerges(writer);
      long gen = SegmentInfos.generationFromSegmentsFileName(SegmentInfos.getCurrentSegmentFileName(dir.list()));
      if (j == 1)
        lastGen = gen;
      else if (j < 10) {
        // No new files should be created
        assertEquals(gen, lastGen);
      } else if (10 == j) {
        assertTrue(gen > lastGen);
        lastGen = gen;
        writer.setRAMBufferSizeMB(0.000001);
        writer.setMaxBufferedDeleteTerms(IndexWriter.DISABLE_AUTO_FLUSH);
      } else if (j < 20) {
        assertTrue(gen > lastGen);
        lastGen = gen;
      } else if (20 == j) {
        writer.setRAMBufferSizeMB(16);
        writer.setMaxBufferedDeleteTerms(IndexWriter.DISABLE_AUTO_FLUSH);
        lastGen = gen;
      } else if (j < 30) {
        assertEquals(gen, lastGen);
      } else if (30 == j) {
        writer.setRAMBufferSizeMB(0.000001);
        writer.setMaxBufferedDeleteTerms(IndexWriter.DISABLE_AUTO_FLUSH);
      } else if (j < 40) {
        assertTrue(gen > lastGen);
        lastGen = gen;
      } else if (40 == j) {
        writer.setMaxBufferedDeleteTerms(10);
        writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
        lastGen = gen;
      } else if (j < 50) {
        assertEquals(gen, lastGen);
        writer.setMaxBufferedDeleteTerms(10);
        writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
      } else if (50 == j) {
        assertTrue(gen > lastGen);
      }
    }
    writer.close();
    dir.close();
  }

  public void testDiverseDocs() throws IOException {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.setRAMBufferSizeMB(0.5);
    Random rand = new Random(31415);
    for(int i=0;i<3;i++) {
      // First, docs where every term is unique (heavy on
      // Posting instances)
      for(int j=0;j<100;j++) {
        Document doc = new Document();
        for(int k=0;k<100;k++) {
          doc.add(new Field("field", Integer.toString(rand.nextInt()), Field.Store.YES, Field.Index.TOKENIZED));
        }
        writer.addDocument(doc);
      }

      // Next, many single term docs where only one term
      // occurs (heavy on byte blocks)
      for(int j=0;j<100;j++) {
        Document doc = new Document();
        doc.add(new Field("field", "aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa", Field.Store.YES, Field.Index.TOKENIZED));
        writer.addDocument(doc);
      }

      // Next, many single term docs where only one term
      // occurs but the terms are very long (heavy on
      // char[] arrays)
      for(int j=0;j<100;j++) {
        StringBuffer b = new StringBuffer();
        String x = Integer.toString(j) + ".";
        for(int k=0;k<1000;k++)
          b.append(x);
        String longTerm = b.toString();
        Document doc = new Document();
        doc.add(new Field("field", longTerm, Field.Store.YES, Field.Index.TOKENIZED));
        writer.addDocument(doc);
      }
    }
    writer.close();

    IndexSearcher searcher = new IndexSearcher(dir);
    Hits hits = searcher.search(new TermQuery(new Term("field", "aaa")));
    assertEquals(300, hits.length());
    searcher.close();
    dir.close();
  }

  public void testEnablingNorms() throws IOException {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.setMaxBufferedDocs(10);
    // Enable norms for only 1 doc, pre flush
    for(int j=0;j<10;j++) {
      Document doc = new Document();
      Field f = new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED);
      if (j != 8) {
        f.setOmitNorms(true);
      }
      doc.add(f);
      writer.addDocument(doc);
    }
    writer.close();

    Term searchTerm = new Term("field", "aaa");
    IndexSearcher searcher = new IndexSearcher(dir);
    Hits hits = searcher.search(new TermQuery(searchTerm));
    assertEquals(10, hits.length());
    searcher.close();

    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.setMaxBufferedDocs(10);
    // Enable norms for only 1 doc, post flush
    for(int j=0;j<27;j++) {
      Document doc = new Document();
      Field f = new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED);
      if (j != 26) {
        f.setOmitNorms(true);
      }
      doc.add(f);
      writer.addDocument(doc);
    }
    writer.close();

    searcher = new IndexSearcher(dir);