TestIndexWriter.java
            e.printStackTrace(System.out);
            fail(testName + ": exception when creating IndexReader: " + e);
          }
          int result = reader.docFreq(searchTerm);
          if (success) {
            if (autoCommit && result != END_COUNT) {
              fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + END_COUNT);
            } else if (!autoCommit && result != START_COUNT) {
              fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " [autoCommit = false]");
            }
          } else {
            // On hitting exception we still may have added
            // all docs:
            if (result != START_COUNT && result != END_COUNT) {
              err.printStackTrace(System.out);
              fail(testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT);
            }
          }

          searcher = new IndexSearcher(reader);
          try {
            hits = searcher.search(new TermQuery(searchTerm));
          } catch (IOException e) {
            e.printStackTrace(System.out);
            fail(testName + ": exception when searching: " + e);
          }
          int result2 = hits.length();
          if (success) {
            if (result2 != result) {
              fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result);
            }
          } else {
            // On hitting exception we still may have added
            // all docs:
            if (result2 != result) {
              err.printStackTrace(System.out);
              fail(testName + ": method did throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result);
            }
          }

          searcher.close();
          reader.close();
          if (debug) {
            System.out.println("  count is " + result);
          }

          if (done || result == END_COUNT) {
            break;
          }
        }

        if (debug) {
          System.out.println("  start disk = " + startDiskUsage + "; input disk = " + inputDiskUsage + "; max used = " + dir.getMaxUsedSizeInBytes());
        }

        if (done) {
          // Javadocs state that temp free Directory space
          // required is at most 2X total input size of
          // indices so let's make sure:
          assertTrue("max free Directory space required exceeded 2X the total input index sizes during " + methodName +
                     ": max temp usage = " + (dir.getMaxUsedSizeInBytes()-startDiskUsage) + " bytes; " +
                     "starting disk usage = " + startDiskUsage + " bytes; " +
                     "input index disk usage = " + inputDiskUsage + " bytes",
                     (dir.getMaxUsedSizeInBytes()-startDiskUsage) < 2*(startDiskUsage + inputDiskUsage));
        }

        writer.close();

        // Wait for all BG threads to finish else
        // dir.close() will throw IOException because
        // there are still open files
        _TestUtil.syncConcurrentMerges(ms);

        dir.close();

        // Try again with 2000 more bytes of free space:
        diskFree += 2000;
      }
    }

    startDir.close();
  }

  /*
   * Make sure IndexWriter cleans up on hitting a disk
   * full exception in addDocument.
   */
  public void testAddDocumentOnDiskFull() throws IOException {

    boolean debug = false;

    for(int pass=0;pass<3;pass++) {
      if (debug)
        System.out.println("TEST: pass=" + pass);
      boolean autoCommit = pass == 0;
      boolean doAbort = pass == 2;
      long diskFree = 200;
      while(true) {
        if (debug)
          System.out.println("TEST: cycle: diskFree=" + diskFree);
        MockRAMDirectory dir = new MockRAMDirectory();
        dir.setMaxSizeInBytes(diskFree);
        IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true);

        MergeScheduler ms = writer.getMergeScheduler();
        if (ms instanceof ConcurrentMergeScheduler)
          // This test intentionally produces exceptions
          // in the threads that CMS launches; we don't
          // want to pollute test output with these.
          ((ConcurrentMergeScheduler) ms).setSuppressExceptions();

        boolean hitError = false;
        try {
          for(int i=0;i<200;i++) {
            addDoc(writer);
          }
        } catch (IOException e) {
          if (debug) {
            System.out.println("TEST: exception on addDoc");
            e.printStackTrace(System.out);
          }
          hitError = true;
        }

        if (hitError) {
          if (doAbort) {
            writer.abort();
          } else {
            try {
              writer.close();
            } catch (IOException e) {
              if (debug) {
                System.out.println("TEST: exception on close");
                e.printStackTrace(System.out);
              }
              dir.setMaxSizeInBytes(0);
              writer.close();
            }
          }

          _TestUtil.syncConcurrentMerges(ms);

          assertNoUnreferencedFiles(dir, "after disk full during addDocument with autoCommit=" + autoCommit);

          // Make sure reader can open the index:
          IndexReader.open(dir).close();

          dir.close();

          // Now try again w/ more space:
          diskFree += 500;
        } else {
          _TestUtil.syncConcurrentMerges(writer);
          dir.close();
          break;
        }
      }
    }
  }

  public static void assertNoUnreferencedFiles(Directory dir, String message) throws IOException {
    String[] startFiles = dir.list();
    SegmentInfos infos = new SegmentInfos();
    infos.read(dir);
    new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null, null);
    String[] endFiles = dir.list();

    Arrays.sort(startFiles);
    Arrays.sort(endFiles);

    if (!Arrays.equals(startFiles, endFiles)) {
      fail(message + ": before delete:\n " + arrayToString(startFiles) + "\n after delete:\n " + arrayToString(endFiles));
    }
  }

  /**
   * Make sure we skip wicked long terms.
   */
  public void testWickedLongTerm() throws IOException {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true);

    char[] chars = new char[16383];
    Arrays.fill(chars, 'x');
    Document doc = new Document();
    final String bigTerm = new String(chars);

    // Max length term is 16383, so this content produces
    // a too-long term:
    String contents = "abc xyz x" + bigTerm + " another term";
    doc.add(new Field("content", contents, Field.Store.NO, Field.Index.TOKENIZED));
    writer.addDocument(doc);

    // Make sure we can add another normal document
    doc = new Document();
    doc.add(new Field("content", "abc bbb ccc", Field.Store.NO, Field.Index.TOKENIZED));
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir);

    // Make sure all terms < max size were indexed
    assertEquals(2, reader.docFreq(new Term("content", "abc")));
    assertEquals(1, reader.docFreq(new Term("content", "bbb")));
    assertEquals(1, reader.docFreq(new Term("content", "term")));
    assertEquals(1, reader.docFreq(new Term("content", "another")));

    // Make sure position is still incremented when
    // massive term is skipped:
    TermPositions tps = reader.termPositions(new Term("content", "another"));
    assertTrue(tps.next());
    assertEquals(1, tps.freq());
    assertEquals(3, tps.nextPosition());

    // Make sure the doc that has the massive term is in
    // the index:
    assertEquals("document with wicked long term is not in the index!", 2, reader.numDocs());

    reader.close();

    // Make sure we can add a document with exactly the
    // maximum length term, and search on that term:
    doc = new Document();
    doc.add(new Field("content", bigTerm, Field.Store.NO, Field.Index.TOKENIZED));
    StandardAnalyzer sa = new StandardAnalyzer();
    sa.setMaxTokenLength(100000);
    writer = new IndexWriter(dir, sa);
    writer.addDocument(doc);
    writer.close();
    reader = IndexReader.open(dir);
    assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
    reader.close();

    dir.close();
  }

  public void testOptimizeMaxNumSegments() throws IOException {
    MockRAMDirectory dir = new MockRAMDirectory();

    final Document doc = new Document();
    doc.add(new Field("content", "aaa", Field.Store.YES, Field.Index.TOKENIZED));

    for(int numDocs=38;numDocs<500;numDocs += 38) {
      IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
      LogDocMergePolicy ldmp = new LogDocMergePolicy();
      ldmp.setMinMergeDocs(1);
      writer.setMergePolicy(ldmp);
      writer.setMergeFactor(5);
      writer.setMaxBufferedDocs(2);
      for(int j=0;j<numDocs;j++)
        writer.addDocument(doc);
      writer.close();

      SegmentInfos sis = new SegmentInfos();
      sis.read(dir);
      final int segCount = sis.size();

      writer = new IndexWriter(dir, new WhitespaceAnalyzer());
      writer.setMergePolicy(ldmp);
      writer.setMergeFactor(5);
      writer.optimize(3);
      writer.close();

      sis = new SegmentInfos();
      sis.read(dir);
      final int optSegCount = sis.size();

      if (segCount < 3)
        assertEquals(segCount, optSegCount);
      else
        assertEquals(3, optSegCount);
    }
  }

  public void testOptimizeMaxNumSegments2() throws IOException {
    MockRAMDirectory dir = new MockRAMDirectory();

    final Document doc = new Document();
    doc.add(new Field("content", "aaa", Field.Store.YES, Field.Index.TOKENIZED));

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    LogDocMergePolicy ldmp = new LogDocMergePolicy();
    ldmp.setMinMergeDocs(1);
    writer.setMergePolicy(ldmp);
    writer.setMergeFactor(4);
    writer.setMaxBufferedDocs(2);

    for(int iter=0;iter<10;iter++) {
      for(int i=0;i<19;i++)
        writer.addDocument(doc);

      writer.flush();

      SegmentInfos sis = new SegmentInfos();
      ((ConcurrentMergeScheduler) writer.getMergeScheduler()).sync();
      sis.read(dir);

      final int segCount = sis.size();

      writer.optimize(7);

      sis = new SegmentInfos();
      ((ConcurrentMergeScheduler) writer.getMergeScheduler()).sync();
      sis.read(dir);
      final int optSegCount = sis.size();

      if (segCount < 7)
        assertEquals(segCount, optSegCount);
      else
        assertEquals(7, optSegCount);
    }
  }

  /**
   * Make sure optimize doesn't use any more than 1X
   * starting index size as its temporary free space
   * required.
   */
  public void testOptimizeTempSpaceUsage() throws IOException {
    MockRAMDirectory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    for(int j=0;j<500;j++) {
      addDocWithIndex(writer, j);
    }
    writer.close();

    long startDiskUsage = 0;
    String[] files = dir.list();
    for(int i=0;i<files.length;i++) {
      startDiskUsage += dir.fileLength(files[i]);
    }

    dir.resetMaxUsedSizeInBytes();
    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
    writer.optimize();
    writer.close();
    long maxDiskUsage = dir.getMaxUsedSizeInBytes();

    assertTrue("optimize used too much temporary space: starting usage was " + startDiskUsage + " bytes; max temp usage was " + maxDiskUsage + " but should have been " + (2*startDiskUsage) + " (= 2X starting usage)",
               maxDiskUsage <= 2*startDiskUsage);
    dir.close();
  }

  static String arrayToString(String[] l) {
    String s = "";
    for(int i=0;i<l.length;i++) {
      if (i > 0) {
        s += "\n ";
      }
      s += l[i];
    }
    return s;
  }

  // Make sure we can open an index for create even when a
  // reader holds it open (this fails pre lock-less
  // commits on windows):
  public void testCreateWithReader() throws IOException {
    String tempDir = System.getProperty("java.io.tmpdir");
    if (tempDir == null)
      throw new IOException("java.io.tmpdir undefined, cannot run test");
    File indexDir = new File(tempDir, "lucenetestindexwriter");

    try {
      Directory dir = FSDirectory.getDirectory(indexDir);
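// ---------------------------------------------------------------------
// Editor's sketch, not part of TestIndexWriter: the excerpt above ends
// mid-method, and several of its tests repeat one pattern: cap a
// MockRAMDirectory to simulate a full disk, attempt writes, and on
// IOException lift the cap, close cleanly, and retry with more space.
// This is a minimal standalone distillation against the same Lucene 2.x
// APIs used above; the class name, doc count, and step size are
// illustrative, not from the original file.
// ---------------------------------------------------------------------
import java.io.IOException;

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.MockRAMDirectory;

public class DiskFullRetrySketch {
  public static void main(String[] args) throws IOException {
    long diskFree = 200;                        // deliberately tiny "disk"
    while (true) {
      MockRAMDirectory dir = new MockRAMDirectory();
      dir.setMaxSizeInBytes(diskFree);          // enforce the disk-full limit
      IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
      boolean hitError = false;
      try {
        Document doc = new Document();
        doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED));
        for (int i = 0; i < 200; i++)
          writer.addDocument(doc);              // throws IOException once "disk" fills
      } catch (IOException e) {
        hitError = true;
      }
      dir.setMaxSizeInBytes(0);                 // 0 = no limit, so close() can flush
      writer.close();
      dir.close();
      if (!hitError)
        break;                                  // the whole batch fit: done
      diskFree += 500;                          // otherwise retry with more room
    }
  }
}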