TestIndexWriter.java
package org.apache.lucene.index;

import java.io.IOException;
import java.io.File;
import java.util.Arrays;

import junit.framework.TestCase;

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.MockRAMDirectory;
import org.apache.lucene.store.LockFactory;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.SingleInstanceLockFactory;

/**
 * @author goller
 * @version $Id: TestIndexWriter.java 499089 2007-01-23 17:33:11Z mikemccand $
 */
public class TestIndexWriter extends TestCase
{
  public void testDocCount() throws IOException
  {
    Directory dir = new RAMDirectory();

    IndexWriter writer = null;
    IndexReader reader = null;
    int i;

    IndexWriter.setDefaultWriteLockTimeout(2000);
    assertEquals(2000, IndexWriter.getDefaultWriteLockTimeout());

    writer = new IndexWriter(dir, new WhitespaceAnalyzer());

    IndexWriter.setDefaultWriteLockTimeout(1000);

    // add 100 documents
    for (i = 0; i < 100; i++) {
      addDoc(writer);
    }
    assertEquals(100, writer.docCount());
    writer.close();

    // delete 40 documents
    reader = IndexReader.open(dir);
    for (i = 0; i < 40; i++) {
      reader.deleteDocument(i);
    }
    reader.close();

    // test doc count before segments are merged/index is optimized
    writer = new IndexWriter(dir, new WhitespaceAnalyzer());
    assertEquals(100, writer.docCount());
    writer.close();

    reader = IndexReader.open(dir);
    assertEquals(100, reader.maxDoc());
    assertEquals(60, reader.numDocs());
    reader.close();

    // optimize the index and check that the new doc count is correct
    writer = new IndexWriter(dir, new WhitespaceAnalyzer());
    writer.optimize();
    assertEquals(60, writer.docCount());
    writer.close();

    // check that the index reader gives the same numbers.
    reader = IndexReader.open(dir);
    assertEquals(60, reader.maxDoc());
    assertEquals(60, reader.numDocs());
    reader.close();

    // make sure opening a new index for create over
    // this existing one works correctly:
    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    assertEquals(0, writer.docCount());
    writer.close();
  }

  private void addDoc(IndexWriter writer) throws IOException
  {
    Document doc = new Document();
    doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED));
    writer.addDocument(doc);
  }

  private void addDocWithIndex(IndexWriter writer, int index) throws IOException
  {
    Document doc = new Document();
    doc.add(new Field("content", "aaa " + index, Field.Store.YES, Field.Index.TOKENIZED));
    doc.add(new Field("id", "" + index, Field.Store.YES, Field.Index.TOKENIZED));
    writer.addDocument(doc);
  }
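  // (Added for illustration; not part of the original file.) As a JUnit 3.x
  // TestCase, this class can also be run standalone from the command line
  // with the text-based runner:
  public static void main(String[] args) {
    junit.textui.TestRunner.run(new junit.framework.TestSuite(TestIndexWriter.class));
  }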
  /*
    Test: make sure when we run out of disk space or hit
    random IOExceptions in any of the addIndexes(*) calls
    that 1) index is not corrupt (searcher can open/search
    it) and 2) transactional semantics are followed:
    either all or none of the incoming documents were in
    fact added.
  */
  public void testAddIndexOnDiskFull() throws IOException
  {
    int START_COUNT = 57;
    int NUM_DIR = 50;
    int END_COUNT = START_COUNT + NUM_DIR*25;

    boolean debug = false;

    // Build up a bunch of dirs that have indexes which we
    // will then merge together by calling addIndexes(*):
    Directory[] dirs = new Directory[NUM_DIR];
    long inputDiskUsage = 0;
    for(int i=0;i<NUM_DIR;i++) {
      dirs[i] = new RAMDirectory();
      IndexWriter writer = new IndexWriter(dirs[i], new WhitespaceAnalyzer(), true);
      for(int j=0;j<25;j++) {
        addDocWithIndex(writer, 25*i+j);
      }
      writer.close();
      String[] files = dirs[i].list();
      for(int j=0;j<files.length;j++) {
        inputDiskUsage += dirs[i].fileLength(files[j]);
      }
    }

    // Now, build a starting index that has START_COUNT docs.  We
    // will then try to addIndexes into a copy of this:
    RAMDirectory startDir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(startDir, new WhitespaceAnalyzer(), true);
    for(int j=0;j<START_COUNT;j++) {
      addDocWithIndex(writer, j);
    }
    writer.close();

    // Make sure starting index seems to be working properly:
    Term searchTerm = new Term("content", "aaa");
    IndexReader reader = IndexReader.open(startDir);
    assertEquals("first docFreq", 57, reader.docFreq(searchTerm));

    IndexSearcher searcher = new IndexSearcher(reader);
    Hits hits = searcher.search(new TermQuery(searchTerm));
    assertEquals("first number of hits", 57, hits.length());
    searcher.close();
    reader.close();

    // Iterate with larger and larger amounts of free
    // disk space.  With little free disk space,
    // addIndexes will certainly run out of space &
    // fail.  Verify that when this happens, index is
    // not corrupt and index in fact has added no
    // documents.  Then, we increase disk space by 1000
    // bytes each iteration.  At some point there is
    // enough free disk space and addIndexes should
    // succeed and index should show all documents were
    // added.
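    // (Added note, for concreteness: each of the NUM_DIR=50 source indexes
    // holds 25 docs, so "all documents were added" means docFreq('aaa')
    // reaches END_COUNT = START_COUNT + NUM_DIR*25 = 57 + 50*25 = 1307.)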
    // String[] files = startDir.list();
    long diskUsage = startDir.sizeInBytes();

    long startDiskUsage = 0;
    String[] files = startDir.list();
    for(int i=0;i<files.length;i++) {
      startDiskUsage += startDir.fileLength(files[i]);
    }

    for(int method=0;method<3;method++) {

      // Start with 100 bytes more than we are currently using:
      long diskFree = diskUsage+100;

      boolean success = false;
      boolean done = false;

      String methodName;
      if (0 == method) {
        methodName = "addIndexes(Directory[])";
      } else if (1 == method) {
        methodName = "addIndexes(IndexReader[])";
      } else {
        methodName = "addIndexesNoOptimize(Directory[])";
      }
      String testName = "disk full test for method " + methodName + " with disk full at " + diskFree + " bytes";

      int cycleCount = 0;

      while(!done) {

        cycleCount++;

        // Make a new dir that will enforce disk usage:
        MockRAMDirectory dir = new MockRAMDirectory(startDir);
        writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
        IOException err = null;

        for(int x=0;x<2;x++) {

          // Two loops: first time, limit disk space &
          // throw random IOExceptions; second time, no
          // disk space limit:
          double rate = 0.05;
          double diskRatio = ((double) diskFree)/diskUsage;
          long thisDiskFree;

          if (0 == x) {
            thisDiskFree = diskFree;
            if (diskRatio >= 2.0) {
              rate /= 2;
            }
            if (diskRatio >= 4.0) {
              rate /= 2;
            }
            if (diskRatio >= 6.0) {
              rate = 0.0;
            }
            if (debug) {
              System.out.println("\ncycle: " + methodName + ": " + diskFree + " bytes");
            }
          } else {
            thisDiskFree = 0;
            rate = 0.0;
            if (debug) {
              System.out.println("\ncycle: " + methodName + ", same writer: unlimited disk space");
            }
          }

          dir.setMaxSizeInBytes(thisDiskFree);
          dir.setRandomIOExceptionRate(rate, diskFree);
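          // (Added note: on the first pass the injected-IOException rate
          // starts at 0.05 and is halved once diskFree exceeds 2x and again
          // at 4x current disk usage, dropping to 0 at 6x, so errors are only
          // injected while simulated free space is genuinely tight; on the
          // second pass thisDiskFree = 0 and rate = 0.0, i.e. no size cap and
          // no injected errors, per the "unlimited disk space" debug message.)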
          try {

            if (0 == method) {
              writer.addIndexes(dirs);
            } else if (1 == method) {
              IndexReader readers[] = new IndexReader[dirs.length];
              for(int i=0;i<dirs.length;i++) {
                readers[i] = IndexReader.open(dirs[i]);
              }
              try {
                writer.addIndexes(readers);
              } finally {
                for(int i=0;i<dirs.length;i++) {
                  readers[i].close();
                }
              }
            } else {
              writer.addIndexesNoOptimize(dirs);
            }

            success = true;
            if (debug) {
              System.out.println("  success!");
            }

            if (0 == x) {
              done = true;
            }

          } catch (IOException e) {
            success = false;
            err = e;
            if (debug) {
              System.out.println("  hit IOException: " + e);
            }

            if (1 == x) {
              e.printStackTrace();
              fail(methodName + " hit IOException after disk space was freed up");
            }
          }

          // Whether we succeeded or failed, check that all
          // un-referenced files were in fact deleted (ie,
          // we did not create garbage).  Just create a
          // new IndexFileDeleter, have it delete
          // unreferenced files, then verify that in fact
          // no files were deleted:
          String[] startFiles = dir.list();
          SegmentInfos infos = new SegmentInfos();
          infos.read(dir);
          IndexFileDeleter d = new IndexFileDeleter(infos, dir);
          d.findDeletableFiles();
          d.deleteFiles();
          String[] endFiles = dir.list();

          Arrays.sort(startFiles);
          Arrays.sort(endFiles);

          /*
          for(int i=0;i<startFiles.length;i++) {
            System.out.println("  " + i + ": " + startFiles[i]);
          }
          */

          if (!Arrays.equals(startFiles, endFiles)) {
            String successStr;
            if (success) {
              successStr = "success";
            } else {
              successStr = "IOException";
              err.printStackTrace();
            }
            fail(methodName + " failed to delete unreferenced files after " + successStr + " (" + diskFree + " bytes): before delete:\n  " + arrayToString(startFiles) + "\n after delete:\n  " + arrayToString(endFiles));
          }

          if (debug) {
            System.out.println("  now test readers");
          }

          // Finally, verify index is not corrupt, and, if
          // we succeeded, we see all docs added, and if we
          // failed, we see either all docs or no docs added
          // (transactional semantics):
          try {
            reader = IndexReader.open(dir);
          } catch (IOException e) {
            e.printStackTrace();
            fail(testName + ": exception when creating IndexReader: " + e);
          }
          int result = reader.docFreq(searchTerm);
          if (success) {
            if (result != END_COUNT) {
              fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + END_COUNT);
            }
          } else {
            // On hitting exception we still may have added
            // all docs:
            if (result != START_COUNT && result != END_COUNT) {
              err.printStackTrace();
              fail(testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT);
            }
          }

          searcher = new IndexSearcher(reader);
          try {
            hits = searcher.search(new TermQuery(searchTerm));
          } catch (IOException e) {
            e.printStackTrace();
            fail(testName + ": exception when searching: " + e);
          }
          int result2 = hits.length();
          if (success) {
            if (result2 != result) {
              fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result);
            }
          } else {
            // On hitting exception we still may have added
            // all docs:
            if (result2 != result) {
              err.printStackTrace();
              fail(testName + ": method did throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result);
            }
          }

          searcher.close();
          reader.close();

          if (debug) {