📄 testperftaskslogic.java

📁 Lucene, a Java open-source search engine framework
💻 JAVA
字号:
/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements.  See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License.  You may obtain a copy of the License at * *     http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */package org.apache.lucene.benchmark.byTask;import java.io.StringReader;import java.io.File;import java.io.FileReader;import java.io.BufferedReader;import java.util.List;import java.util.Iterator;import org.apache.lucene.benchmark.byTask.Benchmark;import org.apache.lucene.benchmark.byTask.feeds.DocData;import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;import org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker;import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask;import org.apache.lucene.benchmark.byTask.stats.TaskStats;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.TermEnum;import org.apache.lucene.index.TermDocs;import junit.framework.TestCase;/** * Test very simply that perf tasks - simple algorithms - are doing what they should. 
*/public class TestPerfTasksLogic extends TestCase {  private static final boolean DEBUG = false;  static final String NEW_LINE = System.getProperty("line.separator");    // properties in effect in all tests here  static final String propLines [] = {    "directory=RAMDirectory",    "print.props=false",  };    /**   * @param name test name   */  public TestPerfTasksLogic(String name) {    super(name);  }  /**   * Test index creation logic   */  public void testIndexAndSearchTasks() throws Exception {    // 1. alg definition (required in every "logic" test)    String algLines[] = {        "ResetSystemErase",        "CreateIndex",        "{ AddDoc } : 1000",        "Optimize",        "CloseIndex",        "OpenReader",        "{ CountingSearchTest } : 200",        "CloseReader",        "[ CountingSearchTest > : 70",        "[ CountingSearchTest > : 9",    };        // 2. we test this value later    CountingSearchTestTask.numSearches = 0;        // 3. execute the algorithm  (required in every "logic" test)    Benchmark benchmark = execBenchmark(algLines);    // 4. test specific checks after the benchmark run completed.    assertEquals("TestSearchTask was supposed to be called!",279,CountingSearchTestTask.numSearches);    assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));    // now we should be able to open the index for write.     IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false);    iw.close();    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());    assertEquals("1000 docs were added to the index, this is what we expect to find!",1000,ir.numDocs());    ir.close();  }  /**   * Test Exhasting Doc Maker logic   */  public void testExhaustDocMaker() throws Exception {    // 1. 
alg definition (required in every "logic" test)    String algLines[] = {        "# ----- properties ",        "doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker",        "doc.add.log.step=1",        "doc.term.vector=false",        "doc.maker.forever=false",        "directory=RAMDirectory",        "doc.stored=false",        "doc.tokenized=false",        "# ----- alg ",        "CreateIndex",        "{ AddDoc } : * ",        "Optimize",        "CloseIndex",        "OpenReader",        "{ CountingSearchTest } : 100",        "CloseReader",        "[ CountingSearchTest > : 30",        "[ CountingSearchTest > : 9",    };        // 2. we test this value later    CountingSearchTestTask.numSearches = 0;        // 3. execute the algorithm  (required in every "logic" test)    Benchmark benchmark = execBenchmark(algLines);    // 4. test specific checks after the benchmark run completed.    assertEquals("TestSearchTask was supposed to be called!",139,CountingSearchTestTask.numSearches);    assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));    // now we should be able to open the index for write.     IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false);    iw.close();    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());    assertEquals("1 docs were added to the index, this is what we expect to find!",1,ir.numDocs());    ir.close();  }  /**   * Test Parallel Doc Maker logic (for LUCENE-940)   */  public void testParallelDocMaker() throws Exception {    // 1. 
alg definition (required in every "logic" test)    String algLines[] = {        "# ----- properties ",        "doc.maker="+Reuters20DocMaker.class.getName(),        "doc.add.log.step=3",        "doc.term.vector=false",        "doc.maker.forever=false",        "directory=FSDirectory",        "doc.stored=false",        "doc.tokenized=false",        "# ----- alg ",        "CreateIndex",        "[ { AddDoc } : * ] : 4 ",        "CloseIndex",    };        // 2. execute the algorithm  (required in every "logic" test)    Benchmark benchmark = execBenchmark(algLines);    // 3. test number of docs in the index    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());    int ndocsExpected = 20; // Reuters20DocMaker exhausts after 20 docs.    assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());    ir.close();  }  /**   * Test WriteLineDoc and LineDocMaker.   */  public void testLineDocFile() throws Exception {    File lineFile = new File(System.getProperty("tempDir"), "test.reuters.lines.txt");    // We will call WriteLineDocs this many times    final int NUM_TRY_DOCS = 500;    // Creates a line file with first 500 docs from reuters    String algLines1[] = {      "# ----- properties ",      "doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker",      "doc.maker.forever=false",      "line.file.out=" + lineFile.getAbsolutePath().replace('\\', '/'),      "# ----- alg ",      "{WriteLineDoc()}:" + NUM_TRY_DOCS,    };    // Run algo    Benchmark benchmark = execBenchmark(algLines1);    // Verify we got somewhere between 1-500 lines (some    // Reuters docs have no body, which WriteLineDoc task    // skips).    
BufferedReader r = new BufferedReader(new FileReader(lineFile));    int numLines = 0;    while(r.readLine() != null)      numLines++;    r.close();    assertTrue("did not see the right number of docs; should be > 0 and <= " + NUM_TRY_DOCS + " but was " + numLines, numLines > 0 && numLines <= NUM_TRY_DOCS);        // Index the line docs    String algLines2[] = {      "# ----- properties ",      "analyzer=org.apache.lucene.analysis.SimpleAnalyzer",      "doc.maker=org.apache.lucene.benchmark.byTask.feeds.LineDocMaker",      "docs.file=" + lineFile.getAbsolutePath().replace('\\', '/'),      "doc.maker.forever=false",      "autocommit=false",      "ram.flush.mb=4",      "# ----- alg ",      "ResetSystemErase",      "CreateIndex",      "{AddDoc}: *",      "CloseIndex",    };        // Run algo    benchmark = execBenchmark(algLines2);    // now we should be able to open the index for write.     IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false);    iw.close();    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());    assertEquals(numLines + " lines were were created but " + ir.numDocs() + " docs are in the index", numLines, ir.numDocs());    ir.close();    lineFile.delete();  }    /**   * Test ReadTokensTask   */  public void testReadTokens() throws Exception {    // We will call ReadTokens on this many docs    final int NUM_DOCS = 100;    // Read tokens from first NUM_DOCS docs from Reuters and    // then build index from the same docs    String algLines1[] = {      "# ----- properties ",      "analyzer=org.apache.lucene.analysis.WhitespaceAnalyzer",      "doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker",      "# ----- alg ",      "{ReadTokens}: " + NUM_DOCS,      "ResetSystemErase",      "CreateIndex",      "{AddDoc}: " + NUM_DOCS,      "CloseIndex",    };    // Run algo    Benchmark benchmark = execBenchmark(algLines1);    List stats = benchmark.getRunData().getPoints().taskStats();    // Count 
how many tokens all ReadTokens saw    int totalTokenCount1 = 0;    for (Iterator it = stats.iterator(); it.hasNext();) {      TaskStats stat = (TaskStats) it.next();      if (stat.getTask().getName().equals("ReadTokens")) {        totalTokenCount1 += stat.getCount();      }    }    // Separately count how many tokens are actually in the index:    IndexReader reader = IndexReader.open(benchmark.getRunData().getDirectory());    assertEquals(NUM_DOCS, reader.numDocs());    TermEnum terms = reader.terms();    TermDocs termDocs = reader.termDocs();    int totalTokenCount2 = 0;    while(terms.next()) {      termDocs.seek(terms.term());      while(termDocs.next())        totalTokenCount2 += termDocs.freq();    }    reader.close();    // Make sure they are the same    assertEquals(totalTokenCount1, totalTokenCount2);  }    /**   * Test that " {[AddDoc(4000)]: 4} : * " works corrcetly (for LUCENE-941)   */  public void testParallelExhausted() throws Exception {    // 1. alg definition (required in every "logic" test)    String algLines[] = {        "# ----- properties ",        "doc.maker="+Reuters20DocMaker.class.getName(),        "doc.add.log.step=3",        "doc.term.vector=false",        "doc.maker.forever=false",        "directory=RAMDirectory",        "doc.stored=false",        "doc.tokenized=false",        "debug.level=1",        "# ----- alg ",        "CreateIndex",        "{ [ AddDoc]: 4} : * ",        "ResetInputs ",        "{ [ AddDoc]: 4} : * ",        "CloseIndex",    };        // 2. execute the algorithm  (required in every "logic" test)    Benchmark benchmark = execBenchmark(algLines);    // 3. test number of docs in the index    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());    int ndocsExpected = 2 * 20; // Reuters20DocMaker exhausts after 20 docs.    assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());    ir.close();  }  // create the benchmark and execute it.   
public static Benchmark execBenchmark(String[] algLines) throws Exception {    String algText = algLinesToText(algLines);    logTstLogic(algText);    Benchmark benchmark = new Benchmark(new StringReader(algText));    benchmark.execute();    return benchmark;  }    // catenate alg lines to make the alg text  private static String algLinesToText(String[] algLines) {    String indent = "  ";    StringBuffer sb = new StringBuffer();    for (int i = 0; i < propLines.length; i++) {      sb.append(indent).append(propLines[i]).append(NEW_LINE);    }    for (int i = 0; i < algLines.length; i++) {      sb.append(indent).append(algLines[i]).append(NEW_LINE);    }    return sb.toString();  }  private static void logTstLogic (String txt) {    if (!DEBUG)       return;    System.out.println("Test logic of:");    System.out.println(txt);  }  /** use reuters and the exhaust mechanism, but to be faster, add 20 docs only... */  public static class Reuters20DocMaker extends ReutersDocMaker {    private int nDocs=0;    protected synchronized DocData getNextDocData() throws Exception {      if (nDocs>=20 && !forever) {        throw new NoMoreDataException();      }      nDocs++;      return super.getNextDocData();    }    public synchronized void resetInputs() {      super.resetInputs();      nDocs = 0;    }  }    /**   * Test that exhaust in loop works as expected (LUCENE-1115).   */  public void testExhaustedLooped() throws Exception {    // 1. 
alg definition (required in every "logic" test)    String algLines[] = {        "# ----- properties ",        "doc.maker="+Reuters20DocMaker.class.getName(),        "doc.add.log.step=3",        "doc.term.vector=false",        "doc.maker.forever=false",        "directory=RAMDirectory",        "doc.stored=false",        "doc.tokenized=false",        "debug.level=1",        "# ----- alg ",        "{ \"Rounds\"",        "  ResetSystemErase",        "  CreateIndex",        "  { \"AddDocs\"  AddDoc > : * ",        "  CloseIndex",        "} : 2",    };        // 2. execute the algorithm  (required in every "logic" test)    Benchmark benchmark = execBenchmark(algLines);    // 3. test number of docs in the index    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());    int ndocsExpected = 20; // Reuters20DocMaker exhausts after 20 docs.    assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());    ir.close();  }}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -