📄 standardbenchmarker.java
字号:
package org.apache.lucene.benchmark.standard;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.benchmark.AbstractBenchmarker;import org.apache.lucene.benchmark.BenchmarkOptions;import org.apache.lucene.benchmark.Benchmarker;import org.apache.lucene.benchmark.stats.QueryData;import org.apache.lucene.benchmark.stats.TestData;import org.apache.lucene.benchmark.stats.TestRunData;import org.apache.lucene.benchmark.stats.TimeData;import org.apache.lucene.document.DateTools;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.queryParser.QueryParser;import org.apache.lucene.search.Hits;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.store.FSDirectory;import java.io.*;import java.text.DateFormat;import java.text.SimpleDateFormat;import java.util.*;/** * Copyright 2005 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//** * Reads in the Reuters Collection, downloaded from http://www.daviddlewis.com/resources/testcollections/reuters21578/reuters21578.tar.gz * in the workingDir/reuters and indexes them using the {@link org.apache.lucene.analysis.standard.StandardAnalyzer} *<p/> * Runs a standard set of documents through an Indexer and then runs a standard set of queries against the index. * * @see org.apache.lucene.benchmark.standard.StandardBenchmarker#benchmark(java.io.File, org.apache.lucene.benchmark.BenchmarkOptions) * * @deprecated use the byTask code instead. See http://lucene.zones.apache.org:8080/hudson/job/Lucene-Nightly/javadoc/org/apache/lucene/benchmark/byTask/package-summary.html . **/public class StandardBenchmarker extends AbstractBenchmarker implements Benchmarker{ public static final String SOURCE_DIR = "reuters-out"; public static final String INDEX_DIR = "index"; //30-MAR-1987 14:22:36.87 private static DateFormat format = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US); //DateFormat.getDateTimeInstance(DateFormat.MEDIUM, DateFormat.SHORT); static{ format.setLenient(true); } public StandardBenchmarker() { } public TestData [] benchmark(File workingDir, BenchmarkOptions opts) throws Exception { StandardOptions options = (StandardOptions) opts; workingDir.mkdirs(); File sourceDir = getSourceDirectory(workingDir); sourceDir.mkdirs(); File indexDir = new File(workingDir, INDEX_DIR); indexDir.mkdirs(); Analyzer a = new StandardAnalyzer(); List queryList = new ArrayList(20); queryList.addAll(Arrays.asList(ReutersQueries.STANDARD_QUERIES)); queryList.addAll(Arrays.asList(ReutersQueries.getPrebuiltQueries("body"))); Query[] qs = createQueries(queryList, a); // Here you can limit the set of query benchmarks QueryData[] qds = QueryData.getAll(qs); // Here you can narrow down the set of test parameters TestData[] params = TestData.getTestDataMinMaxMergeAndMaxBuffered(new File[]{sourceDir/*, jumboDir*/}, new Analyzer[]{a});//TestData.getAll(new File[]{sourceDir, jumboDir}, new Analyzer[]{a}); System.out.println("Testing " + params.length + " different permutations."); for (int i = 0; i < params.length; i++) { try { reset(indexDir); params[i].setDirectory(FSDirectory.getDirectory(indexDir)); params[i].setQueries(qds); System.out.println(params[i]); runBenchmark(params[i], options); // Here you can collect and output the runData for further processing. System.out.println(params[i].showRunData(params[i].getId())); //bench.runSearchBenchmark(queries, dir); params[i].getDirectory().close(); System.runFinalization(); System.gc(); } catch (Exception e) { e.printStackTrace(); System.out.println("EXCEPTION: " + e.getMessage()); //break; } } return params; } protected File getSourceDirectory(File workingDir) { return new File(workingDir, SOURCE_DIR); } /** * Run benchmark using supplied parameters. * * @param params benchmark parameters * @throws Exception */ protected void runBenchmark(TestData params, StandardOptions options) throws Exception { System.out.println("Start Time: " + new Date()); int runCount = options.getRunCount(); for (int i = 0; i < runCount; i++) { TestRunData trd = new TestRunData(); trd.startRun(); trd.setId(String.valueOf(i)); IndexWriter iw = new IndexWriter(params.getDirectory(), params.getAnalyzer(), true); iw.setMergeFactor(params.getMergeFactor()); iw.setMaxBufferedDocs(params.getMaxBufferedDocs()); iw.setUseCompoundFile(params.isCompound()); makeIndex(trd, params.getSource(), iw, true, true, false, options); if (params.isOptimize()) { TimeData td = new TimeData("optimize"); trd.addData(td); td.start(); iw.optimize(); td.stop(); trd.addData(td); } iw.close(); QueryData[] queries = params.getQueries(); if (queries != null) { IndexReader ir = null; IndexSearcher searcher = null; for (int k = 0; k < queries.length; k++) { QueryData qd = queries[k]; if (ir != null && qd.reopen) { searcher.close(); ir.close(); ir = null; searcher = null; } if (ir == null) { ir = IndexReader.open(params.getDirectory()); searcher = new IndexSearcher(ir); } Document doc = null; if (qd.warmup) { TimeData td = new TimeData(qd.id + "-warm"); for (int m = 0; m < ir.maxDoc(); m++) { td.start(); if (ir.isDeleted(m)) { td.stop(); continue; } doc = ir.document(m); td.stop(); } trd.addData(td); } TimeData td = new TimeData(qd.id + "-srch"); td.start(); Hits h = searcher.search(qd.q); //System.out.println("Hits Size: " + h.length() + " Query: " + qd.q); td.stop(); trd.addData(td); td = new TimeData(qd.id + "-trav"); if (h != null && h.length() > 0) { for (int m = 0; m < h.length(); m++) { td.start(); int id = h.id(m); if (qd.retrieve) { doc = ir.document(id); } td.stop(); } } trd.addData(td); } try { if (searcher != null) { searcher.close(); } } catch (Exception e) { } ; try { if (ir != null) { ir.close(); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -