📄 standardbenchmarker.java
字号:
} catch (Exception e) { } ; } trd.endRun(); params.getRunData().add(trd); //System.out.println(params[i].showRunData(params[i].getId())); //params.showRunData(params.getId()); } System.out.println("End Time: " + new Date()); } /** * Parse the Reuters SGML and index: * Date, Title, Dateline, Body * * * * @param in input file * @return Lucene document */ protected Document makeDocument(File in, String[] tags, boolean stored, boolean tokenized, boolean tfv) throws Exception { Document doc = new Document(); // tag this document if (tags != null) { for (int i = 0; i < tags.length; i++) { doc.add(new Field("tag" + i, tags[i], stored == true ? Field.Store.YES : Field.Store.NO, tokenized == true ? Field.Index.TOKENIZED : Field.Index.UN_TOKENIZED, tfv == true ? Field.TermVector.YES : Field.TermVector.NO)); } } doc.add(new Field("file", in.getCanonicalPath(), stored == true ? Field.Store.YES : Field.Store.NO, tokenized == true ? Field.Index.TOKENIZED : Field.Index.UN_TOKENIZED, tfv == true ? Field.TermVector.YES : Field.TermVector.NO)); BufferedReader reader = new BufferedReader(new FileReader(in)); String line = null; //First line is the date, 3rd is the title, rest is body String dateStr = reader.readLine(); reader.readLine();//skip an empty line String title = reader.readLine(); reader.readLine();//skip an empty line StringBuffer body = new StringBuffer(1024); while ((line = reader.readLine()) != null) { body.append(line).append(' '); } reader.close(); Date date = format.parse(dateStr.trim()); doc.add(new Field("date", DateTools.dateToString(date, DateTools.Resolution.SECOND), Field.Store.YES, Field.Index.UN_TOKENIZED)); if (title != null) { doc.add(new Field("title", title, stored == true ? Field.Store.YES : Field.Store.NO, tokenized == true ? Field.Index.TOKENIZED : Field.Index.UN_TOKENIZED, tfv == true ? Field.TermVector.YES : Field.TermVector.NO)); } if (body.length() > 0) { doc.add(new Field("body", body.toString(), stored == true ? Field.Store.YES : Field.Store.NO, tokenized == true ? Field.Index.TOKENIZED : Field.Index.UN_TOKENIZED, tfv == true ? Field.TermVector.YES : Field.TermVector.NO)); } return doc; } /** * Make index, and collect time data. * * @param trd run data to populate * @param srcDir directory with source files * @param iw index writer, already open * @param stored store values of fields * @param tokenized tokenize fields * @param tfv store term vectors * @throws Exception */ protected void makeIndex(TestRunData trd, File srcDir, IndexWriter iw, boolean stored, boolean tokenized, boolean tfv, StandardOptions options) throws Exception { //File[] groups = srcDir.listFiles(); List files = new ArrayList(); getAllFiles(srcDir, null, files); Document doc = null; long cnt = 0L; TimeData td = new TimeData(); td.name = "addDocument"; int scaleUp = options.getScaleUp(); int logStep = options.getLogStep(); int max = Math.min(files.size(), options.getMaximumDocumentsToIndex()); for (int s = 0; s < scaleUp; s++) { String[] tags = new String[]{srcDir.getName() + "/" + s}; int i = 0; for (Iterator iterator = files.iterator(); iterator.hasNext() && i < max; i++) { File file = (File) iterator.next(); doc = makeDocument(file, tags, stored, tokenized, tfv); td.start(); iw.addDocument(doc); td.stop(); cnt++; if (cnt % logStep == 0) { System.err.println(" - processed " + cnt + ", run id=" + trd.getId()); trd.addData(td); td.reset(); } } } trd.addData(td); } public static void getAllFiles(File srcDir, FileFilter filter, List allFiles) { File [] files = srcDir.listFiles(filter); for (int i = 0; i < files.length; i++) { File file = files[i]; if (file.isDirectory()) { getAllFiles(file, filter, allFiles); } else { allFiles.add(file); } } } /** * Parse the strings containing Lucene queries. * * @param qs array of strings containing query expressions * @param a analyzer to use when parsing queries * @return array of Lucene queries */ public static Query[] createQueries(List qs, Analyzer a) { QueryParser qp = new QueryParser("body", a); List queries = new ArrayList(); for (int i = 0; i < qs.size(); i++) { try { Object query = qs.get(i); Query q = null; if (query instanceof String) { q = qp.parse((String) query); } else if (query instanceof Query) { q = (Query) query; } else { System.err.println("Unsupported Query Type: " + query); } if (q != null) { queries.add(q); } } catch (Exception e) { e.printStackTrace(); } } return (Query[]) queries.toArray(new Query[0]); } /** * Remove existing index. * * @throws Exception */ protected void reset(File indexDir) throws Exception { if (indexDir.exists()) { fullyDelete(indexDir); } indexDir.mkdirs(); } /** * Save a stream to a file. * * @param is input stream * @param out output file * @param closeInput if true, close the input stream when done. * @throws Exception */ protected void saveStream(InputStream is, File out, boolean closeInput) throws Exception { byte[] buf = new byte[4096]; FileOutputStream fos = new FileOutputStream(out); int len = 0; long total = 0L; long time = System.currentTimeMillis(); long delta = time; while ((len = is.read(buf)) > 0) { fos.write(buf, 0, len); total += len; time = System.currentTimeMillis(); if (time - delta > 5000) { System.err.println(" - copied " + total / 1024 + " kB..."); delta = time; } } fos.flush(); fos.close(); if (closeInput) { is.close(); } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -