📄 standardbenchmarker.java

📁 lucene2.2.0版本
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
                }                catch (Exception e)                {                }                ;            }            trd.endRun();            params.getRunData().add(trd);            //System.out.println(params[i].showRunData(params[i].getId()));            //params.showRunData(params.getId());        }        System.out.println("End Time: " + new Date());    }    /**     * Parse the Reuters SGML and index:     * Date, Title, Dateline, Body     *     *     *     * @param in        input file     * @return Lucene document     */    protected Document makeDocument(File in, String[] tags, boolean stored, boolean tokenized, boolean tfv)            throws Exception    {        Document doc = new Document();        // tag this document        if (tags != null)        {            for (int i = 0; i < tags.length; i++)            {                doc.add(new Field("tag" + i, tags[i], stored == true ? Field.Store.YES : Field.Store.NO,                                  tokenized == true ? Field.Index.TOKENIZED : Field.Index.UN_TOKENIZED, tfv == true ? Field.TermVector.YES : Field.TermVector.NO));            }        }        doc.add(new Field("file", in.getCanonicalPath(), stored == true ? Field.Store.YES : Field.Store.NO,                          tokenized == true ? Field.Index.TOKENIZED : Field.Index.UN_TOKENIZED, tfv == true ? 
Field.TermVector.YES : Field.TermVector.NO));        BufferedReader reader = new BufferedReader(new FileReader(in));        String line = null;        //First line is the date, 3rd is the title, rest is body        String dateStr = reader.readLine();        reader.readLine();//skip an empty line        String title = reader.readLine();        reader.readLine();//skip an empty line        StringBuffer body = new StringBuffer(1024);        while ((line = reader.readLine()) != null)        {            body.append(line).append(' ');        }        reader.close();                Date date = format.parse(dateStr.trim());        doc.add(new Field("date", DateTools.dateToString(date, DateTools.Resolution.SECOND), Field.Store.YES, Field.Index.UN_TOKENIZED));        if (title != null)        {            doc.add(new Field("title", title, stored == true ? Field.Store.YES : Field.Store.NO,                              tokenized == true ? Field.Index.TOKENIZED : Field.Index.UN_TOKENIZED, tfv == true ? Field.TermVector.YES : Field.TermVector.NO));        }        if (body.length() > 0)        {            doc.add(new Field("body", body.toString(), stored == true ? Field.Store.YES : Field.Store.NO,                              tokenized == true ? Field.Index.TOKENIZED : Field.Index.UN_TOKENIZED, tfv == true ? Field.TermVector.YES : Field.TermVector.NO));        }        return doc;    }    /**     * Make index, and collect time data.     
*     * @param trd       run data to populate     * @param srcDir    directory with source files     * @param iw        index writer, already open     * @param stored    store values of fields     * @param tokenized tokenize fields     * @param tfv       store term vectors     * @throws Exception     */    protected void makeIndex(TestRunData trd, File srcDir, IndexWriter iw, boolean stored, boolean tokenized,                             boolean tfv, StandardOptions options) throws Exception    {        //File[] groups = srcDir.listFiles();        List files = new ArrayList();        getAllFiles(srcDir, null, files);        Document doc = null;        long cnt = 0L;        TimeData td = new TimeData();        td.name = "addDocument";        int scaleUp = options.getScaleUp();        int logStep = options.getLogStep();        int max = Math.min(files.size(), options.getMaximumDocumentsToIndex());        for (int s = 0; s < scaleUp; s++)        {            String[] tags = new String[]{srcDir.getName() + "/" + s};            int i = 0;            for (Iterator iterator = files.iterator(); iterator.hasNext() && i < max; i++)            {                File file = (File) iterator.next();                doc = makeDocument(file, tags, stored, tokenized, tfv);                td.start();                iw.addDocument(doc);                td.stop();                cnt++;                if (cnt % logStep == 0)                {                    System.err.println(" - processed " + cnt + ", run id=" + trd.getId());                    trd.addData(td);                    td.reset();                }            }        }        trd.addData(td);    }    public static void getAllFiles(File srcDir, FileFilter filter, List allFiles)    {        File [] files = srcDir.listFiles(filter);        for (int i = 0; i < files.length; i++)        {            File file = files[i];            if (file.isDirectory())            {                getAllFiles(file, filter, allFiles);           
 }            else            {                allFiles.add(file);            }        }    }    /**     * Parse the strings containing Lucene queries.     *     * @param qs array of strings containing query expressions     * @param a  analyzer to use when parsing queries     * @return array of Lucene queries     */    public static Query[] createQueries(List qs, Analyzer a)    {        QueryParser qp = new QueryParser("body", a);        List queries = new ArrayList();        for (int i = 0; i < qs.size(); i++)        {            try            {                Object query = qs.get(i);                Query q = null;                if (query instanceof String)                {                    q = qp.parse((String) query);                }                else if (query instanceof Query)                {                    q = (Query) query;                }                else                {                    System.err.println("Unsupported Query Type: " + query);                }                if (q != null)                {                    queries.add(q);                }            }            catch (Exception e)            {                e.printStackTrace();            }        }        return (Query[]) queries.toArray(new Query[0]);    }    /**     * Remove existing index.     *     * @throws Exception     */    protected void reset(File indexDir) throws Exception    {        if (indexDir.exists())        {            fullyDelete(indexDir);        }        indexDir.mkdirs();    }    /**     * Save a stream to a file.     *     * @param is         input stream     * @param out        output file     * @param closeInput if true, close the input stream when done.     
* @throws Exception if the output file cannot be opened, or reading or writing fails
     */
    protected void saveStream(InputStream is, File out, boolean closeInput) throws Exception
    {
        byte[] buf = new byte[4096];
        FileOutputStream fos = null;
        try
        {
            fos = new FileOutputStream(out);
            int len;
            long total = 0L;
            long lastReport = System.currentTimeMillis();
            while ((len = is.read(buf)) > 0)
            {
                fos.write(buf, 0, len);
                total += len;
                long now = System.currentTimeMillis();
                // Report progress at most once every five seconds.
                if (now - lastReport > 5000)
                {
                    System.err.println(" - copied " + total / 1024 + " kB...");
                    lastReport = now;
                }
            }
            fos.flush();
        }
        finally
        {
            // Release both streams even when the copy fails; nested so that a failing
            // close of the output cannot prevent the input from being closed.
            try
            {
                if (fos != null)
                {
                    fos.close();
                }
            }
            finally
            {
                if (closeInput)
                {
                    is.close();
                }
            }
        }
    }
}
上一页 12
💿 文件大小 5913 K
👤 上传用户 jjjjjkkkkjkjkjk
📂 所属分类 Java编程
🏷️ 相关标签

#lucene #版本
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -