📄 lucenemethods.java

📁 lucene2.2.0版本
💻 JAVA
字号:
package lucli;

/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2001 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" and
 *    "Apache Lucene" must not be used to endorse or promote products
 *    derived from this software without prior written permission. For
 *    written permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    "Apache Lucene", nor may "Apache" appear in their name, without
 *    prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Hashtable;
import java.util.Vector;
import java.util.TreeMap;
import java.util.Map.Entry;
import java.util.Set;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.Enumeration;

import jline.ConsoleReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.IndexReader.FieldOption;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;

/**
 * Various methods that interact with Lucene and provide info about the
 * index, search, etc. Parts adapted from Lucene demo.
 *
 * <p>Every public operation opens the index named in the constructor, does
 * its work, and closes what it opened before returning. The current searcher
 * and query are kept in instance fields with no synchronization.
 */
class LuceneMethods {

  /** Number of hits printed before the user is asked whether to continue. */
  private static final int HITS_PER_PAGE = 10;

  /** Token position after which {@link #invertDocument} stops reading a field. */
  private static final int MAX_FIELD_LENGTH = 10000;

  /** Number of most frequent tokens printed per document by {@link #invertDocument}. */
  private static final int MAX_TOKENS_SHOWN = 10;

  /** Maximum number of entries printed by {@link #terms}. */
  private static final int MAX_TERMS_SHOWN = 100;

  private int numDocs;
  private String indexName; //directory of this index
  private Vector fields; //names of all fields, as a vector
  private Vector indexedFields; //names of the indexed fields, as a vector
  private String fieldsArray[]; //names of all fields, as an array
  private Searcher searcher; //searcher of the search in progress, null when none is open
  private Query query; //current parsed query

  /**
   * Remembers the index location and prints a greeting; the index itself is
   * not opened here.
   *
   * @param index directory of the index to work on
   */
  public LuceneMethods(String index) {
    indexName = index;
    message("Lucene CLI. Using directory '" + indexName + "'. Type 'help' for instructions.");
  }

  /**
   * Prints the document count, the field names, and whether the index is locked.
   *
   * @throws java.io.IOException if the index cannot be opened or read
   */
  public void info() throws java.io.IOException {
    IndexReader indexReader = IndexReader.open(indexName);
    try {
      getFieldInfo();
      numDocs = indexReader.numDocs();
      message("Index has " + numDocs + " documents ");
      message("All Fields:" + fields.toString());
      message("Indexed Fields:" + indexedFields.toString());

      if (IndexReader.isLocked(indexName)) {
        message("Index is locked");
      }
      //IndexReader.getCurrentVersion(indexName);
      //System.out.println("Version:" + version);
    } finally {
      // Release the reader even when one of the calls above fails.
      indexReader.close();
    }
  }

  /**
   * Runs a query and prints the hits one page at a time, asking on the console
   * whether to continue after each full page.
   *
   * @param queryString the query, in query parser syntax
   * @param explain whether to print a score explanation for every hit
   * @param showTokens whether to print the most frequent tokens of every hit
   * @param cr console used for the "more" prompt
   * @throws java.io.IOException if the index or the console cannot be read
   * @throws org.apache.lucene.queryParser.ParseException if the query cannot be parsed
   */
  public void search(String queryString, boolean explain, boolean showTokens, ConsoleReader cr)
      throws java.io.IOException, org.apache.lucene.queryParser.ParseException {
    try {
      Hits hits = initSearch(queryString);
      System.out.println(hits.length() + " total matching documents");
      if (explain) {
        // Explanations are computed for the query built over the indexed fields only.
        query = explainQuery(queryString);
      }

      message("--------------------------------------");
      for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
        int end = Math.min(hits.length(), start + HITS_PER_PAGE);
        for (int ii = start; ii < end; ii++) {
          Document doc = hits.doc(ii);
          message("---------------- " + (ii + 1) + " score:" + hits.score(ii) + "---------------------");
          printHit(doc);
          if (showTokens) {
            invertDocument(doc);
          }
          if (explain) {
            Explanation exp = searcher.explain(query, hits.id(ii));
            message("Explanation:" + exp.toString());
          }
        }
        message("#################################################");

        if (hits.length() > end) {
          // TODO: don't let the input end up in the command line history
          String answer = cr.readLine("more (y/n) ? ");
          // A null answer means the console reached end of input; stop paging
          // instead of failing with a NullPointerException.
          if (answer == null || answer.length() == 0 || answer.charAt(0) == 'n') {
            break;
          }
        }
      }
    } finally {
      // Also reached when parsing, searching or printing fails.
      closeSearcher();
    }
  }

  /**
   * Prints every value of every known field of a hit.
   *
   * @todo Allow user to specify what field(s) to display
   */
  private void printHit(Document doc) {
    for (int ii = 0; ii < fieldsArray.length; ii++) {
      String currField = fieldsArray[ii];
      String[] result = doc.getValues(currField);

      if (result != null) {
        for (int i = 0; i < result.length; i++) {
          message(currField + ":" + result[i]);
        }
      } else {
        message(currField + ": <not available>");
      }
    }
    //another option is to just do message(doc);
  }

  /**
   * Optimizes the existing index and reports how long it took.
   *
   * @throws IOException if the index cannot be opened or written
   */
  public void optimize() throws IOException {
    //open the index writer. False: don't create a new one
    IndexWriter indexWriter = new IndexWriter(indexName, new StandardAnalyzer(), false);
    try {
      message("Starting to optimize index.");
      long start = System.currentTimeMillis();
      indexWriter.optimize();
      message("Done optimizing index. Took " + (System.currentTimeMillis() - start) + " msecs");
    } finally {
      // Close the writer on failure too, so it does not keep holding the index.
      indexWriter.close();
    }
  }

  /**
   * Parses the query against the indexed fields only, for use with
   * {@link Searcher#explain}. Uses the searcher opened by
   * {@link #initSearch}; opening another one here would leave the first
   * one unclosed.
   */
  private Query explainQuery(String queryString) throws IOException, ParseException {
    getFieldInfo();
    query = parseQuery(toStringArray(indexedFields), queryString);
    return query;
  }

  /**
   * Opens a searcher on the index and runs the query against all fields. The
   * searcher stays open in {@link #searcher} because the returned hits are
   * read through it; the caller closes it with {@link #closeSearcher()}.
   *
   * @todo Allow user to specify analyzer
   */
  private Hits initSearch(String queryString) throws IOException, ParseException {
    searcher = new IndexSearcher(indexName);
    getFieldInfo();
    fieldsArray = toStringArray(fields);
    query = parseQuery(fieldsArray, queryString);
    return searcher.search(query);
  }

  /** Parses the query over the given fields with a standard analyzer and echoes the result. */
  private static Query parseQuery(String[] fieldNames, String queryString) throws ParseException {
    Analyzer analyzer = new StandardAnalyzer();
    MultiFieldQueryParser parser = new MultiFieldQueryParser(fieldNames, analyzer);
    Query parsed = parser.parse(queryString);
    System.out.println("Searching for: " + parsed.toString());
    return parsed;
  }

  /** Copies a vector of field names into a string array. */
  private static String[] toStringArray(Vector names) {
    String[] result = new String[names.size()];
    for (int ii = 0; ii < result.length; ii++) {
      result[ii] = (String) names.get(ii);
    }
    return result;
  }

  /** Closes the searcher of the current search, if one is open, and forgets it. */
  private void closeSearcher() throws IOException {
    if (searcher != null) {
      Searcher open = searcher;
      searcher = null;
      open.close();
    }
  }

  /**
   * Prints the number of documents matching a query.
   *
   * @param queryString the query, in query parser syntax
   * @throws java.io.IOException if the index cannot be read
   * @throws ParseException if the query cannot be parsed
   */
  public void count(String queryString) throws java.io.IOException, ParseException {
    try {
      Hits hits = initSearch(queryString);
      System.out.println(hits.length() + " total documents");
    } finally {
      closeSearcher();
    }
  }

  /** Prints a line on standard output. */
  static public void message(String s) {
    System.out.println(s);
  }

  /** Reloads {@link #fields} and {@link #indexedFields} from the index. */
  private void getFieldInfo() throws IOException {
    IndexReader indexReader = IndexReader.open(indexName);
    try {
      fields = collectFieldNames(indexReader, FieldOption.ALL);
      indexedFields = collectFieldNames(indexReader, FieldOption.INDEXED);
    } finally {
      indexReader.close();
    }
  }

  /** Returns the non-null, non-empty field names the reader reports for the given option. */
  private static Vector collectFieldNames(IndexReader indexReader, FieldOption option) {
    Vector names = new Vector();
    Iterator fieldIterator = indexReader.getFieldNames(option).iterator();
    while (fieldIterator.hasNext()) {
      Object field = fieldIterator.next();
      if (field != null && !field.equals("")) {
        names.add(field.toString());
      }
    }
    return names;
  }

  // Copied from DocumentWriter
  /**
   * Re-tokenizes the indexed, tokenized fields of a document and prints its
   * most frequent tokens with their counts.
   */
  private void invertDocument(Document doc) throws IOException {
    Hashtable tokenHash = new Hashtable();
    Analyzer analyzer = new StandardAnalyzer();

    Enumeration docFields = doc.fields();
    while (docFields.hasMoreElements()) {
      Field field = (Field) docFields.nextElement();
      // Only indexed, tokenized fields are analyzed; all others are skipped.
      if (field.isIndexed() && field.isTokenized()) {
        countTokens(analyzer, field, tokenHash);
      }
    }

    Entry[] sortedHash = getSortedHashtableEntries(tokenHash);
    for (int ii = 0; ii < sortedHash.length && ii < MAX_TOKENS_SHOWN; ii++) {
      Entry currentEntry = sortedHash[ii];
      message((ii + 1) + ":" + currentEntry.getKey() + " " + currentEntry.getValue());
    }
  }

  /**
   * Tokenizes one field and adds the number of occurrences of each token text
   * to {@code tokenHash} (String to Integer).
   *
   * @throws IllegalArgumentException if the field has neither a Reader nor a String value
   */
  private static void countTokens(Analyzer analyzer, Field field, Hashtable tokenHash)
      throws IOException {
    Reader reader; // find or make Reader
    if (field.readerValue() != null) {
      reader = field.readerValue();
    } else if (field.stringValue() != null) {
      reader = new StringReader(field.stringValue());
    } else {
      throw new IllegalArgumentException("field must have either String or Reader value");
    }

    int position = 0;
    TokenStream stream = analyzer.tokenStream(field.name(), reader);
    try {
      for (Token t = stream.next(); t != null; t = stream.next()) {
        position += t.getPositionIncrement();

        String name = t.termText();
        Integer seen = (Integer) tokenHash.get(name);
        if (seen == null) { // not in there yet
          tokenHash.put(name, new Integer(1)); //first one
        } else {
          tokenHash.put(name, new Integer(seen.intValue() + 1));
        }
        if (position > MAX_FIELD_LENGTH) {
          break;
        }
      }
    } finally {
      stream.close();
    }
  }

  /**
   * Prints terms of the index with their document frequencies: at most the
   * first 100 entries, ordered by their "field:text" key.
   *
   * @param field the name of the field whose terms are listed, or null for all fields
   * @throws IOException if the index cannot be opened or read
   */
  public void terms(String field) throws IOException {
    TreeMap termMap = new TreeMap();
    IndexReader indexReader = IndexReader.open(indexName);
    try {
      TermEnum terms = indexReader.terms();
      try {
        while (terms.next()) {
          Term term = terms.term();
          //message(term.field() + ":" + term.text() + " freq:" + terms.docFreq());
          //if we're either not looking by field or we're matching the specific field
          if ((field == null) || field.equals(term.field())) {
            termMap.put(term.field() + ":" + term.text(), new Integer(terms.docFreq()));
          }
        }
      } finally {
        terms.close();
      }
    } finally {
      indexReader.close();
    }

    Iterator termIterator = termMap.entrySet().iterator();
    for (int ii = 0; termIterator.hasNext() && ii < MAX_TERMS_SHOWN; ii++) {
      Entry termEntry = (Entry) termIterator.next();
      message(termEntry.getKey() + ": " + termEntry.getValue());
    }
  }

  /**
   * Returns the entries of a hashtable sorted by value, largest value first.
   * The values must be mutually {@link Comparable}.
   *
   * @param h the hashtable we're sorting
   * @return the entries of {@code h} in descending order of value
   * from http://developer.java.sun.com/developer/qow/archive/170/index.jsp
   */
  public static Entry[] getSortedHashtableEntries(Hashtable h) {
    Set set = h.entrySet();
    Entry[] entries = (Entry[]) set.toArray(new Entry[set.size()]);
    Arrays.sort(entries, new Comparator() {
      public int compare(Object o1, Object o2) {
        Object v1 = ((Entry) o1).getValue();
        Object v2 = ((Entry) o2).getValue();
        return ((Comparable) v2).compareTo(v1); //descending order
      }
    });
    return entries;
  }
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -