📄 dsindexer.java
字号:
/* * DSIndexer.java * * Version: $Revision: 1.40 $ * * Date: $Date: 2006/03/16 18:05:09 $ * * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts * Institute of Technology. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * - Neither the name of the Hewlett-Packard Company nor the name of the * Massachusetts Institute of Technology nor the names of their * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */package org.dspace.search;import java.io.IOException;import java.io.InputStreamReader;import java.sql.SQLException;import java.util.ArrayList;import java.util.HashMap;import java.util.Iterator;import org.apache.log4j.Logger;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.Term;import org.dspace.authorize.AuthorizeException;import org.dspace.content.Bitstream;import org.dspace.content.Bundle;import org.dspace.content.Collection;import org.dspace.content.Community;import org.dspace.content.DCValue;import org.dspace.content.DSpaceObject;import org.dspace.content.Item;import org.dspace.content.ItemIterator;import org.dspace.content.MetadataSchema;import org.dspace.core.ConfigurationManager;import org.dspace.core.Constants;import org.dspace.core.Context;import org.dspace.core.LogManager;import org.dspace.handle.HandleManager;/** * DSIndexer contains the methods that index Items and their metadata, * collections, communities, etc. It is meant to either be invoked from the * command line (see dspace/bin/index-all) or via the indexContent() methods * within DSpace. */public class DSIndexer{ private static final Logger log = Logger.getLogger(DSIndexer.class); // TODO: Support for analyzers per language, or multiple indices /** The analyzer for this DSpace instance */ private static Analyzer analyzer = null; /** * IndexItem() adds a single item to the index */ public static void indexContent(Context c, DSpaceObject dso) throws SQLException, IOException { IndexWriter writer = openIndex(c, false); try { switch (dso.getType()) { case Constants.ITEM: writeItemIndex(c, writer, (Item) dso); break; case Constants.COLLECTION: writeCollectionIndex(c, writer, (Collection) dso); break; case Constants.COMMUNITY: writeCommunityIndex(c, writer, (Community) dso); break; // FIXME: should probably default unknown type exception } } finally { closeIndex(c, writer); } } /** * unIndex removes an Item, Collection, or Community only works if the * DSpaceObject has a handle (uses the handle for its unique ID) * * @param dso * DSpace Object, can be Community, Item, or Collection */ public static void unIndexContent(Context c, DSpaceObject dso) throws SQLException, IOException { String h = HandleManager.findHandle(c, dso); unIndexContent(c, h); } public static void unIndexContent(Context c, String myhandle) throws SQLException, IOException { String index_directory = ConfigurationManager.getProperty("search.dir"); IndexReader ir = IndexReader.open(index_directory); try { if (myhandle != null) { // we have a handle (our unique ID, so remove) Term t = new Term("handle", myhandle); ir.delete(t); } else { log.warn("unindex of content with null handle attempted"); // FIXME: no handle, fail quietly - should log failure //System.out.println("Error in unIndexContent: Object had no // handle!"); } } finally { ir.close(); } } /** * reIndexContent removes something from the index, then re-indexes it * * @param c context object * @param dso object to re-index */ public static void reIndexContent(Context c, DSpaceObject dso) throws SQLException, IOException { unIndexContent(c, dso); indexContent(c, dso); } /** * create full index - wiping old index * * @param c context to use */ public static void createIndex(Context c) throws SQLException, IOException { IndexWriter writer = openIndex(c, true); try { indexAllCommunities(c, writer); indexAllCollections(c, writer); indexAllItems(c, writer); // optimize the index - important to do regularly to reduce // filehandle // usage // and keep performance fast! writer.optimize(); } finally { closeIndex(c, writer); } } /** * When invoked as a command-line tool, (re)-builds the whole index * * @param args * the command-line arguments, none used */ public static void main(String[] args) throws Exception { Context c = new Context(); // for testing, pass in a handle of something to remove... if ((args.length == 2) && (args[0].equals("remove"))) { unIndexContent(c, args[1]); } else { c.setIgnoreAuthorization(true); createIndex(c); System.out.println("Done with indexing"); } } /** * Get the Lucene analyzer to use according to current configuration (or * default). TODO: Should have multiple analyzers (and maybe indices?) for * multi-lingual DSpaces. * * @return <code>Analyzer</code> to use * @throws IllegalStateException * if the configured analyzer can't be instantiated */ static Analyzer getAnalyzer() throws IllegalStateException { if (analyzer == null) { // We need to find the analyzer class from the configuration String analyzerClassName = ConfigurationManager .getProperty("search.analyzer"); if (analyzerClassName == null) { // Use default analyzerClassName = "org.dspace.search.DSAnalyzer"; } try { Class analyzerClass = Class.forName(analyzerClassName); analyzer = (Analyzer) analyzerClass.newInstance(); } catch (Exception e) { log.fatal(LogManager.getHeader(null, "no_search_analyzer", "search.analyzer=" + analyzerClassName), e); throw new IllegalStateException(e.toString()); } } return analyzer; } //////////////////////////////////// // Private //////////////////////////////////// /** * prepare index, opening writer, and wiping out existing index if necessary */ private static IndexWriter openIndex(Context c, boolean wipe_existing) throws IOException { IndexWriter writer; String index_directory = ConfigurationManager.getProperty("search.dir"); writer = new IndexWriter(index_directory, getAnalyzer(), wipe_existing); /* Set maximum number of terms to index if present in dspace.cfg */ if (ConfigurationManager.getProperty("search.maxfieldlength") != null) { int maxfieldlength = ConfigurationManager .getIntProperty("search.maxfieldlength"); if (maxfieldlength == -1) { writer.maxFieldLength = Integer.MAX_VALUE; } else { writer.maxFieldLength = maxfieldlength; } } return writer; } /** * close up the indexing engine */ private static void closeIndex(Context c, IndexWriter writer) throws IOException { if (writer != null) { writer.close(); } } private static String buildItemLocationString(Context c, Item myitem) throws SQLException { // build list of community ids Community[] communities = myitem.getCommunities(); // build list of collection ids Collection[] collections = myitem.getCollections(); // now put those into strings String location = ""; int i = 0; for (i = 0; i < communities.length; i++) location = new String(location + " m" + communities[i].getID()); for (i = 0; i < collections.length; i++) location = new String(location + " l" + collections[i].getID()); return location; } private static String buildCollectionLocationString(Context c, Collection target) throws SQLException { // build list of community ids Community[] communities = target.getCommunities(); // now put those into strings String location = ""; int i = 0; for (i = 0; i < communities.length; i++) location = new String(location + " m" + communities[i].getID()); return location; } /** * iterate through the communities, and index each one */ private static void indexAllCommunities(Context c, IndexWriter writer) throws SQLException, IOException { Community[] targets = Community.findAll(c); int i; for (i = 0; i < targets.length; i++) writeCommunityIndex(c, writer, targets[i]); } /** * iterate through collections, indexing each one */ private static void indexAllCollections(Context c, IndexWriter writer) throws SQLException, IOException { Collection[] targets = Collection.findAll(c); int i; for (i = 0; i < targets.length; i++) writeCollectionIndex(c, writer, targets[i]); } /** * iterate through all items, indexing each one */ private static void indexAllItems(Context c, IndexWriter writer) throws SQLException, IOException { ItemIterator i = Item.findAll(c); while (i.hasNext()) { Item target = (Item) i.next(); writeItemIndex(c, writer, target);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -