dsindexer.java

来自「dspace 用j2ee架构的一个数字图书馆.开源程序」· Java 代码 · 共 793 行 · 第 1/2 页
JAVA
793 行
/* * DSIndexer.java * * Version: $Revision: 1.40 $ * * Date: $Date: 2006/03/16 18:05:09 $ * * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts * Institute of Technology.  All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * - Neither the name of the Hewlett-Packard Company nor the name of the * Massachusetts Institute of Technology nor the names of their * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */package org.dspace.search;import java.io.IOException;import java.io.InputStreamReader;import java.sql.SQLException;import java.util.ArrayList;import java.util.HashMap;import java.util.Iterator;import org.apache.log4j.Logger;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.Term;import org.dspace.authorize.AuthorizeException;import org.dspace.content.Bitstream;import org.dspace.content.Bundle;import org.dspace.content.Collection;import org.dspace.content.Community;import org.dspace.content.DCValue;import org.dspace.content.DSpaceObject;import org.dspace.content.Item;import org.dspace.content.ItemIterator;import org.dspace.content.MetadataSchema;import org.dspace.core.ConfigurationManager;import org.dspace.core.Constants;import org.dspace.core.Context;import org.dspace.core.LogManager;import org.dspace.handle.HandleManager;/** * DSIndexer contains the methods that index Items and their metadata, * collections, communities, etc. It is meant to either be invoked from the * command line (see dspace/bin/index-all) or via the indexContent() methods * within DSpace. */public class DSIndexer{    private static final Logger log = Logger.getLogger(DSIndexer.class);    // TODO: Support for analyzers per language, or multiple indices    /** The analyzer for this DSpace instance */    private static Analyzer analyzer = null;        /**     * IndexItem() adds a single item to the index     */    public static void indexContent(Context c, DSpaceObject dso)            throws SQLException, IOException    {        IndexWriter writer = openIndex(c, false);        try        {            switch (dso.getType())            {            case Constants.ITEM:                writeItemIndex(c, writer, (Item) dso);                break;            case Constants.COLLECTION:                writeCollectionIndex(c, writer, (Collection) dso);                break;            case Constants.COMMUNITY:                writeCommunityIndex(c, writer, (Community) dso);                break;            // FIXME: should probably default unknown type exception            }        }        finally        {            closeIndex(c, writer);        }    }    /**     * unIndex removes an Item, Collection, or Community only works if the     * DSpaceObject has a handle (uses the handle for its unique ID)     *      * @param dso     *            DSpace Object, can be Community, Item, or Collection     */    public static void unIndexContent(Context c, DSpaceObject dso)            throws SQLException, IOException    {        String h = HandleManager.findHandle(c, dso);        unIndexContent(c, h);    }    public static void unIndexContent(Context c, String myhandle)            throws SQLException, IOException    {        String index_directory = ConfigurationManager.getProperty("search.dir");        IndexReader ir = IndexReader.open(index_directory);        try        {            if (myhandle != null)            {                // we have a handle (our unique ID, so remove)                Term t = new Term("handle", myhandle);                ir.delete(t);            }            else            {                log.warn("unindex of content with null handle attempted");                // FIXME: no handle, fail quietly - should log failure                //System.out.println("Error in unIndexContent: Object had no                // handle!");            }        }        finally        {            ir.close();        }    }    /**     * reIndexContent removes something from the index, then re-indexes it     *      * @param c context object     * @param dso  object to re-index     */    public static void reIndexContent(Context c, DSpaceObject dso)            throws SQLException, IOException    {        unIndexContent(c, dso);        indexContent(c, dso);    }    /**     * create full index - wiping old index     *      * @param c   context to use     */    public static void createIndex(Context c) throws SQLException, IOException    {        IndexWriter writer = openIndex(c, true);        try        {            indexAllCommunities(c, writer);            indexAllCollections(c, writer);            indexAllItems(c, writer);            // optimize the index - important to do regularly to reduce            // filehandle            // usage            // and keep performance fast!            writer.optimize();        }        finally        {            closeIndex(c, writer);        }    }    /**     * When invoked as a command-line tool, (re)-builds the whole index     *      * @param args     *            the command-line arguments, none used     */    public static void main(String[] args) throws Exception    {        Context c = new Context();        // for testing, pass in a handle of something to remove...        if ((args.length == 2) && (args[0].equals("remove")))        {            unIndexContent(c, args[1]);        }        else        {            c.setIgnoreAuthorization(true);            createIndex(c);            System.out.println("Done with indexing");        }    }    /**     * Get the Lucene analyzer to use according to current configuration (or     * default). TODO: Should have multiple analyzers (and maybe indices?) for     * multi-lingual DSpaces.     *      * @return <code>Analyzer</code> to use     * @throws IllegalStateException     *             if the configured analyzer can't be instantiated     */    static Analyzer getAnalyzer() throws IllegalStateException    {        if (analyzer == null)        {            // We need to find the analyzer class from the configuration            String analyzerClassName = ConfigurationManager                    .getProperty("search.analyzer");            if (analyzerClassName == null)            {                // Use default                analyzerClassName = "org.dspace.search.DSAnalyzer";            }            try            {                Class analyzerClass = Class.forName(analyzerClassName);                analyzer = (Analyzer) analyzerClass.newInstance();            }            catch (Exception e)            {                log.fatal(LogManager.getHeader(null, "no_search_analyzer",                        "search.analyzer=" + analyzerClassName), e);                throw new IllegalStateException(e.toString());            }        }        return analyzer;    }            ////////////////////////////////////    //      Private    ////////////////////////////////////    /**     * prepare index, opening writer, and wiping out existing index if necessary     */    private static IndexWriter openIndex(Context c, boolean wipe_existing)            throws IOException    {        IndexWriter writer;        String index_directory = ConfigurationManager.getProperty("search.dir");        writer = new IndexWriter(index_directory, getAnalyzer(),                wipe_existing);        /* Set maximum number of terms to index if present in dspace.cfg */        if (ConfigurationManager.getProperty("search.maxfieldlength") != null)        {            int maxfieldlength = ConfigurationManager                    .getIntProperty("search.maxfieldlength");            if (maxfieldlength == -1)            {                writer.maxFieldLength = Integer.MAX_VALUE;            }            else            {                writer.maxFieldLength = maxfieldlength;            }        }        return writer;    }    /**     * close up the indexing engine     */    private static void closeIndex(Context c, IndexWriter writer)            throws IOException    {        if (writer != null)        {            writer.close();        }    }    private static String buildItemLocationString(Context c, Item myitem)            throws SQLException    {        // build list of community ids        Community[] communities = myitem.getCommunities();        // build list of collection ids        Collection[] collections = myitem.getCollections();        // now put those into strings        String location = "";        int i = 0;        for (i = 0; i < communities.length; i++)            location = new String(location + " m" + communities[i].getID());        for (i = 0; i < collections.length; i++)            location = new String(location + " l" + collections[i].getID());        return location;    }    private static String buildCollectionLocationString(Context c,            Collection target) throws SQLException    {        // build list of community ids        Community[] communities = target.getCommunities();        // now put those into strings        String location = "";        int i = 0;        for (i = 0; i < communities.length; i++)            location = new String(location + " m" + communities[i].getID());        return location;    }    /**     * iterate through the communities, and index each one     */    private static void indexAllCommunities(Context c, IndexWriter writer)            throws SQLException, IOException    {        Community[] targets = Community.findAll(c);        int i;        for (i = 0; i < targets.length; i++)            writeCommunityIndex(c, writer, targets[i]);    }    /**     * iterate through collections, indexing each one     */    private static void indexAllCollections(Context c, IndexWriter writer)            throws SQLException, IOException    {        Collection[] targets = Collection.findAll(c);        int i;        for (i = 0; i < targets.length; i++)            writeCollectionIndex(c, writer, targets[i]);    }    /**     * iterate through all items, indexing each one     */    private static void indexAllItems(Context c, IndexWriter writer)            throws SQLException, IOException    {        ItemIterator i = Item.findAll(c);        while (i.hasNext())        {            Item target = (Item) i.next();            writeItemIndex(c, writer, target);
dsindexer.java - 源码说明

本页面展示了「dspace 用j2ee架构的一个数字图书馆.开源程序」中的 dsindexer.java 源码文件，采用 Java 编程语言编写，共 793 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与dspace相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?