📄 lucenesearchprovider.java

📁 jspwiki source code,jspwiki source code
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*    JSPWiki - a JSP-based WikiWiki clone.    Licensed to the Apache Software Foundation (ASF) under one    or more contributor license agreements.  See the NOTICE file    distributed with this work for additional information    regarding copyright ownership.  The ASF licenses this file    to you under the Apache License, Version 2.0 (the    "License"); you may not use this file except in compliance    with the License.  You may obtain a copy of the License at       http://www.apache.org/licenses/LICENSE-2.0    Unless required by applicable law or agreed to in writing,    software distributed under the License is distributed on an    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY    KIND, either express or implied.  See the License for the    specific language governing permissions and limitations    under the License.     */package com.ecyrd.jspwiki.search;import java.io.*;import java.util.*;import org.apache.commons.lang.StringUtils;import org.apache.log4j.Logger;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.TokenStream;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.Term;import org.apache.lucene.queryParser.MultiFieldQueryParser;import org.apache.lucene.queryParser.ParseException;import org.apache.lucene.queryParser.QueryParser;import org.apache.lucene.search.Hits;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.Searcher;import org.apache.lucene.search.highlight.Highlighter;import org.apache.lucene.search.highlight.QueryScorer;import org.apache.lucene.search.highlight.SimpleHTMLEncoder;import org.apache.lucene.search.highlight.SimpleHTMLFormatter;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import com.ecyrd.jspwiki.*;import com.ecyrd.jspwiki.attachment.Attachment;import com.ecyrd.jspwiki.attachment.AttachmentManager;import com.ecyrd.jspwiki.parser.MarkupParser;import com.ecyrd.jspwiki.providers.ProviderException;import com.ecyrd.jspwiki.providers.WikiPageProvider;import com.ecyrd.jspwiki.util.ClassUtil;import com.ecyrd.jspwiki.util.WatchDog;import com.ecyrd.jspwiki.util.WikiBackgroundThread;/** *  Interface for the search providers that handle searching the Wiki * *  @author Arent-Jan Banck *  @since 2.2.21. */public class LuceneSearchProvider implements SearchProvider{    protected static final Logger log = Logger.getLogger(LuceneSearchProvider.class);    private WikiEngine m_engine;    // Lucene properties.    /** Which analyzer to use.  Default is StandardAnalyzer. */    public static final String PROP_LUCENE_ANALYZER    = "jspwiki.lucene.analyzer";    private static final String PROP_LUCENE_INDEXDELAY   = "jspwiki.lucene.indexdelay";    private static final String PROP_LUCENE_INITIALDELAY = "jspwiki.lucene.initialdelay";    private String m_analyzerClass = "org.apache.lucene.analysis.standard.StandardAnalyzer";    private static final String LUCENE_DIR             = "lucene";    /**     *  Number of page updates before we optimize the index.     */    public static final int LUCENE_OPTIMIZE_COUNT      = 10;    protected static final String LUCENE_ID            = "id";    protected static final String LUCENE_PAGE_CONTENTS = "contents";    protected static final String LUCENE_AUTHOR        = "author";    protected static final String LUCENE_ATTACHMENTS   = "attachment";    protected static final String LUCENE_PAGE_NAME     = "name";    private String           m_luceneDirectory = null;    private int              m_updateCount = 0;    protected Vector<Object[]> m_updates = new Vector<Object[]>(); // Vector because multi-threaded.    /** Maximum number of fragments from search matches. */    private static final int MAX_FRAGMENTS = 3;    private static String c_punctuationSpaces = StringUtils.repeat(" ", MarkupParser.PUNCTUATION_CHARS_ALLOWED.length() );    /**     *  {@inheritDoc}     */    public void initialize(WikiEngine engine, Properties props)            throws NoRequiredPropertyException, IOException    {        m_engine = engine;        m_luceneDirectory = engine.getWorkDir()+File.separator+LUCENE_DIR;        int initialDelay = TextUtil.getIntegerProperty( props, PROP_LUCENE_INITIALDELAY, LuceneUpdater.INITIAL_DELAY );        int indexDelay   = TextUtil.getIntegerProperty( props, PROP_LUCENE_INDEXDELAY, LuceneUpdater.INDEX_DELAY );        m_analyzerClass = TextUtil.getStringProperty( props, PROP_LUCENE_ANALYZER, m_analyzerClass );        // FIXME: Just to be simple for now, we will do full reindex        // only if no files are in lucene directory.        File dir = new File(m_luceneDirectory);        log.info("Lucene enabled, cache will be in: "+dir.getAbsolutePath());        try        {            if( !dir.exists() )            {                dir.mkdirs();            }            if( !dir.exists() || !dir.canWrite() || !dir.canRead() )            {                log.error("Cannot write to Lucene directory, disabling Lucene: "+dir.getAbsolutePath());                throw new IOException( "Invalid Lucene directory." );            }            String[] filelist = dir.list();            if( filelist == null )            {                throw new IOException( "Invalid Lucene directory: cannot produce listing: "+dir.getAbsolutePath());            }        }        catch ( IOException e )        {            log.error("Problem while creating Lucene index - not using Lucene.", e);        }        // Start the Lucene update thread, which waits first        // for a little while before starting to go through        // the Lucene "pages that need updating".        LuceneUpdater updater = new LuceneUpdater( m_engine, this, initialDelay, indexDelay );        updater.start();    }    /**     *  Returns the handling engine.     *     *  @return Current WikiEngine     */    protected WikiEngine getEngine()    {        return m_engine;    }    /**     *  Performs a full Lucene reindex, if necessary.     *     *  @throws IOException If there's a problem during indexing     */    protected void doFullLuceneReindex()        throws IOException    {        File dir = new File(m_luceneDirectory);        String[] filelist = dir.list();        if( filelist == null )        {            throw new IOException( "Invalid Lucene directory: cannot produce listing: "+dir.getAbsolutePath());        }        try        {            if( filelist.length == 0 )            {                //                //  No files? Reindex!                //                Date start = new Date();                IndexWriter writer = null;                log.info("Starting Lucene reindexing, this can take a couple minutes...");                //                //  Do lock recovery, in case JSPWiki was shut down forcibly                //                Directory luceneDir = FSDirectory.getDirectory(dir,false);                if( IndexReader.isLocked(luceneDir) )                {                    log.info("JSPWiki was shut down while Lucene was indexing - unlocking now.");                    IndexReader.unlock( luceneDir );                }                try                {                    writer = new IndexWriter( m_luceneDirectory,                                              getLuceneAnalyzer(),                                              true );                    Collection allPages = m_engine.getPageManager().getAllPages();                    for( Iterator iterator = allPages.iterator(); iterator.hasNext(); )                    {                        WikiPage page = (WikiPage) iterator.next();                                                try                        {                            String text = m_engine.getPageManager().getPageText( page.getName(),                                                                                 WikiProvider.LATEST_VERSION );                            luceneIndexPage( page, text, writer );                        }                        catch( IOException e )                        {                            log.warn( "Unable to index page " + page.getName() + ", continuing to next ", e );                        }                    }                    Collection allAttachments = m_engine.getAttachmentManager().getAllAttachments();                    for( Iterator iterator = allAttachments.iterator(); iterator.hasNext(); )                    {                        Attachment att = (Attachment) iterator.next();                                                try                        {                            String text = getAttachmentContent( att.getName(),                                                                WikiProvider.LATEST_VERSION );                            luceneIndexPage( att, text, writer );                        }                        catch( IOException e )                        {                            log.warn( "Unable to index attachment " + att.getName() + ", continuing to next", e );                        }                    }                    writer.optimize();                }                finally                {                    try                    {                        if( writer != null ) writer.close();                    }                    catch( IOException e ) {}                }                Date end = new Date();                log.info("Full Lucene index finished in " +                         (end.getTime() - start.getTime()) + " milliseconds.");            }            else            {                log.info("Files found in Lucene directory, not reindexing.");            }        }        catch( NoClassDefFoundError e )        {            log.info("Lucene libraries do not exist - not using Lucene.");        }        catch ( IOException e )        {            log.error("Problem while creating Lucene index - not using Lucene.", e);        }        catch ( ProviderException e )        {            log.error("Problem reading pages while creating Lucene index (JSPWiki won't start.)", e);            throw new IllegalArgumentException("unable to create Lucene index");        }        catch( ClassNotFoundException e )        {            log.error("Illegal Analyzer specified:",e);        }        catch( Exception e )        {            log.error("Unable to start lucene",e);        }    }    /**     *  Fetches the attachment content from the repository.     *  Content is flat text that can be used for indexing/searching or display     *       *  @param attachmentName Name of the attachment.     *  @param version The version of the attachment.     *       *  @return the content of the Attachment as a String.     */    protected String getAttachmentContent( String attachmentName, int version )    {        AttachmentManager mgr = m_engine.getAttachmentManager();        try        {            Attachment att = mgr.getAttachmentInfo( attachmentName, version );            //FIXME: Find out why sometimes att is null            if(att != null)            {                return getAttachmentContent( att );            }        }        catch (ProviderException e)        {            log.error("Attachment cannot be loaded", e);        }        // Something was wrong, no result is returned.        return null;    }    /**     * @param att Attachment to get content for. Filename extension is used to determine the type of the attachment.     * @return String representing the content of the file.     * FIXME This is a very simple implementation of some text-based attachment, mainly used for testing.     * This should be replaced /moved to Attachment search providers or some other 'plugable' wat to search attachments     */    protected String getAttachmentContent( Attachment att )    {        AttachmentManager mgr = m_engine.getAttachmentManager();        //FIXME: Add attachment plugin structure        String filename = att.getFileName();        if(filename.endsWith(".txt") ||           filename.endsWith(".xml") ||           filename.endsWith(".ini") ||           filename.endsWith(".html"))        {            InputStream attStream;            try            {                attStream = mgr.getAttachmentStream( att );                StringWriter sout = new StringWriter();                FileUtil.copyContents( new InputStreamReader(attStream), sout );                attStream.close();                sout.close();                return sout.toString();            }            catch (ProviderException e)            {                log.error("Attachment cannot be loaded", e);                return null;            }            catch (IOException e)            {                log.error("Attachment cannot be loaded", e);                return null;            }        }        return null;    }    /**     *  Updates the lucene index for a single page.     *     *  @param page The WikiPage to check     *  @param text The page text to index.     */    protected synchronized void updateLuceneIndex( WikiPage page, String text )    {        IndexWriter writer = null;        log.debug("Updating Lucene index for page '" + page.getName() + "'...");        try        {            pageRemoved(page);            // Now add back the new version.            writer = new IndexWriter(m_luceneDirectory, getLuceneAnalyzer(), false);            luceneIndexPage(page, text, writer);            m_updateCount++;            if( m_updateCount >= LUCENE_OPTIMIZE_COUNT )            {                writer.optimize();                m_updateCount = 0;            }        }        catch ( IOException e )        {            log.error("Unable to update page '" + page.getName() + "' from Lucene index", e);        }
12 下一页
💿 文件大小 15040 K
👤 上传用户 add505
📂 所属分类 Java编程
🏷️ 相关标签

#jspwiki #source #code
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -