📄 lucenesearchprovider.java
字号:
/*JSPWiki - a JSP-based WikiWiki clone.Copyright (C) 2005 Janne Jalkanen (Janne.Jalkanen@iki.fi)This program is free software; you can redistribute it and/or modifyit under the terms of the GNU Lesser General Public License as published bythe Free Software Foundation; either version 2.1 of the License, or(at your option) any later version.This program is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See theGNU Lesser General Public License for more details.You should have received a copy of the GNU Lesser General Public Licensealong with this program; if not, write to the Free SoftwareFoundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/package com.ecyrd.jspwiki.search;import java.io.File;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.io.StringReader;import java.io.StringWriter;import java.util.ArrayList;import java.util.Collection;import java.util.Date;import java.util.Iterator;import java.util.Properties;import java.util.Vector;import org.apache.log4j.Logger;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.Term;import org.apache.lucene.queryParser.ParseException;import org.apache.lucene.queryParser.QueryParser;import org.apache.lucene.search.Hits;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.Searcher;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import com.ecyrd.jspwiki.FileUtil;import com.ecyrd.jspwiki.NoRequiredPropertyException;import com.ecyrd.jspwiki.SearchResult;import com.ecyrd.jspwiki.TextUtil;import com.ecyrd.jspwiki.WikiEngine;import com.ecyrd.jspwiki.WikiPage;import com.ecyrd.jspwiki.WikiProvider;import com.ecyrd.jspwiki.attachment.Attachment;import com.ecyrd.jspwiki.attachment.AttachmentManager;import com.ecyrd.jspwiki.providers.ProviderException;import com.ecyrd.jspwiki.providers.WikiPageProvider;import com.ecyrd.jspwiki.util.ClassUtil;/** * Interface for the search providers that handle searching the Wiki * * @author Arent-Jan Banck for Informatica * @since 2.2.21. */public class LuceneSearchProvider implements SearchProvider { private static final Logger log = Logger.getLogger(LuceneSearchProvider.class); private WikiEngine m_engine; // Lucene properties. /** Which analyzer to use. Default is StandardAnalyzer. */ public static final String PROP_LUCENE_ANALYZER = "jspwiki.lucene.analyzer"; private String m_analyzerClass = "org.apache.lucene.analysis.standard.StandardAnalyzer"; private static final String LUCENE_DIR = "lucene"; // Number of page updates before we optimize the index. public static final int LUCENE_OPTIMIZE_COUNT = 10; private static final String LUCENE_ID = "id"; private static final String LUCENE_PAGE_CONTENTS = "contents"; private static final String LUCENE_AUTHOR = "author"; private static final String LUCENE_ATTACHMENTS = "attachment"; private static final String LUCENE_PAGE_NAME = "name"; private String m_luceneDirectory = null; private int m_updateCount = 0; private Thread m_luceneUpdateThread = null; private Vector m_updates = new Vector(); // Vector because multi-threaded. public void initialize(WikiEngine engine, Properties props) throws NoRequiredPropertyException, IOException { m_engine = engine; m_luceneDirectory = engine.getWorkDir()+File.separator+LUCENE_DIR; // FIXME: Just to be simple for now, we will do full reindex // only if no files are in lucene directory. File dir = new File(m_luceneDirectory); log.info("Lucene enabled, cache will be in: "+dir.getAbsolutePath()); try { if( !dir.exists() ) { dir.mkdirs(); } if( !dir.exists() || !dir.canWrite() || !dir.canRead() ) { log.error("Cannot write to Lucene directory, disabling Lucene: "+dir.getAbsolutePath()); throw new IOException( "Invalid Lucene directory." ); } String[] filelist = dir.list(); if( filelist == null ) { throw new IOException( "Invalid Lucene directory: cannot produce listing: "+dir.getAbsolutePath()); } } catch ( IOException e ) { log.error("Problem while creating Lucene index - not using Lucene.", e); } startLuceneUpdateThread(); } /** * Performs a full Lucene reindex, if necessary. * @throws IOException */ private void doFullLuceneReindex() throws IOException { File dir = new File(m_luceneDirectory); String[] filelist = dir.list(); if( filelist == null ) { throw new IOException( "Invalid Lucene directory: cannot produce listing: "+dir.getAbsolutePath()); } try { if( filelist.length == 0 ) { // // No files? Reindex! // Date start = new Date(); IndexWriter writer = null; log.info("Starting Lucene reindexing, this can take a couple minutes..."); // // Do lock recovery, in case JSPWiki was shut down forcibly // Directory luceneDir = FSDirectory.getDirectory(dir,false); if( IndexReader.isLocked(luceneDir) ) { log.info("JSPWiki was shut down while Lucene was indexing - unlocking now."); IndexReader.unlock( luceneDir ); } try { writer = new IndexWriter( m_luceneDirectory, getLuceneAnalyzer(), true ); Collection allPages = m_engine.getPageManager().getAllPages(); for( Iterator iterator = allPages.iterator(); iterator.hasNext(); ) { WikiPage page = (WikiPage) iterator.next(); String text = m_engine.getPageManager().getPageText( page.getName(), WikiProvider.LATEST_VERSION ); luceneIndexPage( page, text, writer ); } Collection allAttachments = m_engine.getAttachmentManager().getAllAttachments(); for( Iterator iterator = allAttachments.iterator(); iterator.hasNext(); ) { Attachment att = (Attachment) iterator.next(); String text = getAttachmentContent( att.getName(), WikiProvider.LATEST_VERSION ); luceneIndexPage( att, text, writer ); } writer.optimize(); } finally { try { if( writer != null ) writer.close(); } catch( IOException e ) {} } Date end = new Date(); log.info("Full Lucene index finished in " + (end.getTime() - start.getTime()) + " milliseconds."); } else { log.info("Files found in Lucene directory, not reindexing."); } } catch( NoClassDefFoundError e ) { log.info("Lucene libraries do not exist - not using Lucene."); } catch ( IOException e ) { log.error("Problem while creating Lucene index - not using Lucene.", e); } catch ( ProviderException e ) { log.error("Problem reading pages while creating Lucene index (JSPWiki won't start.)", e); throw new IllegalArgumentException("unable to create Lucene index"); } catch( ClassNotFoundException e ) { log.error("Illegal Analyzer specified:",e); } catch( Exception e ) { log.error("Unable to start lucene",e); } } /** * Fetches the attachment content from the repository. * Content is flat text that can be used for indexing/searching or display */ private String getAttachmentContent( String attachmentName, int version ) { AttachmentManager mgr = m_engine.getAttachmentManager(); try { Attachment att = mgr.getAttachmentInfo( attachmentName, version ); //FIXME: Find out why sometimes att is null if(att != null) { return getAttachmentContent( att ); } } catch (ProviderException e) { log.error("Attachment cannot be loaded", e); } // Something was wrong, no result is returned. return null; } /** * @param att Attachment to get content for. Filename extension is used to determine the type of the attachment. * @return String representing the content of the file. * FIXME This is a very simple implementation of some text-based attachment, mainly used for testing. * This should be replaced /moved to Attachment search providers or some other 'plugable' wat to search attachments */ private String getAttachmentContent( Attachment att ) { AttachmentManager mgr = m_engine.getAttachmentManager(); //FIXME: Add attachment plugin structure String filename = att.getFileName(); if(filename.endsWith(".txt") || filename.endsWith(".xml") || filename.endsWith(".ini") || filename.endsWith(".html")) { InputStream attStream; try { attStream = mgr.getAttachmentStream( att ); StringWriter sout = new StringWriter(); FileUtil.copyContents( new InputStreamReader(attStream), sout ); attStream.close(); sout.close(); return sout.toString(); } catch (ProviderException e) { log.error("Attachment cannot be loaded", e); return null; } catch (IOException e) { log.error("Attachment cannot be loaded", e); return null; } } return null; } /*
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -