📄 lucenesearchprovider.java

📁 jspwiki source code,jspwiki source code
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
        catch( Exception e )        {            log.error("Unexpected Lucene exception - please check configuration!",e);        }        finally        {            try            {                if( writer != null ) writer.close();            }            catch( IOException e ) {}        }        log.debug("Done updating Lucene index for page '" + page.getName() + "'.");    }    private Analyzer getLuceneAnalyzer()        throws ClassNotFoundException,               InstantiationException,               IllegalAccessException    {        Class clazz = ClassUtil.findClass( "", m_analyzerClass );        Analyzer analyzer = (Analyzer)clazz.newInstance();        return analyzer;    }    /**     *  Indexes page using the given IndexWriter.     *     *  @param page WikiPage     *  @param text Page text to index     *  @param writer The Lucene IndexWriter to use for indexing     *  @return the created index Document     *  @throws IOException If there's an indexing problem     */    protected Document luceneIndexPage( WikiPage page, String text, IndexWriter writer )        throws IOException    {        if( log.isDebugEnabled() ) log.debug( "Indexing "+page.getName()+"..." );                // make a new, empty document        Document doc = new Document();        if( text == null ) return doc;        // Raw name is the keyword we'll use to refer to this document for updates.        Field field = new Field(LUCENE_ID, page.getName(), Field.Store.YES, Field.Index.UN_TOKENIZED);        doc.add( field );        // Body text.  It is stored in the doc for search contexts.        field = new Field(LUCENE_PAGE_CONTENTS, text,                          Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO);        doc.add( field );        // Allow searching by page name. Both beautified and raw        String unTokenizedTitle = StringUtils.replaceChars( page.getName(),                                                            MarkupParser.PUNCTUATION_CHARS_ALLOWED,                                                            c_punctuationSpaces );        field = new Field(LUCENE_PAGE_NAME,                          TextUtil.beautifyString( page.getName() ) + " " + unTokenizedTitle,                          Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO);        doc.add( field );        // Allow searching by authorname        if( page.getAuthor() != null )        {            field = new Field(LUCENE_AUTHOR, page.getAuthor(),                              Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO);            doc.add( field );        }        // Now add the names of the attachments of this page        try        {            Collection attachments = m_engine.getAttachmentManager().listAttachments(page);            String attachmentNames = "";            for( Iterator it = attachments.iterator(); it.hasNext(); )            {                Attachment att = (Attachment) it.next();                attachmentNames += att.getName() + ";";            }            field = new Field(LUCENE_ATTACHMENTS, attachmentNames,                              Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO);            doc.add( field );        }        catch(ProviderException e)        {            // Unable to read attachments            log.error("Failed to get attachments for page", e);        }        writer.addDocument(doc);        return doc;    }    /**     *  {@inheritDoc}     */    public void pageRemoved( WikiPage page )    {        try        {            // Must first remove existing version of page.            IndexReader reader = IndexReader.open(m_luceneDirectory);            reader.deleteDocuments(new Term(LUCENE_ID, page.getName()));            reader.close();        }        catch ( IOException e )        {            log.error("Unable to update page '" + page.getName() + "' from Lucene index", e);        }    }    /**     *  Adds a page-text pair to the lucene update queue.  Safe to call always     *     *  @param page WikiPage to add to the update queue.     */    public void reindexPage( WikiPage page )    {        if( page != null )        {            String text;            // TODO: Think if this was better done in the thread itself?            if( page instanceof Attachment )            {                text = getAttachmentContent( (Attachment) page );            }            else            {                text = m_engine.getPureText( page );            }            if( text != null )            {                // Add work item to m_updates queue.                Object[] pair = new Object[2];                pair[0] = page;                pair[1] = text;                m_updates.add(pair);                log.debug("Scheduling page " + page.getName() + " for index update");            }        }    }    /**     *  {@inheritDoc}     */    public Collection findPages( String query )        throws ProviderException    {        return findPages( query, FLAG_CONTEXTS );    }    /**     *  Create contexts also.  Generating contexts can be expensive,     *  so they're not on by default.     */    public static final int FLAG_CONTEXTS = 0x01;    /**     *  Searches pages using a particular combination of flags.     *     *  @param query The query to perform in Lucene query language     *  @param flags A set of flags     *  @return A Collection of SearchResult instances     *  @throws ProviderException if there is a problem with the backend     */    public Collection findPages( String query, int flags )        throws ProviderException    {        Searcher  searcher = null;        ArrayList<SearchResult> list = null;        Highlighter highlighter = null;        try        {            String[] queryfields = { LUCENE_PAGE_CONTENTS, LUCENE_PAGE_NAME, LUCENE_AUTHOR, LUCENE_ATTACHMENTS };            QueryParser qp = new MultiFieldQueryParser( queryfields, getLuceneAnalyzer() );            //QueryParser qp = new QueryParser( LUCENE_PAGE_CONTENTS, getLuceneAnalyzer() );            Query luceneQuery = qp.parse( query );            if( (flags & FLAG_CONTEXTS) != 0 )            {                highlighter = new Highlighter(new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"),                                              new SimpleHTMLEncoder(),                                              new QueryScorer(luceneQuery));            }            try            {                searcher = new IndexSearcher(m_luceneDirectory);            }            catch( Exception ex )            {                log.info("Lucene not yet ready; indexing not started",ex);                return null;            }            Hits hits = searcher.search(luceneQuery);            list = new ArrayList<SearchResult>(hits.length());            for ( int curr = 0; curr < hits.length(); curr++ )            {                Document doc = hits.doc(curr);                String pageName = doc.get(LUCENE_ID);                WikiPage page = m_engine.getPage(pageName, WikiPageProvider.LATEST_VERSION);                if(page != null)                {                    if(page instanceof Attachment)                    {                        // Currently attachments don't look nice on the search-results page                        // When the search-results are cleaned up this can be enabled again.                    }                    int score = (int)(hits.score(curr) * 100);                    // Get highlighted search contexts                    String text = doc.get(LUCENE_PAGE_CONTENTS);                    String[] fragments = new String[0];                    if( text != null && highlighter != null )                    {                        TokenStream tokenStream = getLuceneAnalyzer()                        .tokenStream(LUCENE_PAGE_CONTENTS, new StringReader(text));                        fragments = highlighter.getBestFragments(tokenStream,                                                                 text, MAX_FRAGMENTS);                    }                    SearchResult result = new SearchResultImpl( page, score, fragments );                         list.add(result);                }                else                {                    log.error("Lucene found a result page '" + pageName + "' that could not be loaded, removing from Lucene cache");                    pageRemoved(new WikiPage( m_engine, pageName ));                }            }        }        catch( IOException e )        {            log.error("Failed during lucene search",e);        }        catch( InstantiationException e )        {            log.error("Unable to get a Lucene analyzer",e);        }        catch( IllegalAccessException e )        {            log.error("Unable to get a Lucene analyzer",e);        }        catch( ClassNotFoundException e )        {            log.error("Specified Lucene analyzer does not exist",e);        }        catch( ParseException e )        {            log.info("Broken query; cannot parse",e);            throw new ProviderException("You have entered a query Lucene cannot process: "+e.getMessage());        }        finally        {            if( searcher != null )            {                try                {                    searcher.close();                }                catch( IOException e )                {}            }        }        return list;    }    /**     *  {@inheritDoc}     */    public String getProviderInfo()    {        return "LuceneSearchProvider";    }    /**     * Updater thread that updates Lucene indexes.     */    private static final class LuceneUpdater extends WikiBackgroundThread    {        protected static final int INDEX_DELAY    = 5;        protected static final int INITIAL_DELAY = 60;        private final LuceneSearchProvider m_provider;        private int m_initialDelay;        private WatchDog m_watchdog;        private LuceneUpdater( WikiEngine engine, LuceneSearchProvider provider,                               int initialDelay, int indexDelay )        {            super( engine, indexDelay );            m_provider = provider;            setName("JSPWiki Lucene Indexer");        }        public void startupTask() throws Exception        {            m_watchdog = getEngine().getCurrentWatchDog();            // Sleep initially...            try            {                Thread.sleep( m_initialDelay * 1000L );            }            catch( InterruptedException e )            {                throw new InternalWikiException("Interrupted while waiting to start.");            }            m_watchdog.enterState("Full reindex");            // Reindex everything            m_provider.doFullLuceneReindex();            m_watchdog.exitState();        }        public void backgroundTask() throws Exception        {            m_watchdog.enterState("Emptying index queue", 60);            synchronized ( m_provider.m_updates )            {                while( m_provider.m_updates.size() > 0 )                {                    Object[] pair = m_provider.m_updates.remove(0);                    WikiPage page = ( WikiPage ) pair[0];                    String text = ( String ) pair[1];                    m_provider.updateLuceneIndex(page, text);                }            }            m_watchdog.exitState();        }    }    // FIXME: This class is dumb; needs to have a better implementation    private static class SearchResultImpl        implements SearchResult    {        private WikiPage m_page;        private int      m_score;        private String[] m_contexts;        public SearchResultImpl( WikiPage page, int score, String[] contexts )        {            m_page  = page;            m_score = score;            m_contexts = contexts;        }        public WikiPage getPage()        {            return m_page;        }        /* (non-Javadoc)         * @see com.ecyrd.jspwiki.SearchResult#getScore()         */        public int getScore()        {            return m_score;        }        public String[] getContexts()        {            return m_contexts;        }    }}
上一页 12
💿 文件大小 15040 K
👤 上传用户 add505
📂 所属分类 Java编程
🏷️ 相关标签

#jspwiki #source #code
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -