📄 searchindex.java
字号:
/* * Copyright (c) 2003-2006, Simon Brown * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * - Neither the name of Pebble nor the names of its contributors may * be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/
package net.sourceforge.pebble.index;

import net.sourceforge.pebble.domain.*;
import net.sourceforge.pebble.search.SearchException;
import net.sourceforge.pebble.search.SearchHit;
import net.sourceforge.pebble.search.SearchResults;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;

import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;

/**
 * Wraps up the functionality to index blog entries. This is really just
 * a convenient wrapper around Lucene.
 * <p>
 * Every operation that modifies the index synchronizes on the owning
 * {@link Blog} instance. Failures while modifying the index are logged
 * rather than propagated to the caller.
 *
 * @author    Simon Brown
 */
public class SearchIndex {

  /** the log used by this class */
  private static final Log log = LogFactory.getLog(SearchIndex.class);

  /** the blog whose content is indexed, and which supplies the index location and analyzer */
  private final Blog blog;

  /**
   * Creates a search index wrapper for the given blog.
   *
   * @param blog    the owning Blog instance
   */
  public SearchIndex(Blog blog) {
    this.blog = blog;
  }

  /**
   * Clears the index by opening a writer in "create" mode, creating the
   * index directory first if it doesn't exist.
   */
  public void clear() {
    File searchDirectory = new File(blog.getSearchIndexDirectory());
    if (!searchDirectory.exists()) {
      searchDirectory.mkdirs();
    }

    synchronized (blog) {
      IndexWriter writer = null;
      try {
        Analyzer analyzer = getAnalyzer();
        // the "true" flag asks for a new index to be created
        writer = new IndexWriter(searchDirectory, analyzer, true);
      } catch (Exception e) {
        log.error(e.getMessage(), e);
      } finally {
        closeWriter(writer);
      }
    }
  }

  /**
   * Allows a collection of blog entries to be indexed.
   *
   * @param blogEntries   the blog entries to add to the index
   */
  public void indexBlogEntries(List<BlogEntry> blogEntries) {
    synchronized (blog) {
      try {
        IndexWriter writer = openWriter();
        try {
          for (BlogEntry blogEntry : blogEntries) {
            index(blogEntry, writer);
          }
        } finally {
          // always release the writer, even if indexing fails part way through
          closeWriter(writer);
        }
      } catch (Exception e) {
        log.error(e.getMessage(), e);
      }
    }
  }

  /**
   * Allows a collection of static pages to be indexed.
   *
   * @param staticPages   the static pages to add to the index
   */
  public void indexStaticPages(List<StaticPage> staticPages) {
    synchronized (blog) {
      try {
        IndexWriter writer = openWriter();
        try {
          for (StaticPage staticPage : staticPages) {
            index(staticPage, writer);
          }
        } finally {
          // always release the writer, even if indexing fails part way through
          closeWriter(writer);
        }
      } catch (Exception e) {
        log.error(e.getMessage(), e);
      }
    }
  }

  /**
   * Allows a single blog entry to be (re)indexed. If the entry is already
   * indexed, this method deletes the previous index before adding the new
   * one.
   *
   * @param blogEntry   the BlogEntry instance to index
   */
  public void index(BlogEntry blogEntry) {
    try {
      synchronized (blog) {
        // first delete the blog entry from the index (if it was there)
        unindex(blogEntry);

        IndexWriter writer = openWriter();
        try {
          index(blogEntry, writer);
        } finally {
          closeWriter(writer);
        }
      }
    } catch (Exception e) {
      log.error(e.getMessage(), e);
    }
  }

  /**
   * Allows a single static page to be (re)indexed. If the page is already
   * indexed, this method deletes the previous index before adding the new
   * one.
   *
   * @param staticPage    the StaticPage instance to index
   */
  public void index(StaticPage staticPage) {
    try {
      synchronized (blog) {
        // first delete the static page from the index (if it was there)
        unindex(staticPage);

        IndexWriter writer = openWriter();
        try {
          index(staticPage, writer);
        } finally {
          closeWriter(writer);
        }
      }
    } catch (Exception e) {
      log.error(e.getMessage(), e);
    }
  }

  /**
   * Gets the Analyzer implementation to use, instantiating the class
   * named by the blog's Lucene analyzer setting.
   *
   * @return  an Analyzer instance
   * @throws Exception  if the class can't be loaded or instantiated
   */
  private Analyzer getAnalyzer() throws Exception {
    Class<?> c = Class.forName(blog.getLuceneAnalyzer());
    return (Analyzer)c.newInstance();
  }

  /**
   * Opens a writer that appends to the blog's existing search index.
   *
   * @return  an open IndexWriter that the caller must close
   * @throws Exception  if the analyzer can't be created or the index can't be opened
   */
  private IndexWriter openWriter() throws Exception {
    Analyzer analyzer = getAnalyzer();
    // the "false" flag asks for the existing index to be used rather than recreated
    return new IndexWriter(blog.getSearchIndexDirectory(), analyzer, false);
  }

  /**
   * Closes the given writer, logging (rather than propagating) any failure.
   *
   * @param writer    the IndexWriter to close, may be null
   */
  private void closeWriter(IndexWriter writer) {
    if (writer == null) {
      return;
    }

    try {
      writer.close();
    } catch (Exception e) {
      log.error(e.getMessage(), e);
    }
  }

  /**
   * Removes the index for a single blog entry.
   *
   * @param blogEntry   the BlogEntry instance to be removed
   */
  public void unindex(BlogEntry blogEntry) {
    try {
      removeFromIndex(blogEntry.getId(), blogEntry.getTitle());
    } catch (Exception e) {
      log.error(e.getMessage(), e);
    }
  }

  /**
   * Removes the index for a single static page.
   *
   * @param staticPage    the StaticPage instance to be removed
   */
  public void unindex(StaticPage staticPage) {
    try {
      removeFromIndex(staticPage.getId(), staticPage.getTitle());
    } catch (Exception e) {
      log.error(e.getMessage(), e);
    }
  }

  /**
   * Deletes all documents with the given "id" field from the index.
   *
   * @param id      the ID of the content to remove
   * @param title   the title of the content, used for log output only
   * @throws Exception  if the index can't be opened or the delete fails
   */
  private void removeFromIndex(String id, String title) throws Exception {
    synchronized (blog) {
      log.debug("Attempting to delete index for " + title);
      IndexReader reader = IndexReader.open(blog.getSearchIndexDirectory());
      try {
        int deleted = reader.delete(new Term("id", id));
        log.debug("Deleted " + deleted + " document(s) from the index");
      } finally {
        // always release the reader, even if the delete fails
        try {
          reader.close();
        } catch (Exception e) {
          log.error(e.getMessage(), e);
        }
      }
    }
  }

  /**
   * Helper method to index an individual blog entry. Unpublished entries
   * are skipped. Any failure is logged so that one bad entry doesn't stop
   * the remaining entries of a batch from being indexed.
   *
   * @param blogEntry   the BlogEntry instance to index
   * @param writer      the IndexWriter to index with
   */
  private void index(BlogEntry blogEntry, IndexWriter writer) {
    if (!blogEntry.isPublished()) {
      return;
    }

    try {
      log.debug("Indexing " + blogEntry.getTitle());
      Document document = createDocument(
          blogEntry.getId(),
          blogEntry.getTitle(),
          blogEntry.getPermalink(),
          DateField.dateToString(blogEntry.getDate()),
          blogEntry.getBody(),
          blogEntry.getTruncatedContent(),
          blogEntry.getAuthor());

      for (Category category : blogEntry.getCategories()) {
        document.add(Field.Text("category", category.getId()));
      }

      for (Tag tag : blogEntry.getAllTags()) {
        document.add(Field.Text("tag", tag.getName()));
      }

      // build up one large string with all searchable content
      // i.e. entry title, entry body and all (approved) response bodies
      StringBuilder searchableContent = new StringBuilder();
      searchableContent.append(nullToEmpty(blogEntry.getTitle()));
      searchableContent.append(" ");
      searchableContent.append(nullToEmpty(blogEntry.getBody()));
      searchableContent.append(" ");

      Iterator<?> it = blogEntry.getComments().iterator();
      while (it.hasNext()) {
        Comment comment = (Comment)it.next();
        if (comment.isApproved()) {
          searchableContent.append(nullToEmpty(comment.getBody()));
          searchableContent.append(" ");
        }
      }

      it = blogEntry.getTrackBacks().iterator();
      while (it.hasNext()) {
        TrackBack trackBack = (TrackBack)it.next();
        if (trackBack.isApproved()) {
          searchableContent.append(nullToEmpty(trackBack.getExcerpt()));
          searchableContent.append(" ");
        }
      }

      // "blogEntry" is the default field that queries are run against
      document.add(Field.UnStored("blogEntry", searchableContent.toString()));

      writer.addDocument(document);
    } catch (Exception e) {
      log.error(e.getMessage(), e);
    }
  }

  /**
   * Helper method to index an individual static page. Any failure is
   * logged so that one bad page doesn't stop the remaining pages of a
   * batch from being indexed.
   *
   * @param staticPage    the StaticPage instance to index
   * @param writer        the IndexWriter to index with
   */
  private void index(StaticPage staticPage, IndexWriter writer) {
    try {
      log.debug("Indexing " + staticPage.getTitle());
      Document document = createDocument(
          staticPage.getId(),
          staticPage.getTitle(),
          staticPage.getPermalink(),
          DateField.dateToString(staticPage.getDate()),
          staticPage.getBody(),
          staticPage.getTruncatedContent(),
          staticPage.getAuthor());

      // join the title and body together to make searching on them both easier
      StringBuilder searchableContent = new StringBuilder();
      searchableContent.append(nullToEmpty(staticPage.getTitle()));
      searchableContent.append(" ");
      searchableContent.append(nullToEmpty(staticPage.getBody()));

      // "blogEntry" is the default field that queries are run against
      document.add(Field.UnStored("blogEntry", searchableContent.toString()));

      writer.addDocument(document);
    } catch (Exception e) {
      log.error(e.getMessage(), e);
    }
  }

  /**
   * Creates a Lucene document containing the fields that are common to
   * blog entries and static pages. A missing title, body or truncated body
   * is indexed as an empty string; a missing author is left out altogether.
   *
   * @param id              the content ID
   * @param title           the title, may be null
   * @param permalink       the permalink
   * @param date            the date, already converted with DateField.dateToString()
   * @param body            the full body, may be null
   * @param truncatedBody   the truncated body, may be null
   * @param author          the author, may be null
   * @return  a new Document instance
   */
  private Document createDocument(String id, String title, String permalink,
                                  String date, String body,
                                  String truncatedBody, String author) {
    Document document = new Document();
    document.add(Field.Keyword("id", id));
    document.add(Field.Text("title", nullToEmpty(title)));
    document.add(Field.Keyword("permalink", permalink));
    document.add(Field.UnIndexed("date", date));
    document.add(Field.UnStored("body", nullToEmpty(body)));
    document.add(Field.Text("truncatedBody", nullToEmpty(truncatedBody)));
    if (author != null) {
      document.add(Field.Text("author", author));
    }

    return document;
  }

  /**
   * Converts null to an empty string, so that the literal text "null"
   * never ends up in the searchable content.
   *
   * @param s   a String, may be null
   * @return  s, or "" if s is null
   */
  private static String nullToEmpty(String s) {
    return s == null ? "" : s;
  }

  /**
   * Searches the index, running the query against the "blogEntry" field
   * by default.
   *
   * @param queryString   the query, a null or empty query gives empty results
   * @return  a SearchResults instance containing a hit for each matching
   *          document; if the query can't be parsed, the results contain
   *          an error message instead
   * @throws SearchException    if the search fails for any reason other
   *                            than an unparseable query
   */
  public SearchResults search(String queryString) throws SearchException {

    log.debug("Performing search : " + queryString);

    SearchResults searchResults = new SearchResults();
    searchResults.setQuery(queryString);

    if (queryString != null && queryString.length() > 0) {
      Searcher searcher = null;

      try {
        searcher = new IndexSearcher(blog.getSearchIndexDirectory());
        Query query = QueryParser.parse(queryString, "blogEntry", getAnalyzer());
        Hits hits = searcher.search(query);

        for (int i = 0; i < hits.length(); i++) {
          Document doc = hits.doc(i);
          SearchHit result = new SearchHit(
              blog,
              doc.get("id"),
              doc.get("permalink"),
              doc.get("title"),
              doc.get("truncatedBody"),
              DateField.stringToDate(doc.get("date")),
              hits.score(i));
          searchResults.add(result);
        }
      } catch (ParseException pe) {
        // a malformed query is the user's mistake, so report it in the results
        log.warn(pe.getMessage(), pe);
        searchResults.setMessage("Sorry, but there was an error. Please try another search");
      } catch (Exception e) {
        // the full stack trace goes to the log, since only the message is passed on
        log.error(e.getMessage(), e);
        throw new SearchException(e.getMessage());
      } finally {
        if (searcher != null) {
          try {
            searcher.close();
          } catch (IOException e) {
            // can't do much now, other than record it
            log.warn(e.getMessage(), e);
          }
        }
      }
    }

    return searchResults;
  }

}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -