📄 cmssearchindex.java
字号:
/*
* File : $Source: /usr/local/cvs/opencms/src/org/opencms/search/CmsSearchIndex.java,v $
* Date : $Date: 2006/03/27 14:52:54 $
* Version: $Revision: 1.60 $
*
* This library is part of OpenCms -
* the Open Source Content Mananagement System
*
* Copyright (c) 2005 Alkacon Software GmbH (http://www.alkacon.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* For further information about Alkacon Software GmbH, please see the
* company website: http://www.alkacon.com
*
* For further information about OpenCms, please see the
* project website: http://www.opencms.org
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package org.opencms.search;
import org.opencms.configuration.I_CmsConfigurationParameterHandler;
import org.opencms.file.CmsObject;
import org.opencms.file.CmsProject;
import org.opencms.file.CmsRequestContext;
import org.opencms.main.CmsException;
import org.opencms.main.CmsIllegalArgumentException;
import org.opencms.main.CmsLog;
import org.opencms.main.OpenCms;
import org.opencms.search.documents.CmsHighlightFinder;
import org.opencms.search.documents.I_CmsDocumentFactory;
import org.opencms.util.CmsStringUtil;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
/**
* Implements the search within an index and the management of the index configuration.<p>
*
* @author Carsten Weinholz
* @author Thomas Weckert
* @author Alexander Kandzior
*
* @version $Revision: 1.60 $
*
* @since 6.0.0
*/
public class CmsSearchIndex implements I_CmsConfigurationParameterHandler {
/** Constant for a field list that contains the "meta" field as well as the "content" field. */
public static final String[] DOC_META_FIELDS = new String[] {
I_CmsDocumentFactory.DOC_META,
I_CmsDocumentFactory.DOC_CONTENT};
/**
 * Key of the additional configuration parameter that enables excerpt creation (default: true).<p>
 *
 * The key is evaluated in <code>addConfigurationParameter(String, String)</code>.
 */
public static final String EXCERPT = CmsSearchIndex.class.getName() + ".createExcerpt";
/**
 * Key of the additional configuration parameter that enables permission checks (default: true).<p>
 *
 * The key is evaluated in <code>addConfigurationParameter(String, String)</code>,
 * the value is stored inverted in the "don't check permissions" member.
 */
public static final String PERMISSIONS = CmsSearchIndex.class.getName() + ".checkPermissions";
/**
 * Key of the additional configuration parameter that sets the thread priority during search.<p>
 *
 * The value is parsed as an integer and limited to the range
 * <code>Thread.MIN_PRIORITY</code> to <code>Thread.MAX_PRIORITY</code>.
 */
public static final String PRIORITY = CmsSearchIndex.class.getName() + ".priority";
/** Automatic ("auto") index rebuild mode. */
public static final String REBUILD_MODE_AUTO = "auto";
/** Manual ("manual") index rebuild mode. */
public static final String REBUILD_MODE_MANUAL = "manual";
/** Special root path append token for optimized path queries, appended to every path element by <code>rootPathSplit(String)</code>. */
public static final String ROOT_PATH_SUFFIX = "@o.c";
/** Special root path start token for optimized path queries, always the first token returned by <code>rootPathSplit(String)</code>. */
public static final String ROOT_PATH_TOKEN = "root" + ROOT_PATH_SUFFIX;
/** Separator for the search excerpt fragments. */
private static final String EXCERPT_FRAGMENT_SEPARATOR = " ... ";
/** Size of the excerpt fragments in bytes (NOTE(review): the unit may actually be characters, the usage is not visible here - confirm). */
private static final int EXCERPT_FRAGMENT_SIZE = 60;
/** Number of fragments required in an excerpt. */
private static final int EXCERPT_REQUIRED_FRAGMENTS = 5;
/** The log object for this class. */
private static final Log LOG = CmsLog.getLog(CmsSearchIndex.class);
/** The list of configured index sources (declared without an access modifier, so it is visible in the whole package). */
List m_sources;
/** The excerpt mode for this index, <code>true</code> by default. */
private boolean m_createExcerpt;
/** Documenttypes of folders/channels, initialized as an empty map by the default constructor. */
private Map m_documenttypes;
/** The permission check mode for this index, <code>true</code> means that permission checks are switched off. */
private boolean m_dontCheckPermissions;
/** An internal enabled flag, used to disable the index if for instance the configured project does not exist. */
private boolean m_enabled;
/** The language filter of this index. */
private String m_locale;
/** The name of this index. */
private String m_name;
/** The path where this index stores its data in the "real" file system. */
private String m_path;
/** The thread priority for a search, initialized with <code>-1</code> by the default constructor. */
private int m_priority;
/** The project of this index. */
private String m_project;
/** The rebuild mode for this index. */
private String m_rebuild;
/** The names of the configured sources for this index, initialized as an empty list by the default constructor. */
private List m_sourceNames;
/**
* Default constructor only intended to be used by the xml configuration. <p>
*
* It is recommended to use the constructor <code>{@link #CmsSearchIndex(String)}</code>
* as it enforces the mandatory name argument. <p>
*
*/
public CmsSearchIndex() {
    // behavioral defaults: the index is enabled and excerpts are created
    m_enabled = true;
    m_createExcerpt = true;
    // -1 is outside of the valid thread priority range,
    // presumably meaning "no priority configured" - verify against the search code
    m_priority = -1;

    // start with empty containers so that sources can be added right away
    m_documenttypes = new HashMap();
    m_sourceNames = new ArrayList();
}
/**
* Creates a new CmsSearchIndex with the given name.<p>
*
* @param name the system-wide unique name for the search index
*
* @throws org.opencms.main.CmsIllegalArgumentException
* if the given name is null, empty or already taken
* by another search index.
*
*/
public CmsSearchIndex(String name) throws CmsIllegalArgumentException {
    // apply the defaults of the no-argument constructor first
    this();
    // the setter validates the name, an invalid name aborts the construction
    setName(name);
}
/**
* Rewrites a resource path for use in the {@link I_CmsDocumentFactory#DOC_ROOT} field.<p>
*
* All "/" chars in the path are replaced with the {@link #ROOT_PATH_SUFFIX} token.
* This is required in order to use a Lucene "phrase query" on the resource path.
* Using a phrase query is much, much better for the search performance than using a straightforward
* "prefix query". With a "prefix query", Lucene would internally generate a huge list of boolean sub-queries,
* exactly one for every document in the VFS subtree of the query. So if you query on "/sites/default/*" on
* a large OpenCms installation, this means thousands of sub-queries.
* Using the "phrase query", only one (or very few) queries are internally generated, and the result
* is just the same.<p>
*
* This implementation basically replaces the "/" of a path with "@o.c ".
* This is a trick so that the Lucene analyzer leaves the
* directory names untouched, since it treats them like literal email addresses.
* Otherwise the language analyzer might modify the directory names, leading to potential
* duplicates (e.g. <code>members/</code> and <code>member/</code> may both be trimmed to <code>member</code>),
* so that the prefix search returns more results than expected.<p>
* @param path the path to rewrite
*
* @return the re-written path
*/
public static String rootPathRewrite(String path) {
    // tokenize first, the tokens already carry the root path suffix
    String[] tokens = rootPathSplit(path);
    StringBuffer joined = new StringBuffer(256);
    for (int pos = 0; pos < tokens.length; pos++) {
        if (pos > 0) {
            // a single blank separates the tokens, no blank at the start or the end
            joined.append(' ');
        }
        joined.append(tokens[pos]);
    }
    return joined.toString();
}
/**
* Splits a resource path into tokens for use in the <code>{@link I_CmsDocumentFactory#DOC_ROOT}</code> field
* and with the <code>{@link #rootPathRewrite(String)}</code> method.<p>
*
* @param path the path to split
*
* @return the path split into tokens
*
* @see #rootPathRewrite(String)
*/
public static String[] rootPathSplit(String path) {
    if (CmsStringUtil.isEmpty(path)) {
        // nothing to split, the result consists of the root token only
        return new String[] {ROOT_PATH_TOKEN};
    }
    String[] segments = CmsStringUtil.splitAsArray(path, '/');
    // one extra slot for the leading root token
    String[] tokens = new String[segments.length + 1];
    tokens[0] = ROOT_PATH_TOKEN;
    int target = 1;
    for (int source = 0; source < segments.length; source++) {
        // every path element gets the suffix appended
        String token = segments[source] + ROOT_PATH_SUFFIX;
        // the Lucene analyzer treats '_' as a word separator, so it must be replaced
        tokens[target++] = token.replace('_', '0');
    }
    return tokens;
}
/**
* Adds a parameter.<p>
*
* @param key the key/name of the parameter
* @param value the value of the parameter
*/
public void addConfigurationParameter(String key, String value) {
    // Only the keys PERMISSIONS, EXCERPT and PRIORITY are evaluated, all other keys are ignored.
    if (PERMISSIONS.equals(key)) {
        // the parameter switches the checks ON, the member stores the inverse ("don't check")
        m_dontCheckPermissions = !Boolean.valueOf(value).booleanValue();
    } else if (EXCERPT.equals(key)) {
        m_createExcerpt = Boolean.valueOf(value).booleanValue();
    } else if (PRIORITY.equals(key)) {
        // a value that is not a valid integer results in a NumberFormatException for the caller
        m_priority = Integer.parseInt(value);
        if (m_priority < Thread.MIN_PRIORITY) {
            // limit to the lowest valid thread priority and report the bad configuration
            m_priority = Thread.MIN_PRIORITY;
            LOG.error(Messages.get().getBundle().key(
                Messages.LOG_SEARCH_PRIORITY_TOO_LOW_2,
                value,
                new Integer(Thread.MIN_PRIORITY)));
        } else if (m_priority > Thread.MAX_PRIORITY) {
            // limit to the highest valid thread priority; this is reported on the same
            // log level as the "too low" case, so that the misconfiguration is not hidden
            // in installations that don't have debug logging enabled
            m_priority = Thread.MAX_PRIORITY;
            LOG.error(Messages.get().getBundle().key(
                Messages.LOG_SEARCH_PRIORITY_TOO_HIGH_2,
                value,
                new Integer(Thread.MAX_PRIORITY)));
        }
    }
}
/**
* Adds an index source to this search index.<p>
*
* @param sourceName the index source name to add
*/
public void addSourceName(String sourceName) {
    // only the name is recorded here, it is appended to the list of configured source names
    this.m_sourceNames.add(sourceName);
}
/**
* Checks if this index has been configured correctly.<p>
*
* In case the check fails, the <code>enabled</code> property
* is set to <code>false</code>
*
* @param cms an OpenCms user context to perform the checks with (should have "Administrator" permissions)
*
* @return <code>true</code> in case the index is correctly configured and enabled after the check
*
* @see #isEnabled()
*/
public boolean checkConfiguration(CmsObject cms) {
if (isEnabled()) {
// check if the project for the index exists
try {
cms.readProject(getProject());
setEnabled(true);
} catch (CmsException e) {
// the project does not exist, disable the index
setEnabled(false);
if (LOG.isErrorEnabled()) {
LOG.error(Messages.get().getBundle().key(
Messages.LOG_SEARCHINDEX_CREATE_BAD_PROJECT_2,
getProject(),
getName()));
}
}
} else {
if (LOG.isInfoEnabled()) {
LOG.info(Messages.get().getBundle().key(Messages.LOG_SEARCHINDEX_DISABLED_1, getName()));
}
}
return isEnabled();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -