cmssearchindex.java
来自「找了很久才找到到源代码」· Java 代码 · 共 1,124 行 · 第 1/3 页
JAVA
1,124 行
/*
* File : $Source: /usr/local/cvs/opencms/src/org/opencms/search/CmsSearchIndex.java,v $
* Date : $Date: 2007-08-27 11:28:14 $
* Version: $Revision: 1.65 $
*
* This library is part of OpenCms -
* the Open Source Content Management System
*
* Copyright (c) 2002 - 2007 Alkacon Software GmbH (http://www.alkacon.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* For further information about Alkacon Software GmbH, please see the
* company website: http://www.alkacon.com
*
* For further information about OpenCms, please see the
* project website: http://www.opencms.org
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package org.opencms.search;
import org.opencms.configuration.I_CmsConfigurationParameterHandler;
import org.opencms.file.CmsObject;
import org.opencms.file.CmsProject;
import org.opencms.file.CmsRequestContext;
import org.opencms.file.CmsResource;
import org.opencms.i18n.CmsLocaleManager;
import org.opencms.main.CmsException;
import org.opencms.main.CmsIllegalArgumentException;
import org.opencms.main.CmsLog;
import org.opencms.main.OpenCms;
import org.opencms.search.documents.A_CmsVfsDocument;
import org.opencms.search.documents.I_CmsDocumentFactory;
import org.opencms.search.documents.I_CmsTermHighlighter;
import org.opencms.search.fields.CmsSearchField;
import org.opencms.search.fields.CmsSearchFieldConfiguration;
import org.opencms.util.CmsStringUtil;
import java.io.File;
import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
/**
* Implements the search within an index and the management of the index configuration.<p>
*
* @author Carsten Weinholz
* @author Thomas Weckert
* @author Alexander Kandzior
*
* @version $Revision: 1.65 $
*
* @since 6.0.0
*/
public class CmsSearchIndex implements I_CmsConfigurationParameterHandler {
/** Key of the additional configuration parameter that enables excerpt creation (default: true). */
public static final String EXCERPT = CmsSearchIndex.class.getName() + ".createExcerpt";
/** Key of the additional configuration parameter that enables permission checks (default: true). */
public static final String PERMISSIONS = CmsSearchIndex.class.getName() + ".checkPermissions";
/**
* Key of the additional configuration parameter that sets the thread priority during search.<p>
*
* Values outside the range <code>Thread.MIN_PRIORITY</code> to <code>Thread.MAX_PRIORITY</code>
* are clamped to the nearest bound in {@link #addConfigurationParameter(String, String)}.
*/
public static final String PRIORITY = CmsSearchIndex.class.getName() + ".priority";
/** Automatic ("auto") index rebuild mode. */
public static final String REBUILD_MODE_AUTO = "auto";
/** Manual ("manual") index rebuild mode. */
public static final String REBUILD_MODE_MANUAL = "manual";
/**
* Special root path append token for optimized path queries.<p>
*
* @deprecated This is no longer required since OpenCms version 7.0.2, since the implementation
* of {@link CmsSearchManager#getAnalyzer(Locale)} was modified to always
* use a whitespace analyzer for the {@link CmsSearchField#FIELD_ROOT} field.
*
* @see #rootPathRewrite(String)
*/
public static final String ROOT_PATH_SUFFIX = "";
/**
* Special root path start token for optimized path queries.<p>
*
* This is always the first token returned by {@link #rootPathSplit(String)}.
*/
public static final String ROOT_PATH_TOKEN = "root";
/** Constant for a field list that contains the "meta" field as well as the "content" field. */
static final String[] DOC_META_FIELDS = new String[] {CmsSearchField.FIELD_META, CmsSearchField.FIELD_CONTENT};
/** The log object for this class. */
private static final Log LOG = CmsLog.getLog(CmsSearchIndex.class);
/** The list of configured index sources (not initialized by the default constructor). */
List m_sources;
/** The excerpt mode for this index, <code>true</code> by default; set through the {@link #EXCERPT} parameter. */
private boolean m_createExcerpt;
/** Documenttypes of folders/channels. */
private Map m_documenttypes;
/**
* The permission check mode for this index.<p>
*
* Note the inverted meaning: this is <code>true</code> if the {@link #PERMISSIONS} parameter
* was set to <code>false</code>.
*/
private boolean m_dontCheckPermissions;
/** An internal enabled flag, used to disable the index if for instance the configured project does not exist. */
private boolean m_enabled;
/** The search field configuration of this index. */
private CmsSearchFieldConfiguration m_fieldConfiguration;
/** The name of the search field configuration used by this index. */
private String m_fieldConfigurationName;
/** The locale of this index. */
private Locale m_locale;
/** The name of this index, this is what {@link #equals(Object)} compares. */
private String m_name;
/** The path where this index stores its data in the "real" file system. */
private String m_path;
/** The thread priority for a search, <code>-1</code> until the {@link #PRIORITY} parameter has been set. */
private int m_priority;
/** The project of this index. */
private String m_project;
/** The rebuild mode for this index. */
private String m_rebuild;
/** The names of the configured sources for this index, see {@link #addSourceName(String)}. */
private List m_sourceNames;
/**
* Default constructor only intended to be used by the xml configuration. <p>
*
* It is recommended to use the constructor <code>{@link #CmsSearchIndex(String)}</code>
* as it enforces the mandatory name argument. <p>
*
*/
public CmsSearchIndex() {

    // scalar defaults: index is enabled, excerpts are created,
    // and -1 marks the priority as "not configured" (getConfiguration only exports values > 0)
    m_priority = -1;
    m_enabled = true;
    m_createExcerpt = true;

    // start with empty containers, these are filled while the configuration is read
    m_documenttypes = new HashMap();
    m_sourceNames = new ArrayList();
}
/**
* Creates a new CmsSearchIndex with the given name.<p>
*
* @param name the system-wide unique name for the search index
*
* @throws org.opencms.main.CmsIllegalArgumentException
* if the given name is null, empty or already taken
* by another search index.
*
*/
public CmsSearchIndex(String name)
throws CmsIllegalArgumentException {
// apply the same defaults as the no-argument constructor first
this();
// NOTE(review): setName is declared outside this excerpt; presumably it performs the name
// validation that raises the declared CmsIllegalArgumentException - confirm
setName(name);
}
/**
* Rewrites a resource path for use in the {@link CmsSearchField#FIELD_ROOT} field.<p>
*
* This is required in order to use a Lucene "phrase query" on the resource path.
* Using a phrase query is much, much better for the search performance than using a straightforward
* "prefix query". With a "prefix query", Lucene would internally generate a huge list of boolean sub-queries,
* exactly one for every document in the VFS subtree of the query. So if you query on "/sites/default/*" on
* a large OpenCms installation, this means thousands of sub-queries.
* Using the "phrase query", only one (or very few) queries are internally generated, and the result
* is just the same.<p>
*
* Since OpenCms version 7.0.2, the {@link CmsSearchField#FIELD_ROOT} field always uses a whitespace analyzer.
* This is ensured by the {@link CmsSearchManager#getAnalyzer(Locale)} implementation.
* The Lucene whitespace analyzer uses all words as tokens, no lower case transformation or word stemming is done.
* So the root path is now just split along the '/' chars, which are replaced by simple space chars.<p>
*
* <i>Historical implementation sidenote:</i>
* Before 7.0.2, the {@link CmsSearchField#FIELD_ROOT} used the analyzer configured by the language.
* This introduced a number of issues as the language analyzer might modify the directory names, leading to potential
* duplicates (e.g. <code>members/</code> and <code>member/</code> may both be trimmed to <code>member</code>),
* so that the prefix search returns more or different results than expected.
* This was avoided by a workaround where this method basically replaced the "/" of a path with "@o.c ".
* Using this trick most Lucene analyzers left the directory names untouched,
* and treated them like literal email addresses. However, this trick did not work with all analyzers,
* for example the Russian analyzer does not work as expected.
* An additional workaround was required to avoid problems with folders that differ
* only by upper / lower case chars. Since 7.0.2, these workarounds are no longer required, since the
* {@link CmsSearchField#FIELD_ROOT} field always uses a whitespace analyzer, which is a much better solution.<p>
*
* @param path the path to rewrite
*
* @return the re-written path
*/
public static String rootPathRewrite(String path) {

    // tokenize first, the leading token is always the root marker added by rootPathSplit
    String[] tokens = rootPathSplit(path);
    StringBuffer rewritten = new StringBuffer(256);
    for (int pos = 0; pos < tokens.length; pos++) {
        if (pos > 0) {
            // single space between tokens, none before the first or after the last
            rewritten.append(' ');
        }
        rewritten.append(tokens[pos]);
    }
    return rewritten.toString();
}
/**
* Splits a resource path into tokens for use in the <code>{@link CmsSearchField#FIELD_ROOT}</code> field
* and with the <code>{@link #rootPathRewrite(String)}</code> method.<p>
*
* @param path the path to split
*
* @return the split path
*
* @see #rootPathRewrite(String)
*/
public static String[] rootPathSplit(String path) {

    String[] tokens;
    if (CmsStringUtil.isEmpty(path)) {
        // nothing to split: the result consists of the root marker only
        tokens = new String[] {ROOT_PATH_TOKEN};
    } else {
        // split along the '/' chars and reserve slot 0 for the root marker
        String[] segments = CmsStringUtil.splitAsArray(path, '/');
        tokens = new String[1 + segments.length];
        System.arraycopy(segments, 0, tokens, 1, segments.length);
        tokens[0] = ROOT_PATH_TOKEN;
    }
    return tokens;
}
/**
* Adds a parameter.<p>
*
* @param key the key/name of the parameter
* @param value the value of the parameter
*/
public void addConfigurationParameter(String key, String value) {

    if (PERMISSIONS.equals(key)) {
        // the field stores the negated flag: "checkPermissions = false" means "don't check"
        m_dontCheckPermissions = !Boolean.valueOf(value).booleanValue();
    } else if (EXCERPT.equals(key)) {
        m_createExcerpt = Boolean.valueOf(value).booleanValue();
    } else if (PRIORITY.equals(key)) {
        // a value that is not a valid integer makes Integer.parseInt throw a NumberFormatException
        m_priority = Integer.parseInt(value);
        if (m_priority < Thread.MIN_PRIORITY) {
            // clamp to the lowest priority a thread accepts and report the bad configuration
            m_priority = Thread.MIN_PRIORITY;
            LOG.error(Messages.get().getBundle().key(
                Messages.LOG_SEARCH_PRIORITY_TOO_LOW_2,
                value,
                new Integer(Thread.MIN_PRIORITY)));
        } else if (m_priority > Thread.MAX_PRIORITY) {
            // clamp to the highest priority a thread accepts; this is the same kind of
            // configuration error as the "too low" case, so it is reported at the same level
            m_priority = Thread.MAX_PRIORITY;
            LOG.error(Messages.get().getBundle().key(
                Messages.LOG_SEARCH_PRIORITY_TOO_HIGH_2,
                value,
                new Integer(Thread.MAX_PRIORITY)));
        }
    }
    // parameters with any other key are silently ignored
}
/**
* Adds an index source to this search index.<p>
*
* @param sourceName the index source name to add
*/
public void addSourceName(String sourceName) {
// only the name is recorded here, without validation or a check for duplicates
m_sourceNames.add(sourceName);
}
/**
* Checks if this index has been configured correctly.<p>
*
* In case the check fails, the <code>enabled</code> property
* is set to <code>false</code>
*
* @param cms an OpenCms user context to perform the checks with (should have "Administrator" permissions)
*
* @return <code>true</code> in case the index is correctly configured and enabled after the check
*
* @see #isEnabled()
*/
public boolean checkConfiguration(CmsObject cms) {

    if (!isEnabled()) {
        // index was disabled before, there is nothing to verify - just report the state
        if (LOG.isInfoEnabled()) {
            LOG.info(Messages.get().getBundle().key(Messages.LOG_SEARCHINDEX_DISABLED_1, getName()));
        }
        return isEnabled();
    }

    // the index is usable only if the project configured for it can be read
    try {
        cms.readProject(getProject());
        setEnabled(true);
    } catch (CmsException e) {
        // the project could not be read (usually because it does not exist), disable the index
        setEnabled(false);
        if (LOG.isErrorEnabled()) {
            // pass the exception along so the log shows why reading the project failed
            LOG.error(Messages.get().getBundle().key(
                Messages.LOG_SEARCHINDEX_CREATE_BAD_PROJECT_2,
                getProject(),
                getName()), e);
        }
    }
    return isEnabled();
}
/**
* @see java.lang.Object#equals(java.lang.Object)
*/
public boolean equals(Object obj) {

    if (obj == this) {
        return true;
    }
    if (!(obj instanceof CmsSearchIndex)) {
        // also covers obj == null
        return false;
    }
    // two indexes are equal if they have the same name; an index created with the default
    // constructor has no name yet, so the comparison must not dereference a null name
    String otherName = ((CmsSearchIndex)obj).m_name;
    if (m_name == null) {
        return otherName == null;
    }
    return m_name.equals(otherName);
}
/**
* @see org.opencms.configuration.I_CmsConfigurationParameterHandler#getConfiguration()
*/
public Map getConfiguration() {
Map result = new TreeMap();
if (m_priority > 0) {
result.put(PRIORITY, new Integer(m_priority));
}
if (!m_createExcerpt) {
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?