📄 chatsearchmanager.java
字号:
/**
* $RCSfile$
* $Revision: 29543 $
* $Date: 2006-04-19 15:38:04 -0700 (Wed, 19 Apr 2006) $
*
* Copyright (C) 2004-2008 Jive Software. All rights reserved.
*
* This software is published under the terms of the GNU Public License (GPL),
* a copy of which is included in this distribution, or a commercial license
* agreement with Jive.
*/
package org.jivesoftware.xmpp.workgroup.search;
import org.jivesoftware.xmpp.workgroup.AgentSession;
import org.jivesoftware.xmpp.workgroup.Workgroup;
import org.jivesoftware.xmpp.workgroup.event.WorkgroupEventDispatcher;
import org.jivesoftware.xmpp.workgroup.event.WorkgroupEventListener;
import org.jivesoftware.openfire.fastpath.providers.ChatNotes;
import org.jivesoftware.openfire.fastpath.util.TaskEngine;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Searcher;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.jivesoftware.database.DbConnectionManager;
import org.jivesoftware.util.JiveGlobals;
import org.jivesoftware.util.StringUtils;
import org.jivesoftware.util.ClassUtils;
import org.xmpp.component.ComponentManagerFactory;
import org.xmpp.component.Log;
import org.xmpp.packet.JID;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
/**
* Manages the transcript search feature by defining properties of the search indexer. Each
* workgroup will use an instance of this class. Each instance can be configured according to
* the needs of each workgroup or may just use the global configuration. Read the properties
* section below to learn the variables that can be configured globally and per workgroup.<p>
* <p/>
* Indexing can either be done real-time by calling updateIndex(boolean) or rebuildIndex(). Out of
* the box Live Assistant runs the indexer in timed update mode with a queue that holds the
* generated transcripts since the last update. Once the queue is full an update will
* be forced even if the time interval has not yet elapsed. It is possible to configure the
* size of the queue or even disable it and only update the index based on a timed update.<p>
* <p/>
* The automated updating mode can be adjusted by setting how often batch indexing is done. You
* can adjust this interval to suit your needs. Frequent updates mean that transcripts will be
* searchable more quickly. Less frequent updates use fewer system resources.<p>
* <p/>
* The following global properties are used by this class. Global properties will apply to all the
* workgroups unless the workgroup has overridden the property.
* <ul>
* <li><tt>workgroup.search.frequency.execution</tt> -- number of minutes to wait until the next
* update process is performed. Default is <tt>5</tt> minutes.</li>
* <li><tt>workgroup.search.pending.transcripts</tt> -- maximum number of transcripts that can be
* generated since the last update process was executed before forcing the update process to
* be executed. A value of -1 disables this feature. Default is <tt>5</tt> transcripts.</li>
* <li><tt>workgroup.search.frequency.optimization</tt> -- number of hours to wait until the next
* optimization. Default is <tt>24</tt> hours.</li>
* <li><tt>workgroup.search.analyzer.className</tt> -- name of the Lucene analyzer class to be
* used for indexing. If none was defined then {@link StandardAnalyzer} will be used.</li>
* <li><tt>workgroup.search.analyzer.stopWordList</tt> -- String[] of words to use in the global
* analyzer. If none was defined then the default stop words defined in Lucene will be used.
* </li>
* <li><tt>workgroup.search.maxdays</tt> -- maximum number of days a transcript could be old in
* order to be included when rebuilding the index. Default is <tt>365</tt> days.</li>
* </ul>
* <p/>
* The following workgroup properties are used by this class. Each workgroup has the option to
* override the corresponding defined global property.
* <ul>
* <li><tt>search.analyzer.className</tt> -- name of the Lucene analyzer class to be
* used for indexing. If none was defined then the value defined in
* <tt>workgroup.search.analyzer.className</tt> will be used instead.</li>
* <li><tt>search.analyzer.stopWordList</tt> -- String[] of words to use in the analyzer defined
* for the workgroup. If none was defined then the default stop words defined in Lucene will
* be used.</li>
* <li><tt>search.maxdays</tt> -- maximum number of days a transcript could be old in
* order to be included when rebuilding the index. If none was defined then the value defined
* in <tt>workgroup.search.maxdays</tt> will be used.</li>
* </ul>
*
* @author Gaston Dombiak
*/
public class ChatSearchManager implements WorkgroupEventListener {
// Selects the session ID, transcript and start time of a given workgroup's sessions that
// started after a given time and have a non-null transcript, ordered by start time.
private static final String CHATS_SINCE_DATE =
"SELECT sessionID,transcript,startTime FROM fpSession WHERE workgroupID=? AND " +
"startTime>? AND transcript IS NOT NULL ORDER BY startTime";
// Selects the agent JIDs stored in fpAgentSession for a given session ID.
private static final String AGENTS_IN_SESSION =
"SELECT agentJID FROM fpAgentSession WHERE sessionID=?";
// Reads the lastUpdated and lastOptimization values stored for a given workgroup.
private static final String LOAD_DATES =
"SELECT lastUpdated,lastOptimization FROM fpSearchIndex WHERE workgroupID=?";
// Inserts the fpSearchIndex row (workgroup ID, lastUpdated, lastOptimization) for a workgroup.
private static final String INSERT_DATES =
"INSERT INTO fpSearchIndex(workgroupID, lastUpdated, lastOptimization) VALUES(?,?,?)";
// Updates the lastUpdated and lastOptimization values stored for a given workgroup.
private static final String UPDATE_DATES =
"UPDATE fpSearchIndex SET lastUpdated=?,lastOptimization=? WHERE workgroupID=?";
// Removes the fpSearchIndex row of a given workgroup.
private static final String DELETE_DATES =
"DELETE FROM fpSearchIndex WHERE workgroupID=?";
// Registry of the managers created so far, keyed by the node of the workgroup JID
// (see getInstanceFor(Workgroup)).
private static Map<String, ChatSearchManager> instances = new ConcurrentHashMap<String, ChatSearchManager>();
/**
* Holds the path to the parent folder of the folders that will store the workgroup
* index files.
*/
private static String parentFolder = JiveGlobals.getHomeDirectory() + File.separator + "index";
// Number of milliseconds in one hour (60 min * 60 s * 1000 ms).
private static final long ONE_HOUR = 60 * 60 * 1000;
/**
* Hold the workgroup whose chats are being indexed by this instance. Each workgroup will
* have a ChatSearchManager since each ChatSearchManager may use a different Analyzer according
* to the workgroup needs.
*/
private Workgroup workgroup;
// Analyzer chosen by loadAnalyzer(): the configured class or, as a fallback, a StandardAnalyzer.
private Analyzer indexerAnalyzer;
// Path of this workgroup's index folder: parentFolder + separator + node of the workgroup JID.
private String searchDirectory;
// Searcher and its underlying reader; both start as null.
// NOTE(review): presumably created lazily and guarded by searcherLock -- their usage is not
// visible in this part of the file, confirm before relying on it.
private Searcher searcher = null;
private IndexReader searcherReader = null;
// Read/write lock with package-private visibility.
// NOTE(review): presumably protects searcher/searcherReader -- confirm against the code using it.
ReadWriteLock searcherLock = new ReentrantReadWriteLock();
/**
* Holds the date of the last chat that was added to the index. This information is used for
* getting the new chats since this date that should be added to the index.
*/
private Date lastUpdated;
/**
* Keeps the last time when the index was optimized. The index is optimized once a day.
*/
private Date lastOptimization;
/**
* Keeps the last date when the updating process was executed. Every time
* {@link #updateIndex(boolean)} or {@link #rebuildIndex()} are invoked this variable will
* be updated.
*/
private Date lastExecution;
/**
* Keeps the number of transcripts that have been generated since the last update process
* was executed.
*/
private AtomicInteger pendingTranscripts = new AtomicInteger(0);
/**
* Caches the filters for performance. The cached filters will be cleared when the index is
* modified.
*/
private ConcurrentHashMap<String, Filter> cachedFilters = new ConcurrentHashMap<String, Filter>();
static {
// Check if we need to create the parent folder
File dir = new File(parentFolder);
if (!dir.exists() || !dir.isDirectory()) {
dir.mkdir();
}
}
/**
 * Returns the ChatSearchManager that should be used for a given {@link Workgroup}, creating
 * and registering it on first use. Managers are keyed by the node of the workgroup JID. The
 * index Analyzer that the returned ChatSearchManager will use could be determined by the
 * workgroup property <tt>search.analyzer.className</tt>. If the workgroup property has not
 * been defined then the global Analyzer will be used.<p>
 * <p/>
 * The class of the global Analyzer can be specified setting the
 * <tt>workgroup.search.analyzer.className</tt> property. If this property does not exist
 * then a {@link StandardAnalyzer} will be used as the global Analyzer.
 *
 * @param workgroup the workgroup to index.
 * @return the ChatSearchManager that should be used for a given workgroup.
 */
public static ChatSearchManager getInstanceFor(Workgroup workgroup) {
    String workgroupName = workgroup.getJID().getNode();
    // Fast path: the manager was already created.
    ChatSearchManager answer = instances.get(workgroupName);
    if (answer == null) {
        // Lock on the private registry instead of on workgroupName.intern(). An interned
        // String is a JVM-wide shared object, so any unrelated code synchronizing on the
        // same text would contend (or deadlock) with this block.
        synchronized (instances) {
            // Re-check under the lock so only one manager is ever created per workgroup.
            answer = instances.get(workgroupName);
            if (answer == null) {
                answer = new ChatSearchManager(workgroup);
                instances.put(workgroupName, answer);
            }
        }
    }
    return answer;
}
/**
 * Returns the name of the Lucene analyzer class that is used for indexing. The name is read
 * from the Jive Property <tt>workgroup.search.analyzer.className</tt>; when the property is
 * not set the class name of {@link StandardAnalyzer} is returned.
 *
 * @return the name of the analyzer class that is used for indexing.
 */
public static String getAnalyzerClass() {
    String configured = JiveGlobals.getProperty("workgroup.search.analyzer.className");
    return configured != null ? configured : StandardAnalyzer.class.getName();
}
/**
 * Stores the name of the Lucene analyzer class to use for indexing in the Jive Property
 * <tt>workgroup.search.analyzer.className</tt>. The property is only written when the new
 * name differs from the value currently returned by {@link #getAnalyzerClass()}. Whenever
 * the analyzer class changes, the search index must be rebuilt for searching to work
 * reliably.
 *
 * @param className the name of the analyzer class that will be used for indexing.
 * @throws NullPointerException if <tt>className</tt> is null.
 */
public static void setAnalyzerClass(String className) {
    if (className == null) {
        throw new NullPointerException("Argument is null.");
    }
    // Only touch the property when the setting actually changes.
    boolean changed = !className.equals(getAnalyzerClass());
    if (changed) {
        JiveGlobals.setProperty("workgroup.search.analyzer.className", className);
    }
}
/**
 * Called when the workgroup service is being shut down. Stops every registered manager and
 * then empties the registry so the GC can claim all the workgroup objects.
 */
public static void shutdown() {
    Iterator<ChatSearchManager> managers = instances.values().iterator();
    while (managers.hasNext()) {
        managers.next().stop();
    }
    instances.clear();
}
/**
* Stops this manager by removing it from the listeners of {@link WorkgroupEventDispatcher}
* (the constructor registered it there).
*/
private void stop() {
WorkgroupEventDispatcher.removeListener(this);
}
/**
 * Returns the number of minutes to wait until the next update process is performed, as
 * configured by the property <tt>workgroup.search.frequency.execution</tt>. The update
 * process may be executed before the specified frequency if a given number of transcripts
 * have been generated since the last execution; that limit is specified by
 * {@link #getMaxPendingTranscripts()}.
 *
 * @return the configured number of minutes between updates, or 5 if the property is not set.
 */
private static int getExecutionFrequency() {
    final int defaultMinutes = 5;
    return JiveGlobals.getIntProperty("workgroup.search.frequency.execution", defaultMinutes);
}
/**
 * Returns the maximum number of transcripts that can be generated since the last update
 * process was executed before forcing the update process to be executed, as configured by
 * the property <tt>workgroup.search.pending.transcripts</tt>. If the returned value is less
 * than or equal to zero then this functionality will be ignored.<p>
 * <p/>
 * In other words, the update process runs periodically but it may be forced to run earlier
 * once a certain number of transcripts have been generated since the last update execution.
 *
 * @return the maximum number of transcripts that can be generated since the last update
 *         process was executed, or 5 if the property is not set.
 */
private static int getMaxPendingTranscripts() {
    final int defaultLimit = 5;
    return JiveGlobals.getIntProperty("workgroup.search.pending.transcripts", defaultLimit);
}
/**
 * Returns the number of hours to wait until the next optimization, as configured by the
 * property <tt>workgroup.search.frequency.optimization</tt>. Optimizing the index makes the
 * searches faster and reduces the number of files too.
 *
 * @return the configured number of hours between optimizations, or 24 if the property is
 *         not set.
 */
private static int getOptimizationFrequency() {
    final int defaultHours = 24;
    return JiveGlobals.getIntProperty("workgroup.search.frequency.optimization", defaultHours);
}
ChatSearchManager(Workgroup workgroup) {
this.workgroup = workgroup;
searchDirectory = parentFolder + File.separator + workgroup.getJID().getNode();
loadAnalyzer();
loadLastUpdated();
WorkgroupEventDispatcher.addListener(this);
}
/**
 * Loads the search analyzer and stores it in <tt>indexerAnalyzer</tt>.
 * <p/>
 * If the workgroup defines the property <tt>search.analyzer.className</tt> then that class
 * and the workgroup property <tt>search.analyzer.stopWordList</tt> are used. Otherwise the
 * global analyzer class ({@link #getAnalyzerClass()}) and the global property
 * <tt>workgroup.search.analyzer.stopWordList</tt> are used. The stop word list is a
 * comma-separated string; each entry is trimmed and blank entries are ignored.
 * <p/>
 * If the configured analyzer cannot be instantiated the error is logged and a
 * {@link StandardAnalyzer} is used instead, built with the stop words when any were defined.
 */
private void loadAnalyzer() {
    // First check if the workgroup should use a special Analyzer
    String analyzerClass = workgroup.getProperties().getProperty("search.analyzer.className");
    String words;
    if (analyzerClass != null) {
        words = workgroup.getProperties().getProperty("search.analyzer.stopWordList");
    }
    else {
        // Use the global analyzer
        analyzerClass = getAnalyzerClass();
        words = JiveGlobals.getProperty("workgroup.search.analyzer.stopWordList");
    }

    // Get the stop word list if there was one
    List<String> stopWords = new ArrayList<String>();
    if (words != null) {
        StringTokenizer st = new StringTokenizer(words, ",");
        while (st.hasMoreTokens()) {
            String word = st.nextToken().trim();
            // Skip blank entries (e.g. "a, ,b"): an empty stop word never matches a token,
            // yet it would make the list non-empty and so replace the default stop words.
            if (word.length() > 0) {
                stopWords.add(word);
            }
        }
    }

    Analyzer analyzer = null;
    try {
        analyzer = getAnalyzerInstance(analyzerClass, stopWords);
    }
    catch (Exception e) {
        ComponentManagerFactory.getComponentManager().getLog().error("Error loading custom " +
                "search analyzer: " + analyzerClass, e);
    }

    // If the analyzer is null, use the standard analyzer.
    if (analyzer == null) {
        if (stopWords.isEmpty()) {
            analyzer = new StandardAnalyzer();
        }
        else {
            analyzer = new StandardAnalyzer(stopWords.toArray(new String[stopWords.size()]));
        }
    }
    indexerAnalyzer = analyzer;
}
private Analyzer getAnalyzerInstance(String analyzerClass, List stopWords) throws Exception {
Analyzer analyzer = null;
// Load the class.
Class c = null;
try {
c = ClassUtils.forName(analyzerClass);
}
catch (ClassNotFoundException e) {
c = getClass().getClassLoader().loadClass(analyzerClass);
}
// Create an instance of the custom analyzer.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -