📄 chatsearchmanager.java

📁 openfire 服务器源码下载
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/**
 * $RCSfile$
 * $Revision: 29543 $
 * $Date: 2006-04-19 15:38:04 -0700 (Wed, 19 Apr 2006) $
 *
 * Copyright (C) 2004-2008 Jive Software. All rights reserved.
 *
 * This software is published under the terms of the GNU Public License (GPL),
 * a copy of which is included in this distribution, or a commercial license
 * agreement with Jive.
 */

package org.jivesoftware.xmpp.workgroup.search;

import org.jivesoftware.xmpp.workgroup.AgentSession;
import org.jivesoftware.xmpp.workgroup.Workgroup;
import org.jivesoftware.xmpp.workgroup.event.WorkgroupEventDispatcher;
import org.jivesoftware.xmpp.workgroup.event.WorkgroupEventListener;
import org.jivesoftware.openfire.fastpath.providers.ChatNotes;
import org.jivesoftware.openfire.fastpath.util.TaskEngine;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Searcher;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.jivesoftware.database.DbConnectionManager;
import org.jivesoftware.util.JiveGlobals;
import org.jivesoftware.util.StringUtils;
import org.jivesoftware.util.ClassUtils;
import org.xmpp.component.ComponentManagerFactory;
import org.xmpp.component.Log;
import org.xmpp.packet.JID;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

/**
 * Manages the transcript search feature by defining properties of the search indexer. Each
 * workgroup will use an instance of this class. Each instance can be configured according to
 * the needs of each workgroup or may just use the global configuration. Read the properties
 * section below to learn the variables that can be configured globally and per workgroup.<p>
 * <p/>
 * Indexing can either be done real-time by calling updateIndex(boolean) or rebuildIndex(). Out of
 * the box Live Assistant runs the indexer in timed update mode with a queue that holds the
 * generated transcripts since the last update. Once the queue is full an update will be forced
 * even if the time interval has not yet elapsed. It is possible to configure the
 * size of the queue or even disable it and only update the index based on a timed update.<p>
 * <p/>
 * The automated updating mode can be adjusted by setting how often batch indexing is done. You
 * can adjust this interval to suit your needs. Frequent updates mean that transcripts will be
 * searchable more quickly. Less frequent updates use fewer system resources.<p>
 * <p/>
 * The following global properties are used by this class. Global properties will apply to all the
 * workgroups unless the workgroup has overridden the property.
 * <ul>
 * <li><tt>workgroup.search.frequency.execution</tt> -- number of minutes to wait until the next
 * update process is performed. Default is <tt>5</tt> minutes.</li>
 * <li><tt>workgroup.search.pending.transcripts</tt> -- maximum number of transcripts that can be
 * generated since the last update process was executed before forcing the update process to
 * be executed. A value of -1 disables this feature. Default is <tt>5</tt> transcripts.</li>
 * <li><tt>workgroup.search.frequency.optimization</tt> -- number of hours to wait until the next
 * optimization. Default is <tt>24</tt> hours.</li>
 * <li><tt>workgroup.search.analyzer.className</tt> -- name of the Lucene analyzer class to be
 * used for indexing. If none was defined then {@link StandardAnalyzer} will be used.</li>
 * <li><tt>workgroup.search.analyzer.stopWordList</tt> -- String[] of words to use in the global
 * analyzer. If none was defined then the default stop words defined in Lucene will be used.
 * </li>
 * <li><tt>workgroup.search.maxdays</tt> -- maximum number of days a transcript could be old in
 * order to be included when rebuilding the index. Default is <tt>365</tt> days.</li>
 * </ul>
 * <p/>
 * The following workgroup properties are used by this class. Each workgroup has the option to
 * override the corresponding defined global property.
 * <ul>
 * <li><tt>search.analyzer.className</tt> -- name of the Lucene analyzer class to be
 * used for indexing. If none was defined then the value defined in
 * <tt>workgroup.search.analyzer.className</tt> will be used instead.</li>
 * <li><tt>search.analyzer.stopWordList</tt> -- String[] of words to use in the analyzer defined
 * for the workgroup. If none was defined then the default stop words defined in Lucene will
 * be used.</li>
 * <li><tt>search.maxdays</tt> -- maximum number of days a transcript could be old in
 * order to be included when rebuilding the index. If none was defined then the value defined
 * in <tt>workgroup.search.maxdays</tt> will be used.</li>
 * </ul>
 *
 * @author Gaston Dombiak
 */
public class ChatSearchManager implements WorkgroupEventListener {

    // SQL statements used by this class.

    /**
     * Selects the id, transcript and start time of the sessions of a workgroup that started
     * after a given time and have a transcript, ordered by start time.
     */
    private static final String CHATS_SINCE_DATE =
            "SELECT sessionID,transcript,startTime FROM fpSession WHERE workgroupID=? AND " +
                    "startTime>? AND transcript IS NOT NULL ORDER BY startTime";
    /**
     * Selects the JIDs of the agents recorded for a given session.
     */
    private static final String AGENTS_IN_SESSION =
            "SELECT agentJID FROM fpAgentSession WHERE sessionID=?";
    /**
     * Selects the stored last-updated and last-optimization values of a workgroup's index.
     */
    private static final String LOAD_DATES =
            "SELECT lastUpdated,lastOptimization FROM fpSearchIndex WHERE workgroupID=?";
    /**
     * Inserts the last-updated and last-optimization values of a workgroup's index.
     */
    private static final String INSERT_DATES =
            "INSERT INTO fpSearchIndex(workgroupID, lastUpdated, lastOptimization) VALUES(?,?,?)";
    /**
     * Updates the last-updated and last-optimization values of a workgroup's index.
     */
    private static final String UPDATE_DATES =
            "UPDATE fpSearchIndex SET lastUpdated=?,lastOptimization=? WHERE workgroupID=?";
    /**
     * Deletes the stored index dates of a workgroup.
     */
    private static final String DELETE_DATES =
            "DELETE FROM fpSearchIndex WHERE workgroupID=?";

    /**
     * Managers created so far, keyed by the node of the workgroup's JID. Entries are added by
     * {@link #getInstanceFor(Workgroup)} and removed by {@link #shutdown()}.
     */
    private static Map<String, ChatSearchManager> instances = new ConcurrentHashMap<String, ChatSearchManager>();

    /**
     * Holds the path to the parent folder of the folders that will store the workgroup
     * index files. It is the <tt>index</tt> folder located under the home directory reported
     * by {@link JiveGlobals#getHomeDirectory()}.
     */
    private static String parentFolder = JiveGlobals.getHomeDirectory() + File.separator + "index";
    /**
     * One hour, computed as 60 * 60 * 1000.
     * NOTE(review): presumably expressed in milliseconds -- confirm against its uses.
     */
    private static final long ONE_HOUR = 60 * 60 * 1000;

    /**
     * Hold the workgroup whose chats are being indexed by this instance. Each workgroup will
     * have a ChatSearchManager since each ChatSearchManager may use a different Analyzer according
     * to the workgroup needs.
     */
    private Workgroup workgroup;
    /**
     * Analyzer selected by {@link #loadAnalyzer()} for this workgroup.
     */
    private Analyzer indexerAnalyzer;
    /**
     * Path of the folder of this workgroup's index: the parent index folder followed by the
     * node of the workgroup's JID.
     */
    private String searchDirectory;
    // NOTE(review): searcher and searcherReader start as null; presumably they are created
    // on demand and guarded by searcherLock -- confirm against the code that uses them.
    private Searcher searcher = null;
    private IndexReader searcherReader = null;
    ReadWriteLock searcherLock = new ReentrantReadWriteLock();

    /**
     * Holds the date of the last chat that was added to the index. This information is used for
     * getting the new chats since this date that should be added to the index.
     */
    private Date lastUpdated;
    /**
     * Keeps the last time when the index was optimized. The index is optimized once a day.
     */
    private Date lastOptimization;
    /**
     * Keeps the last date when the updating process was executed. Every time
     * {@link #updateIndex(boolean)} or {@link #rebuildIndex()} are invoked this variable will
     * be updated.
     */
    private Date lastExecution;
    /**
     * Keeps the number of transcripts that have been generated since the last update process
     * was executed.
     */
    private AtomicInteger pendingTranscripts = new AtomicInteger(0);
    /**
     * Caches the filters for performance. The cached filters will be cleared when the index is
     * modified.
     */
    private ConcurrentHashMap<String, Filter> cachedFilters = new ConcurrentHashMap<String, Filter>();

    static {
        // Check if we need to create the parent folder
        File dir = new File(parentFolder);
        if (!dir.exists() || !dir.isDirectory()) {
            dir.mkdir();
        }
    }

    /**
     * Returns the ChatSearchManager that should be used for a given {@link Workgroup}. The index
     * Analyzer that the returned ChatSearchManager will use could be determined by the workgroup
     * property <tt>search.analyzer.className</tt>. If the workgroup property has not been defined
     * then the global Analyzer will be used.<p>
     * <p/>
     * The class of the global Analyzer can be specified setting the
     * <tt>workgroup.search.analyzer.className</tt> property. If this property does not exist
     * then a {@link StandardAnalyzer} will be used as the global Analyzer.<p>
     * <p/>
     * Managers are created lazily, at most one per workgroup node, and are kept until
     * {@link #shutdown()} is invoked. Creation of new managers is serialized.
     *
     * @param workgroup the workgroup to index.
     * @return the ChatSearchManager that should be used for a given workgroup.
     */
    public static ChatSearchManager getInstanceFor(Workgroup workgroup) {
        String workgroupName = workgroup.getJID().getNode();
        // Common case: the manager already exists, so no locking is needed.
        ChatSearchManager answer = instances.get(workgroupName);
        if (answer == null) {
            // Lock on this class instead of on workgroupName.intern(): an interned String is
            // shared by the whole JVM, so any unrelated code locking on an equal String would
            // contend (or deadlock) with this method.
            synchronized (ChatSearchManager.class) {
                // Check again now that the lock is held; another thread may have created it.
                answer = instances.get(workgroupName);
                if (answer == null) {
                    answer = new ChatSearchManager(workgroup);
                    instances.put(workgroupName, answer);
                }
            }
        }
        return answer;
    }

    /**
     * Returns the name of the Lucene analyzer class that is used for indexing. The name is read
     * from the Jive Property <tt>workgroup.search.analyzer.className</tt>; when the property has
     * no value the name of {@link StandardAnalyzer} is returned instead.
     *
     * @return the name of the analyzer class that is used for indexing.
     */
    public static String getAnalyzerClass() {
        String configured = JiveGlobals.getProperty("workgroup.search.analyzer.className");
        return configured != null ? configured : StandardAnalyzer.class.getName();
    }

    /**
     * Stores the name of the Lucene analyzer class to use for indexing in the Jive Property
     * <tt>workgroup.search.analyzer.className</tt>. Whenever the analyzer class changes the
     * search index must be rebuilt for searching to work reliably. The property is left
     * untouched when the given name equals the value returned by {@link #getAnalyzerClass()}.
     *
     * @param className the name of the analyzer class that will be used for indexing.
     * @throws NullPointerException if <tt>className</tt> is null.
     */
    public static void setAnalyzerClass(String className) {
        if (className == null) {
            throw new NullPointerException("Argument is null.");
        }
        // Only write the property when the setting really changes.
        boolean changed = !className.equals(getAnalyzerClass());
        if (changed) {
            JiveGlobals.setProperty("workgroup.search.analyzer.className", className);
        }
    }

    /**
     * Notification that the workgroup service is being shut down. Every manager created so
     * far is stopped and then the registry of managers is emptied, so the GC can claim all
     * the workgroup objects.
     */
    public static void shutdown() {
        Iterator<ChatSearchManager> managers = instances.values().iterator();
        while (managers.hasNext()) {
            managers.next().stop();
        }
        instances.clear();
    }

    /**
     * Stops this manager by removing it from the listeners of the
     * {@link WorkgroupEventDispatcher}.
     */
    private void stop() {
        WorkgroupEventDispatcher.removeListener(this);
    }

    /**
     * Returns the number of minutes to wait until the next update process is performed. The
     * update process may run earlier than that when a given number of transcripts have been
     * generated since the last execution; that limit is given by
     * {@link #getMaxPendingTranscripts()}.
     *
     * @return the value of the <tt>workgroup.search.frequency.execution</tt> property, or
     *         <tt>5</tt> when the property has not been set.
     */
    private static int getExecutionFrequency() {
        final int defaultMinutes = 5;
        return JiveGlobals.getIntProperty("workgroup.search.frequency.execution", defaultMinutes);
    }

    /**
     * Returns the maximum number of transcripts that can be generated since the last update
     * process was executed before the update process is forced to run. If the returned
     * value is <= 0 then this functionality will be ignored.<p>
     * <p/>
     * In other words, the update process runs periodically but it may be forced to run sooner
     * once that many transcripts have been generated since the last update execution.
     *
     * @return the value of the <tt>workgroup.search.pending.transcripts</tt> property, or
     *         <tt>5</tt> when the property has not been set.
     */
    private static int getMaxPendingTranscripts() {
        final int defaultLimit = 5;
        return JiveGlobals.getIntProperty("workgroup.search.pending.transcripts", defaultLimit);
    }

    /**
     * Returns the number of hours to wait until the next optimization. Optimizing the index
     * makes the searches faster and also reduces the number of index files.
     *
     * @return the value of the <tt>workgroup.search.frequency.optimization</tt> property, or
     *         <tt>24</tt> when the property has not been set.
     */
    private static int getOptimizationFrequency() {
        final int defaultHours = 24;
        return JiveGlobals.getIntProperty("workgroup.search.frequency.optimization", defaultHours);
    }

    ChatSearchManager(Workgroup workgroup) {
        this.workgroup = workgroup;
        searchDirectory = parentFolder + File.separator + workgroup.getJID().getNode();
        loadAnalyzer();
        loadLastUpdated();
        WorkgroupEventDispatcher.addListener(this);
    }

    /**
     * Loads the search analyzer of this workgroup and stores it in the
     * <tt>indexerAnalyzer</tt> field.<p>
     * <p/>
     * If the workgroup defines the property <tt>search.analyzer.className</tt> then that class
     * is used together with the workgroup property <tt>search.analyzer.stopWordList</tt>.
     * Otherwise the class returned by {@link #getAnalyzerClass()} is used together with the
     * global property <tt>workgroup.search.analyzer.stopWordList</tt>. Stop word lists are
     * comma separated; surrounding whitespace is removed from each word and empty entries are
     * ignored.<p>
     * <p/>
     * If the analyzer cannot be created the error is logged and a {@link StandardAnalyzer}
     * is used instead, configured with the stop words when there are any.
     */
    private void loadAnalyzer() {
        // First check if the workgroup should use a special Analyzer
        String analyzerClass = workgroup.getProperties().getProperty("search.analyzer.className");
        String words;
        if (analyzerClass != null) {
            words = workgroup.getProperties().getProperty("search.analyzer.stopWordList");
        }
        else {
            // Use the global analyzer
            analyzerClass = getAnalyzerClass();
            words = JiveGlobals.getProperty("workgroup.search.analyzer.stopWordList");
        }

        // Get the stop word list if there was one
        List<String> stopWords = new ArrayList<String>();
        if (words != null) {
            StringTokenizer st = new StringTokenizer(words, ",");
            while (st.hasMoreTokens()) {
                String word = st.nextToken().trim();
                // Entries such as "a, ,b" or a trailing ", " would otherwise produce an empty
                // stop word and make the list look non-empty.
                if (word.length() > 0) {
                    stopWords.add(word);
                }
            }
        }

        Analyzer analyzer = null;
        try {
            analyzer = getAnalyzerInstance(analyzerClass, stopWords);
        }
        catch (Exception e) {
            // Deliberately not rethrown: indexing falls back to the standard analyzer below.
            ComponentManagerFactory.getComponentManager().getLog().error("Error loading custom " +
                    "search analyzer: " + analyzerClass, e);
        }
        // If the analyzer is null, use the standard analyzer.
        if (analyzer == null) {
            if (stopWords.isEmpty()) {
                analyzer = new StandardAnalyzer();
            }
            else {
                analyzer = new StandardAnalyzer(stopWords.toArray(new String[stopWords.size()]));
            }
        }

        indexerAnalyzer = analyzer;
    }

    private Analyzer getAnalyzerInstance(String analyzerClass, List stopWords) throws Exception {
        Analyzer analyzer = null;
        // Load the class.
        Class c = null;
        try {
            c = ClassUtils.forName(analyzerClass);
        }
        catch (ClassNotFoundException e) {
            c = getClass().getClassLoader().loadClass(analyzerClass);
        }
        // Create an instance of the custom analyzer.
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -