📄 abstractcollection.java

📁 很好的搜索代码,大家都很难下载!抓紧时间啊!不要错过!
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*
 * Copyright 2003-2005 Michael Franken, Zilverline.
 *
 * The contents of this file, or the files included with this file, are subject to
 * the current version of ZILVERLINE Collaborative Source License for the
 * Zilverline Search Engine (the "License"); You may not use this file except in
 * compliance with the License.
 *
 * You may obtain a copy of the License at
 *
 *     http://www.zilverline.org.
 *
 * See the License for the rights, obligations and
 * limitations governing use of the contents of the file.
 *
 * The Original and Upgraded Code is the Zilverline Search Engine. The developer of
 * the Original and Upgraded Code is Michael Franken. Michael Franken owns the
 * copyrights in the portions it created. All Rights Reserved.
 *
 */

package org.zilverline.core;

import java.io.File;
import java.io.IOException;
import java.util.Date;
import java.util.HashSet;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.springframework.util.StringUtils;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;

import org.zilverline.service.CollectionManager;
import org.zilverline.util.FileUtils;

/**
 * AbstractCollection provides common implementation for all
 * DocumentCollections.
 * 
 * @author Michael Franken
 * @version $Revision: 1.12 $
 */
public abstract class AbstractCollection implements DocumentCollection {
	/** Logger for Commons logging, shared by all instances of this class. */
	private static Log log = LogFactory.getLog(AbstractCollection.class);

	/**
	 * String representation of the Analyzer.
	 */
	protected String analyzer;

	/** The Analyzer to be used in indexing and searching. */
	protected transient Analyzer analyzerObject = null;

	/**
	 * The archive cache is used to store the keys of archives that are
	 * extracted 'on-the-fly'.
	 */
	protected transient Set archiveCache;

	/**
	 * The cacheDir is the directory this collection's cache is stored at.
	 *
	 * <p>
	 * The cache is used to (temporarily) store expanded content, such as zip
	 * files.
	 * </p>
	 */
	protected File cacheDir;

	/**
	 * The cacheUrl is the location this collection's cache (if any) is mapped
	 * to as a result of a search. e.g. d:\temp\cache\books\java could be mapped
	 * to https://server/cache/path
	 */
	protected String cacheUrl;

	/**
	 * The contentDir is the directory this collection is stored at.
	 * <p>
	 * e.g. d:\books\java
	 * </p>
	 *
	 * <p>
	 * The <code>contentDir</code> needs to point to an existing directory in
	 * order to be indexed.
	 * </p>
	 */
	protected File contentDir;

	/** Description of the collection. */
	protected String description;

	/** Indicates whether the collection actually exists on disk. */
	protected transient boolean existsOnDisk;

	/** id indicates identity. Used for persistency and presentation. */
	protected Long id;

	/**
	 * The indexDir is the directory where the index is stored.
	 *
	 * <p>
	 * e.g. d:\temp\zilverline\index
	 * </p>
	 */
	protected File indexDir;

	/**
	 * The thread used to index this collection.
	 */
	protected transient Thread indexingThread;

	/**
	 * Attribute used to find out whether <code>keepCache</code> has been set
	 * externally (using setter).
	 */
	protected transient boolean isKeepCacheSet;

	/**
	 * Indicates whether the cache should be kept after indexing.
	 *
	 * <p>
	 * If the cache is kept, search results can return files that live inside,
	 * for instance, zip files.
	 * </p>
	 */
	protected boolean keepCache;

	/** The date of the last index of this collection. */
	protected transient Date lastIndexed;

	/** Reference back to the collectionManager. */
	protected transient CollectionManager manager;

	/** This cache is used to store the MD5 keys of all indexed documents. */
	protected transient Set md5DocumentCache;

	/** Name of collection, also used as part of the name of index. */
	protected String name;

	/**
	 * Variable used to possibly stop the indexing thread.
	 */
	protected transient boolean stopRequested;

	/**
	 * Number of Documents in this collection. Can only be set by actually
	 * consulting the corresponding index.
	 */
	protected transient int numberOfDocs;

	/**
	 * The url is the location this collection is mapped to as a result of a
	 * search. e.g. d:\books\java could be mapped to https://server/path/java/
	 */
	protected String url;

	/** The version of the index of this collection. */
	protected transient long version;

	/**
	 * Provides the Analyzer to use for this collection.
	 *
	 * <p>
	 * When this collection holds its own <code>analyzerObject</code>, that
	 * instance is handed out; otherwise the call is delegated to the
	 * manager's <code>createAnalyzer()</code>. The manager is dereferenced
	 * without a null check.
	 * </p>
	 *
	 * @return the collection's own Analyzer if present, else the one obtained
	 *         from the manager
	 * @todo the analyzer setting and creation is a bit funny, refactor some
	 *       time
	 * @see Analyzer
	 */
	public final Analyzer createAnalyzer() {
		// only fall back to the manager when no collection-specific Analyzer exists
		return (analyzerObject == null) ? manager.createAnalyzer() : analyzerObject;
	}

	/**
	 * Reports whether the collection (contentDir) is present on disk at the
	 * moment of the call.
	 *
	 * <p>
	 * <code>setExistsOnDisk()</code> is invoked before the flag is read, so the
	 * flag is presumably re-evaluated on every call.
	 * </p>
	 *
	 * @return true if the collection exists
	 */
	public final boolean existsOnDisk() {
		// update the flag before handing it out
		this.setExistsOnDisk();
		return this.existsOnDisk;
	}

	/**
	 * Returns the Analyzer setting of this collection in its String form.
	 *
	 * @return the String representation of the Analyzer
	 * @see org.apache.lucene.analysis.Analyzer
	 */
	public final String getAnalyzer() {
		return this.analyzer;
	}

	/**
	 * Returns the archive cache of this collection.
	 *
	 * <p>
	 * The archive cache holds the keys of archives that are extracted
	 * 'on-the-fly'. The field itself is returned, not a copy.
	 * </p>
	 *
	 * @return Set containing the archives that have been cached (so they have
	 *         been extracted)
	 */
	public final Set getArchiveCache() {
		return this.archiveCache;
	}

	/**
	 * Returns the directory on disk where this collection's cache is kept, as
	 * configured on this collection (no defaults are applied).
	 *
	 * @return the cacheDir
	 */
	public final File getCacheDir() {
		return this.cacheDir;
	}

	/**
	 * Resolves the directory where this collection's cache is stored, applying
	 * defaults when no cacheDir has been configured.
	 *
	 * <p>
	 * A cacheDir that is set and non-empty is returned as is. Otherwise the
	 * result is the collection name placed under the manager's cache base
	 * directory. If there is no manager, a warning is logged and
	 * <code>name/cache</code> is used instead. The cache is used to
	 * (temporarily) store expanded content, such as zip files.
	 * </p>
	 *
	 * @return the directory where the cache of this collection is stored on
	 *         disk
	 */
	public final File getCacheDirWithManagerDefaults() {
		boolean hasOwnCacheDir = (cacheDir != null) && !"".equals(cacheDir.toString());
		if (hasOwnCacheDir) {
			return cacheDir;
		}

		if (manager == null) {
			log.warn("Manager for " + name + " should not be null");
			return new File(name, "cache");
		}

		// derive the location from the manager's cache base directory and
		// the name of this collection
		return new File(manager.getCacheBaseDir(), name);
	}

	/**
	 * Returns the URL under which this collection's cached documents can be
	 * retrieved, as configured on this collection (no defaults are applied).
	 *
	 * @return the cacheUrl
	 */
	public final String getCacheUrl() {
		return this.cacheUrl;
	}

	/**
	 * Determines the URL that maps the cacheDir to another location.
	 *
	 * <p>
	 * e.g. A document 'ldap.pdf' in cacheDir 'e:\collection\cache\books\' with
	 * a cacheUrl of 'http://search.company.com/cachedBooks/' will be returned
	 * in a search result as
	 * <code>http://search.company.com/cachedBooks/ldap.pdf</code>
	 * </p>
	 *
	 * <p>
	 * The result always ends with a '/', so that a document name can be
	 * appended directly. This also holds for the <code>file://</code> fallback:
	 * <code>File.toURI()</code> only appends the slash when it can determine
	 * that the path is an existing directory, so it is added here when the
	 * cache directory has not been created yet.
	 * </p>
	 *
	 * @return the cacheUrl of the collection, or the cache directory (with
	 *         manager defaults applied) as a <code>file://</code> URL if
	 *         cacheUrl is null or empty; always terminated by '/'
	 */
	public final String getCacheUrlWithManagerDefaults() {
		String base;
		if (StringUtils.hasLength(cacheUrl)) {
			base = cacheUrl;
		} else {
			base = "file://"
					+ getCacheDirWithManagerDefaults().toURI().getPath();
		}

		// normalise the trailing slash for both the configured and the
		// fallback URL
		return base.endsWith("/") ? base : base + "/";
	}

	/**
	 * Returns the directory where this collection's documents are stored.
	 *
	 * @return the contentDir of the collection
	 */
	public final File getContentDir() {
		return this.contentDir;
	}

	/**
	 * Gets the origin from where this collection's documents can be retrieved.
	 *
	 * <p>
	 * Abstract: each concrete collection supplies its own implementation.
	 * </p>
	 *
	 * @return location such as e:/docs or InBox
	 */
	public abstract String getRoot();

	/**
	 * Returns the description of this collection.
	 *
	 * @return the description of the collection
	 */
	public final String getDescription() {
		return this.description;
	}

	/**
	 * Returns the id that identifies this collection.
	 *
	 * @return the unique id, can be null
	 */
	public final Long getId() {
		return this.id;
	}

	/**
	 * Returns the directory on disk where this collection's index is kept, as
	 * configured on this collection (no defaults are applied).
	 *
	 * @return the indexDir, possibly null
	 */
	public final File getIndexDir() {
		return this.indexDir;
	}

	/**
	 * Resolves the directory where the index of this collection is stored,
	 * applying defaults when no indexDir has been configured.
	 *
	 * <p>
	 * An indexDir that is set and non-empty is returned as is. Otherwise the
	 * result is the collection name placed under the manager's index base
	 * directory. If there is no manager, a warning is logged and
	 * <code>name/index</code> is used instead.
	 * </p>
	 *
	 * @return the directory where the index of this collection is stored on
	 *         disk, never null
	 */
	public final File getIndexDirWithManagerDefaults() {
		boolean hasOwnIndexDir = (indexDir != null) && !"".equals(indexDir.toString());
		if (hasOwnIndexDir) {
			return indexDir;
		}

		if (manager == null) {
			log.warn("Manager for " + name + " should not be null");
			return new File(name, "index");
		}

		// derive the location from the manager's index base directory and
		// the name of this collection
		return new File(manager.getIndexBaseDir(), name);
	}

	/**
	 * Returns the date on which this collection was last indexed.
	 *
	 * <p>
	 * The stored Date instance itself is returned, not a copy.
	 * </p>
	 *
	 * @return the date of the last index, may be null
	 */
	public final Date getLastIndexed() {
		return this.lastIndexed;
	}

	/**
	 * Returns the manager this collection refers back to.
	 *
	 * @todo remove this dependency to service layer.
	 *
	 * @return reference to the CollectionManager holding this Collection
	 */
	public final CollectionManager getManager() {
		return this.manager;
	}

	/**
	 * Returns the cache holding the MD5 hashes of all documents (previously)
	 * indexed. The field itself is returned, not a copy.
	 *
	 * @return Set containing the hashes of all documents (previously) indexed
	 */
	public final Set getMd5DocumentCache() {
		return this.md5DocumentCache;
	}

	/**
	 * Returns the name of this collection.
	 *
	 * @return the name of the collection
	 */
	public final String getName() {
		return this.name;
	}

	/**
	 * Returns the number of documents in this collection.
	 *
	 * <p>
	 * When no indexing is in progress the stored <code>numberOfDocs</code>
	 * value is returned without touching the index. While indexing is in
	 * progress the index is opened and its current document count is
	 * returned. If the index cannot be read, a warning is logged and the
	 * stored value is returned instead.
	 * </p>
	 *
	 * @return number of documents in collection
	 */
	public final int getNumberOfDocs() {
		if (!isIndexingInProgress()) {
			return numberOfDocs;
		}

		IndexReader reader = null;
		try {
			reader = IndexReader.open(getIndexDirWithManagerDefaults());

			if (reader != null) {
				return reader.numDocs();
			}
		} catch (IOException e) {
			log.warn("Error getting index for collection '" + name + "'", e);
		} finally {
			// release the reader on every path, including the early return
			if (reader != null) {
				try {
					reader.close();
				} catch (IOException e1) {
					log.error("Error closing index for collection " + name, e1);
				}
			}
		}

		// reading the live index failed: fall back to the stored count
		return numberOfDocs;
	}

	/**
	 * Returns the URL under which this collection's documents can be
	 * retrieved, as configured on this collection (no defaults are applied).
	 *
	 * @return the url
	 */
	public final String getUrl() {
		return this.url;
	}

	/**
	 * Determines the URL of the collection.
	 * <p>
	 * The URL maps the contentDir to another location. e.g. A document
	 * 'ldap.pdf' in contentDir 'e:\collection\books\' with an URL of
	 * 'http://search.company.com/books/' will be returned in a search result as
	 * <code>http://search.company.com/books/ldap.pdf</code>
	 * </p>
	 * 
	 * @return the URL of the collection as a String, possibly null in the
	 *         exceptional case where there is no contentDir
	 */
	public final String getUrlDefault() {
		if (StringUtils.hasLength(url)) {
			if (url.endsWith("/")) {
				return url;
			} else {
				return url + "/";
			}
		} else {
			if (contentDir != null) {
				return "file://" + contentDir.toURI().getPath();
			} else {
				log.warn("Collection " + name + " does not have a contentDir.");
				return null;
			}
		}
12 下一页
💿 文件大小 133 K
👤 上传用户 cenxudong4
📂 所属分类 Java编程
🏷️ 相关标签

#搜索 #代码 #家
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -