📄 abstractcollection.java
字号:
/*
* Copyright 2003-2005 Michael Franken, Zilverline.
*
* The contents of this file, or the files included with this file, are subject to
* the current version of ZILVERLINE Collaborative Source License for the
* Zilverline Search Engine (the "License"); You may not use this file except in
* compliance with the License.
*
* You may obtain a copy of the License at
*
* http://www.zilverline.org.
*
* See the License for the rights, obligations and
* limitations governing use of the contents of the file.
*
* The Original and Upgraded Code is the Zilverline Search Engine. The developer of
* the Original and Upgraded Code is Michael Franken. Michael Franken owns the
* copyrights in the portions it created. All Rights Reserved.
*
*/
package org.zilverline.core;
import java.io.File;
import java.io.IOException;
import java.util.Date;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.util.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.zilverline.service.CollectionManager;
import org.zilverline.util.FileUtils;
/**
* AbstractCollection provides common implementation for all
* DocumentCollections.
*
* @author Michael Franken
* @version $Revision: 1.12 $
*/
public abstract class AbstractCollection implements DocumentCollection {
/** Logger for Commons logging, shared by all instances. */
private static Log log = LogFactory.getLog(AbstractCollection.class);
/**
* String representation of Analyzer.
*/
protected String analyzer;
/**
* The Analyzer to be used in indexing and searching. When non-null,
* createAnalyzer() returns it instead of asking the manager.
*/
protected transient Analyzer analyzerObject = null;
/**
* The archive cache is used to store the keys of archives that are
* extracted 'on-the-fly'.
*/
protected transient Set archiveCache;
/**
* The cacheDir is the directory this collection's cache is stored at.
*
* <p>
* The cache is used to (temporarily) store expanded content, such as zip
* files.
* </p>
*
* <p>
* May be null or empty, in which case getCacheDirWithManagerDefaults()
* derives a location instead.
* </p>
*/
protected File cacheDir;
/**
* The cacheUrl is the location this collection's cache (if any) is mapped
* to as a result of a search. e.g. d:\temp\cache\books\java could be mapped
* to https://server/cache/path
*/
protected String cacheUrl;
/**
* The contentDir is the directory this collection is stored at.
* <p>
* e.g. d:\books\java
* </p>
*
* <p>
* The <code>contentDir</code> needs to point to an existing directory in
* order to be indexed.
* </p>
*/
protected File contentDir;
/** Description of collection. */
protected String description;
/** Indicates whether a collection actually exists on disk. */
protected transient boolean existsOnDisk;
/** id indicates identity. Used for persistency and presentation. */
protected Long id;
/**
* The indexDir is the directory where the index is stored.
*
* <p>
* e.g. d:\temp\zilverline\index
* </p>
*
* <p>
* May be null or empty, in which case getIndexDirWithManagerDefaults()
* derives a location instead.
* </p>
*/
protected File indexDir;
/**
* The thread used to index this collection.
*
*/
protected transient Thread indexingThread;
/**
* Attribute used to find out whether <code>keepCache</code> has been set
* externally (using setter).
*/
protected transient boolean isKeepCacheSet;
/**
* Indicates whether the cache should be kept after indexing.
*
* <p>
* If it is kept, search results can return files in for instance zip files.
* </p>
*/
protected boolean keepCache;
/** The date of the last index of this collection. */
protected transient Date lastIndexed;
/**
* Reference back to the collectionManager. Consulted for the default cache
* and index base directories and for creating the default Analyzer.
*/
protected transient CollectionManager manager;
/** This cache is used to store the MD5 keys of all indexed documents. */
protected transient Set md5DocumentCache;
/** Name of collection, also used as part of the name of index. */
protected String name;
/**
* Variable used to possibly stop the indexing thread.
*
* NOTE(review): not declared volatile; if it is written by a thread other
* than the indexing thread, visibility is not guaranteed. Confirm how it is
* accessed.
*/
protected transient boolean stopRequested;
/**
* Number of Documents in this collection. Can only be set by actually
* consulting the corresponding index.
*/
protected transient int numberOfDocs;
/**
* The url is the location this collection is mapped to as a result of a
* search. e.g. d:\books\java could be mapped to https://server/path/java/
*/
protected String url;
/** The version of the index of this collection. */
protected transient long version;
/**
 * Supplies the Analyzer to use for this collection.
 *
 * <p>
 * An Analyzer object already held by this collection takes precedence;
 * otherwise creation is delegated to the manager.
 * </p>
 *
 * @return the collection's own Analyzer if one is set, else the one
 *         created by the manager
 * @todo the analyzer setting and creation is a bit funny, refactor some
 *       time
 * @see Analyzer
 */
public final Analyzer createAnalyzer() {
    // Only fall back to the manager when no collection-specific Analyzer is held.
    if (analyzerObject == null) {
        return manager.createAnalyzer();
    }
    return analyzerObject;
}
/**
 * Reports whether the collection (contentDir) is present on disk at the
 * moment of the call.
 *
 * @return true if the collection exists
 */
public final boolean existsOnDisk() {
    // setExistsOnDisk() is invoked first so the flag is re-evaluated before it is read.
    this.setExistsOnDisk();
    return this.existsOnDisk;
}
/**
 * Returns the String representation of the Analyzer configured for this
 * collection.
 *
 * @return the Analyzer as String
 * @see org.apache.lucene.analysis.Analyzer
 */
public final String getAnalyzer() {
    return this.analyzer;
}
/**
 * Returns the archive cache of this Collection.
 *
 * <p>
 * The archive cache holds the keys of archives that were extracted
 * 'on-the-fly'. The Set held by this collection is returned directly, not
 * a copy.
 * </p>
 *
 * @return Set containing archives that have been cached (so they have been
 *         extracted)
 */
public final Set getArchiveCache() {
    return this.archiveCache;
}
/**
 * Returns the directory configured as this collection's cache location,
 * without applying any defaults.
 *
 * @return Returns the cacheDir.
 */
public final File getCacheDir() {
    return this.cacheDir;
}
/**
 * Resolves the directory where this collection's cache is stored.
 *
 * <p>
 * An explicitly set, non-empty cacheDir is returned as is. Otherwise the
 * location is built from the manager's cache base directory and the name
 * of this collection; without a manager a warning is logged and
 * <code>name/cache</code> is used. The cache is used to (temporarily)
 * store expanded content, such as zip files.
 * </p>
 *
 * @return The directory where the cache of this collection is stored on
 *         disk.
 */
public final File getCacheDirWithManagerDefaults() {
    final boolean configured = (cacheDir != null)
            && !"".equals(cacheDir.toString());
    if (configured) {
        return cacheDir;
    }
    if (manager == null) {
        log.warn("Manager for " + name + " should not be null");
        return new File(name, "cache");
    }
    // Combine the default cache location with the name of this collection.
    return new File(manager.getCacheBaseDir(), name);
}
/**
 * Returns the URL configured for retrieving this collection's cached
 * documents, without applying any defaults.
 *
 * @return Returns the cacheUrl.
 */
public final String getCacheUrl() {
    return this.cacheUrl;
}
/**
 * Determines the base URL under which cached documents are published.
 *
 * <p>
 * The URL maps the cacheDir to another location. e.g. A document
 * 'ldap.pdf' in the cache directory with a cacheURL of
 * 'http://search.company.com/cachedBooks/' will be returned in a search
 * result as <code>http://search.company.com/cachedBooks/ldap.pdf</code>
 * </p>
 *
 * <p>
 * If no cacheUrl is configured, a <code>file://</code> URL is derived from
 * {@link #getCacheDirWithManagerDefaults()}. In both cases the result ends
 * with a '/' so a document name can be appended directly;
 * <code>File.toURI()</code> alone only adds the slash when the directory
 * can be determined to exist on disk.
 * </p>
 *
 * @return the cacheUrl of the collection, or the cacheDir as URL if
 *         cacheUrl is null or empty; always terminated by '/'
 */
public final String getCacheUrlWithManagerDefaults() {
    final String base;
    if (StringUtils.hasLength(cacheUrl)) {
        base = cacheUrl;
    } else {
        base = "file://"
                + getCacheDirWithManagerDefaults().toURI().getPath();
    }
    // Normalise both branches to the same trailing-slash contract.
    if (base.endsWith("/")) {
        return base;
    }
    return base + "/";
}
/**
 * Returns the directory from which this collection's documents can be
 * retrieved.
 *
 * @return contentDir directory of collection
 */
public final File getContentDir() {
    return this.contentDir;
}
/**
* Gets the origin from where this collection's documents can be retrieved.
*
* <p>
* Abstract: every concrete collection has to supply its own root.
* </p>
*
* @return location such as e:/docs or InBox
*/
public abstract String getRoot();
/**
 * Returns the description of this collection.
 *
 * @return description for the collection
 */
public final String getDescription() {
    return this.description;
}
/**
 * Returns the identity of this collection.
 *
 * @return unique id, can be null
 */
public final Long getId() {
    return this.id;
}
/**
 * Returns the directory configured as this collection's index location,
 * without applying any defaults.
 *
 * @return the indexDir, possibly null.
 */
public final File getIndexDir() {
    return this.indexDir;
}
/**
 * Resolves the directory where the index of this collection is stored on
 * disk.
 *
 * <p>
 * An explicitly set, non-empty indexDir is returned as is. Otherwise the
 * location is built from the manager's index base directory and the name
 * of this collection; without a manager a warning is logged and
 * <code>name/index</code> is used.
 * </p>
 *
 * @return The directory where the index of this collection is stored on
 *         disk, never null
 */
public final File getIndexDirWithManagerDefaults() {
    final boolean configured = (indexDir != null)
            && !"".equals(indexDir.toString());
    if (configured) {
        return indexDir;
    }
    if (manager == null) {
        log.warn("Manager for " + name + " should not be null");
        return new File(name, "index");
    }
    // Combine the default index location with the name of this collection.
    return new File(manager.getIndexBaseDir(), name);
}
/**
 * Returns the date this collection was last indexed.
 *
 * @return date of last Index, may return null
 */
public final Date getLastIndexed() {
    return this.lastIndexed;
}
/**
 * Returns the manager this collection refers back to.
 *
 * @todo remove this dependency to service layer.
 *
 * @return Reference to the CollectionManager holding this Collection.
 */
public final CollectionManager getManager() {
    return this.manager;
}
/**
 * Returns the cache of MD5 hashes of all documents (previously) indexed.
 * The Set held by this collection is returned directly, not a copy.
 *
 * @return Set containing hashes of all documents (previously) indexed
 */
public final Set getMd5DocumentCache() {
    return this.md5DocumentCache;
}
/**
 * Returns the name of this collection.
 *
 * @return name of collection
 */
public final String getName() {
    return this.name;
}
/**
 * Get the number of documents in this collection.
 *
 * <p>
 * When no indexing is in progress the stored count is returned, which is
 * a cheap operation. While indexing is in progress the index is opened
 * and its current document count is returned instead; if the index cannot
 * be read a warning is logged and the stored count is returned.
 * </p>
 *
 * @return number of documents in collection
 */
public final int getNumberOfDocs() {
    if (!isIndexingInProgress()) {
        return numberOfDocs;
    }
    IndexReader reader = null;
    try {
        reader = IndexReader.open(getIndexDirWithManagerDefaults());
        if (reader != null) {
            return reader.numDocs();
        }
    } catch (IOException readFailure) {
        log.warn("Error getting index for collection '" + name + "'",
                readFailure);
    } finally {
        // Always release the reader, also when returning from inside the try.
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException closeFailure) {
                log.error("Error closing index for collection " + name,
                        closeFailure);
            }
        }
    }
    // Reading the live index failed: fall back to the stored count.
    return numberOfDocs;
}
/**
 * Returns the URL configured for retrieving this collection's documents,
 * without applying any defaults.
 *
 * @return the url
 */
public final String getUrl() {
    return this.url;
}
/**
* Determines the URL of the collection.
* <p>
* The URL maps the contentDir to another location. e.g. A document
* 'ldap.pdf' in contentDir 'e:\collection\books\' with an URL of
* 'http://search.company.com/books/' will be returned in a search result as
* <code>http://search.company.com/books/ldap.pdf</code>
* </p>
*
* @return the URL of the collection as a String, possibly null in the
* exceptional case where there is neither a url nor a contentDir
*/
public final String getUrlDefault() {
if (StringUtils.hasLength(url)) {
if (url.endsWith("/")) {
return url;
} else {
return url + "/";
}
} else {
if (contentDir != null) {
return "file://" + contentDir.toURI().getPath();
} else {
log.warn("Collection " + name + " does not have a contentDir.");
return null;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -