// FileSystemCollection.java
/*
* Copyright 2003-2004 Michael Franken, Zilverline.
*
* The contents of this file, or the files included with this file, are subject to
* the current version of ZILVERLINE Collaborative Source License for the
* Zilverline Search Engine (the "License"); You may not use this file except in
* compliance with the License.
*
* You may obtain a copy of the License at
*
* http://www.zilverline.org.
*
* See the License for the rights, obligations and
* limitations governing use of the contents of the file.
*
* The Original and Upgraded Code is the Zilverline Search Engine. The developer of
* the Original and Upgraded Code is Michael Franken. Michael Franken owns the
* copyrights in the portions it created. All Rights Reserved.
*
*/
package org.zilverline.core;
import java.io.File;
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.springframework.util.StringUtils;
import org.zilverline.service.CollectionManagerImpl;
import org.zilverline.util.FileUtils;
import org.zilverline.util.StopWatch;
/**
* A Collection is a number of documents in a directory that are indexed together.
*
* @author Michael Franken
* @version $Revision: 1.19 $
*/
public class FileSystemCollection extends AbstractCollection {
/** Logger for Commons Logging, shared by all instances of this class. */
private static Log log = LogFactory.getLog(FileSystemCollection.class);
/**
 * Default Constructor setting all fields to non null defaults.
 */
public FileSystemCollection() {
    // give every inherited field a safe, non-null default so callers can
    // use a freshly constructed collection without NPE surprises
    this.name = "";
    this.url = "";
    this.description = "";
    this.numberOfDocs = 0;
    this.version = 0;
    this.lastIndexed = null;
    this.existsOnDisk = false;
    this.keepCache = false;
    this.isKeepCacheSet = false;
    // other constructor stuff should appear here first ...
    log.debug("in constructor - initializing...");
}
/**
 * Sets existsOnDisk based on whether the collection (contentDir) actually (now) sits on disk.
 *
 * @todo the whole existsOnDisk construction is a little funny, refactor some time
 */
protected void setExistsOnDisk() {
    // a null contentDir can never exist on disk; otherwise ask the filesystem
    existsOnDisk = (contentDir != null) && contentDir.isDirectory();
}
/**
 * Gets the origin from where this collection's documents can be retrieved.
 *
 * @return location such as e:/docs or InBox, or "-" when no content directory is set
 */
public final String getRoot() {
    final File dir = getContentDir();
    return (dir == null) ? "-" : dir.getAbsolutePath();
}
/**
 * Prints Collection as String for logging.
 *
 * @return pretty formatted information about the collection
 */
public final String toString() {
    // build the exact same multi-line report the old concatenation produced
    StringBuffer sb = new StringBuffer();
    sb.append("Collection(").append(id).append("), with name: ").append(name);
    sb.append(",\n\t\tdescription: ").append(description);
    sb.append(",\n\t\tcontentDir: ").append(contentDir);
    sb.append(",\n\t\turl: ").append(url);
    sb.append(",\n\t\texistsOnDisk: ").append(existsOnDisk);
    sb.append(",\n\t\tindexDir: ").append(indexDir);
    sb.append(",\n\t\tcacheDir: ").append(cacheDir);
    sb.append(",\n\t\tcacheUrl: ").append(cacheUrl);
    sb.append(",\n\t\tanalyzer: ").append(analyzer);
    sb.append(",\n\t\tkeepCache: ").append(keepCache);
    sb.append(",\n\t\tisKeepCacheSet: ").append(isKeepCacheSet);
    sb.append(",\n\t\tnumberOfDocs: ").append(numberOfDocs);
    sb.append(",\n\t\tmanager: ").append(manager);
    sb.append(",\n\t\tlastIndexed: ").append(lastIndexed);
    return sb.toString();
}
/**
 * Index the given Collection.
 *
 * <p>Creates (or re-creates) the Lucene index for this collection's content directory. A full
 * rebuild is forced when requested by the caller or when the existing index is missing or
 * invalid; otherwise documents are added incrementally.</p>
 *
 * @param fullIndex indicates whether a full or incremental index should be created
 * @throws IndexException if the Collection can not be indexed
 */
public final void index(final boolean fullIndex) throws IndexException {
    log.info("Starting creation of index of " + this.getContentDir());
    IndexWriter writer = null;
    try {
        // record start time
        StopWatch watch = new StopWatch();
        watch.start();
        // make sure the index exists
        File indexDirectory = this.getIndexDirWithManagerDefaults();
        // reindex if the index is not there or invalid
        int currentNumberOfDocs = 0;
        boolean mustReindex = fullIndex;
        if (!this.isIndexValid()) {
            mustReindex = true;
            // FIX: the mkdirs() result was silently ignored; a failure here used to
            // surface later as an obscure IOException from the IndexWriter constructor
            if (!indexDirectory.mkdirs() && !indexDirectory.isDirectory()) {
                log.warn("Unable to create index directory " + indexDirectory);
            }
        } else {
            currentNumberOfDocs = getNumberOfDocs();
        }
        // create an index(writer); 'true' for the third argument wipes any existing index
        writer = new IndexWriter(indexDirectory, this.createAnalyzer(), mustReindex);
        // see whether there are specific indexing settings in manager
        if (manager.getMergeFactor() != null) {
            writer.setMergeFactor(manager.getMergeFactor().intValue());
        }
        if (manager.getMinMergeDocs() != null) {
            writer.setMaxBufferedDocs(manager.getMinMergeDocs().intValue());
        }
        if (manager.getMaxMergeDocs() != null) {
            writer.setMaxMergeDocs(manager.getMaxMergeDocs().intValue());
        }
        resetCache(fullIndex);
        // prepare Index parameters
        IndexCommand ic = new IndexCommand();
        ic.setWriter(writer);
        ic.setCollection(this);
        ic.setFile(this.getContentDir());
        ic.setInZip(false);
        ic.setStart(true);
        // and start indexing
        this.indexDocs(ic);
        log.debug("Optimizing index of " + this.getContentDir());
        writer.optimize();
        // update the info of this collection
        this.init();
        // record end time and report duration of indexing
        watch.stop();
        log.info("Indexed " + (writer.docCount() - currentNumberOfDocs) + " new documents in " + watch.elapsedTime());
    }
    catch (IndexException e) {
        // FIX: indexDocs() already throws IndexException; don't double-wrap it in
        // another IndexException via the generic catch below
        throw e;
    }
    catch (IOException e) {
        throw new IndexException("Error indexing '" + this.getName() + "'. Possibly unable to remove old index", e);
    }
    catch (Exception e) {
        throw new IndexException("Error indexing '" + this.getName() + "'", e);
    }
    finally {
        if (writer != null) {
            try {
                writer.close();
            }
            catch (IOException e1) {
                // assume the index is made, just can't close, so don't rethrow, just log
                log.error("Error closing index for " + this.getName(), e1);
            }
        }
    }
}
/**
 * Adds a single file (or directory, recursively) to this collection's index.
 *
 * <p>NOTE(review): the original javadoc documented a non-existent {@code fullIndex} parameter,
 * copy-pasted from {@code index(boolean)}; the body is also nearly identical to that method —
 * see the TODO below.</p>
 *
 * @param theFile the file to index into this collection
 * @throws IndexException if the file can not be indexed
 */
// TODO: this really looks like the previous method: refactor!
public final void indexFile(final File theFile) throws IndexException {
log.info("Adding File " + theFile + " to collection " + name);
IndexWriter writer = null;
try {
// record start time
StopWatch watch = new StopWatch();
watch.start();
// make sure the index exists
File indexDirectory = this.getIndexDirWithManagerDefaults();
int currentNumberOfDocs = getNumberOfDocs();
boolean reindex = false;
if (!isIndexValid()) {
log.debug("Index for " + name + " is not valid, create a new one");
reindex = true;
}
// create an index(writer); 'true' for the third argument wipes any existing index
writer = new IndexWriter(indexDirectory, this.createAnalyzer(), reindex);
// see whether there are specific indexing settings in manager
if (manager.getMergeFactor() != null) {
writer.setMergeFactor(manager.getMergeFactor().intValue());
}
if (manager.getMinMergeDocs() != null) {
writer.setMaxBufferedDocs(manager.getMinMergeDocs().intValue());
}
if (manager.getMaxMergeDocs() != null) {
writer.setMaxMergeDocs(manager.getMaxMergeDocs().intValue());
}
// prepare Index parameters
IndexCommand ic = new IndexCommand();
ic.setWriter(writer);
ic.setCollection(this);
ic.setFile(theFile);
ic.setInZip(false);
ic.setStart(true);
// and start indexing
this.indexDocs(ic);
log.debug("Optimizing index of " + this.getContentDir());
writer.optimize();
// update the info of this collection
this.init();
// record end time and report duration of indexing
watch.stop();
log.info("Indexed " + (writer.docCount() - currentNumberOfDocs) + " new documents in " + watch.elapsedTime());
}
catch (IOException e) {
throw new IndexException("Error indexing '" + this.getName() + "'. Possibly unable to remove old index", e);
}
catch (Exception e) {
throw new IndexException("Error indexing '" + this.getName() + "'", e);
}
finally {
if (writer != null) {
try {
writer.close();
}
catch (IOException e1) {
// assume the index is made, just can't close, so don't
// rethrow, just log
log.error("Error closing index for " + this.getName(), e1);
}
}
}
}
/**
* Reads a File from IndexCommand (a directory, 'straight' file or an archive) and creates an index for all files recursively.
*
* <p>
* now supports pdf, rtf, html, txt, rar, zip, chm and doc formats.
* </p>
*
* @param ic IndexCommand
*
* @throws IndexException when Indexing stops
 */