⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 filesystemcollection.java

📁 很好的搜索代码,大家都很难下载!抓紧时间啊!不要错过!
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/*
 * Copyright 2003-2004 Michael Franken, Zilverline.
 *
 * The contents of this file, or the files included with this file, are subject to
 * the current version of ZILVERLINE Collaborative Source License for the
 * Zilverline Search Engine (the "License"); You may not use this file except in
 * compliance with the License.
 *
 * You may obtain a copy of the License at
 *
 *     http://www.zilverline.org.
 *
 * See the License for the rights, obligations and
 * limitations governing use of the contents of the file.
 *
 * The Original and Upgraded Code is the Zilverline Search Engine. The developer of
 * the Original and Upgraded Code is Michael Franken. Michael Franken owns the
 * copyrights in the portions it created. All Rights Reserved.
 *
 */

package org.zilverline.core;

import java.io.File;
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

import org.springframework.util.StringUtils;
import org.zilverline.service.CollectionManagerImpl;
import org.zilverline.util.FileUtils;
import org.zilverline.util.StopWatch;

/**
 * A Collection is a number of documents in a directory that are indexed together.
 * 
 * @author Michael Franken
 * @version $Revision: 1.19 $
 */
public class FileSystemCollection extends AbstractCollection {
    /** Commons Logging logger shared by all instances of this class; final so it cannot be reassigned. */
    private static final Log log = LogFactory.getLog(FileSystemCollection.class);

    /**
     * Default constructor. Text properties start as empty strings, counters at zero and flags switched off;
     * lastIndexed is left unset (null).
     */
    public FileSystemCollection() {
        // descriptive text properties
        name = "";
        description = "";
        url = "";

        // index bookkeeping
        version = 0;
        numberOfDocs = 0;
        lastIndexed = null;

        // state flags
        existsOnDisk = false;
        isKeepCacheSet = false;
        keepCache = false;

        // any further initialisation belongs above this trace statement
        log.debug("in constructor - initializing...");
    }

    /**
     * Refreshes existsOnDisk: true only when contentDir is set and currently is a directory, false otherwise.
     * 
     * @todo the whole existsOnDisk construction is a little funny, refactor some time
     */
    protected void setExistsOnDisk() {
        // short-circuit keeps a missing contentDir from being dereferenced
        existsOnDisk = contentDir != null && contentDir.isDirectory();
    }

    /**
     * Returns the place this collection's documents come from.
     * 
     * @return the absolute path of the content directory (such as e:/docs), or "-" when no content directory is set
     */
    public final String getRoot() {
        return getContentDir() == null ? "-" : getContentDir().getAbsolutePath();
    }

    /**
     * Renders the collection's properties as a multi-line String, intended for logging.
     * 
     * @return the id and name on the first line, followed by one tab-indented line per remaining property
     */
    public final String toString() {
        StringBuffer text = new StringBuffer("Collection(");
        text.append(id).append("), with name: ").append(name);
        text.append(",\n\t\tdescription: ").append(description);
        text.append(",\n\t\tcontentDir: ").append(contentDir);
        text.append(",\n\t\turl: ").append(url);
        text.append(",\n\t\texistsOnDisk: ").append(existsOnDisk);
        text.append(",\n\t\tindexDir: ").append(indexDir);
        text.append(",\n\t\tcacheDir: ").append(cacheDir);
        text.append(",\n\t\tcacheUrl: ").append(cacheUrl);
        text.append(",\n\t\tanalyzer: ").append(analyzer);
        text.append(",\n\t\tkeepCache: ").append(keepCache);
        text.append(",\n\t\tisKeepCacheSet: ").append(isKeepCacheSet);
        text.append(",\n\t\tnumberOfDocs: ").append(numberOfDocs);
        text.append(",\n\t\tmanager: ").append(manager);
        text.append(",\n\t\tlastIndexed: ").append(lastIndexed);
        // md5DocumentCache is not part of the output (it was already commented out of the original string)
        return text.toString();
    }

    /**
     * Indexes the documents found under this collection's content directory.
     * 
     * <p>
     * The index is rebuilt from scratch when <code>fullIndex</code> is true or when {@link #isIndexValid()} reports
     * that the current index can not be used; otherwise documents are added to the existing index. After indexing
     * the index is optimized and the collection's information is refreshed through <code>init()</code>.
     * </p>
     * 
     * @param fullIndex indicates whether a full (true) or incremental (false) index should be created
     * @throws IndexException if the collection can not be indexed; the underlying exception is passed along with it
     */
    public final void index(final boolean fullIndex) throws IndexException {
        log.info("Starting creation of index of " + this.getContentDir());

        IndexWriter writer = null;

        try {
            // record start time
            StopWatch watch = new StopWatch();

            watch.start();

            // make sure the index exists
            File indexDirectory = this.getIndexDirWithManagerDefaults();

            // reindex if the index is not there or invalid
            boolean mustReindex = fullIndex;
            if (!this.isIndexValid()) {
                mustReindex = true;
                indexDirectory.mkdirs();
            }

            // Baseline for the "new documents" figure logged at the end. A rebuilt index starts out empty, so the
            // previous document count only applies when documents are added to the existing index.
            int currentNumberOfDocs = 0;
            if (!mustReindex) {
                currentNumberOfDocs = getNumberOfDocs();
            }

            // create an index(writer); the last argument is Lucene's create flag
            writer = new IndexWriter(indexDirectory, this.createAnalyzer(), mustReindex);
            // see whether there are specific indexing settings in manager
            if (manager.getMergeFactor() != null) {
                writer.setMergeFactor(manager.getMergeFactor().intValue());
            }
            // the manager's minMergeDocs setting is applied as the writer's maxBufferedDocs
            if (manager.getMinMergeDocs() != null) {
                writer.setMaxBufferedDocs(manager.getMinMergeDocs().intValue());
            }

            if (manager.getMaxMergeDocs() != null) {
                writer.setMaxMergeDocs(manager.getMaxMergeDocs().intValue());
            }

            resetCache(fullIndex);

            // prepare Index parameters: start at the content directory itself
            IndexCommand ic = new IndexCommand();

            ic.setWriter(writer);
            ic.setCollection(this);
            ic.setFile(this.getContentDir());
            ic.setInZip(false);
            ic.setStart(true);

            // and start indexing
            this.indexDocs(ic);
            log.debug("Optimizing index of " + this.getContentDir());
            writer.optimize();

            // update the info of this collection
            this.init();

            // record end time and report duration of indexing
            watch.stop();
            log.info("Indexed " + (writer.docCount() - currentNumberOfDocs) + " new documents in " + watch.elapsedTime());
        }
        catch (IOException e) {
            throw new IndexException("Error indexing '" + this.getName() + "'. Possibly unable to remove old index", e);
        }
        catch (Exception e) {
            throw new IndexException("Error indexing '" + this.getName() + "'", e);
        }
        finally {
            // always release the writer, whether indexing succeeded or not
            if (writer != null) {
                try {
                    writer.close();
                }
                catch (IOException e1) {
                    // assume the index is made, just can't close, so don't
                    // rethrow, just log
                    log.error("Error closing index for " + this.getName(), e1);
                }
            }
        }

    }

    /**
     * Adds a single File to the index of this collection.
     * 
     * <p>
     * The existing index is extended, unless {@link #isIndexValid()} reports that it can not be used, in which case
     * a new index is created. After indexing the index is optimized and the collection's information is refreshed
     * through <code>init()</code>.
     * </p>
     * 
     * @param theFile the File to add; it is handed to <code>indexDocs</code> by means of an IndexCommand
     * @throws IndexException if the File can not be indexed; the underlying exception is passed along with it
     */
    // TODO: this duplicates most of index(boolean): refactor!
    public final void indexFile(final File theFile) throws IndexException {
        log.info("Adding File " + theFile + " to collection " + name);

        IndexWriter writer = null;

        try {
            // record start time
            StopWatch watch = new StopWatch();

            watch.start();

            // make sure the index exists
            File indexDirectory = this.getIndexDirWithManagerDefaults();

            // baseline for the "new documents" figure logged at the end
            int currentNumberOfDocs = getNumberOfDocs();

            boolean reindex = false;
            if (!isIndexValid()) {
                log.debug("Index for " + name + " is not valid, create a new one");
                reindex = true;
                // a newly created index starts out empty, so the previous count no longer applies
                currentNumberOfDocs = 0;
            }

            // create an index(writer); the last argument is Lucene's create flag
            writer = new IndexWriter(indexDirectory, this.createAnalyzer(), reindex);
            // see whether there are specific indexing settings in manager
            if (manager.getMergeFactor() != null) {
                writer.setMergeFactor(manager.getMergeFactor().intValue());
            }
            // the manager's minMergeDocs setting is applied as the writer's maxBufferedDocs
            if (manager.getMinMergeDocs() != null) {
                writer.setMaxBufferedDocs(manager.getMinMergeDocs().intValue());
            }

            if (manager.getMaxMergeDocs() != null) {
                writer.setMaxMergeDocs(manager.getMaxMergeDocs().intValue());
            }

            // prepare Index parameters: start at the given File
            IndexCommand ic = new IndexCommand();

            ic.setWriter(writer);
            ic.setCollection(this);
            ic.setFile(theFile);
            ic.setInZip(false);
            ic.setStart(true);

            // and start indexing
            this.indexDocs(ic);
            log.debug("Optimizing index of " + this.getContentDir());
            writer.optimize();

            // update the info of this collection
            this.init();

            // record end time and report duration of indexing
            watch.stop();
            log.info("Indexed " + (writer.docCount() - currentNumberOfDocs) + " new documents in " + watch.elapsedTime());
        }
        catch (IOException e) {
            throw new IndexException("Error indexing '" + this.getName() + "'. Possibly unable to remove old index", e);
        }
        catch (Exception e) {
            throw new IndexException("Error indexing '" + this.getName() + "'", e);
        }
        finally {
            // always release the writer, whether indexing succeeded or not
            if (writer != null) {
                try {
                    writer.close();
                }
                catch (IOException e1) {
                    // assume the index is made, just can't close, so don't
                    // rethrow, just log
                    log.error("Error closing index for " + this.getName(), e1);
                }
            }
        }

    }

    /**
     * Reads a File from IndexCommand (a directory, 'straight' file or an archive) and creates an index for all files recursively.
     * 
     * <p>
     * now supports pdf, rtf, html, txt, rar, zip, chm and doc formats.
     * </p>
     * 
     * @param ic IndexCommand
     * 
     * @throws IndexException when Indexing stops

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -