segmentinfo.php

来自「PHP 知识管理系统(基于树结构的知识管理系统), 英文原版的PHP源码。」· PHP 代码 · 共 972 行 · 第 1/2 页

PHP
972
字号
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */

/** Zend_Search_Lucene_Index_DictionaryLoader */
require_once 'Zend/Search/Lucene/Index/DictionaryLoader.php';


/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';


/**
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Index_SegmentInfo
{
    /**
     * Number of docs in a segment
     *
     * @var integer
     */
    private $_docCount;

    /**
     * Segment name
     *
     * @var string
     */
    private $_name;

    /**
     * Term Dictionary Index
     *
     * Array of arrays (Zend_Search_Lucene_Index_Term objects are represented as arrays because
     * of performance considerations)
     * [0] -> $termValue
     * [1] -> $termFieldNum
     *
     * Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
     *
     * @var array
     */
    private $_termDictionary;

    /**
     * Term Dictionary Index TermInfos
     *
     * Array of arrays (Zend_Search_Lucene_Index_TermInfo objects are represented as arrays because
     * of performance considerations)
     * [0] -> $docFreq
     * [1] -> $freqPointer
     * [2] -> $proxPointer
     * [3] -> $skipOffset
     * [4] -> $indexPointer
     *
     * @var array
     */
    private $_termDictionaryInfos;

    /**
     * Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
     *
     * @var array
     */
    private $_fields;

    /**
     * Field positions in a dictionary.
     * (Term dictionary contains filelds ordered by names)
     *
     * @var array
     */
    private $_fieldsDicPositions;


    /**
     * Associative array where the key is the file name and the value is data offset
     * in a compound segment file (.csf).
     *
     * @var array
     */
    private $_segFiles;

    /**
     * Associative array where the key is the file name and the value is file size (.csf).
     *
     * @var array
     */
    private $_segFileSizes;


    /**
     * File system adapter.
     *
     * @var Zend_Search_Lucene_Storage_Directory_Filesystem
     */
    private $_directory;

    /**
     * Normalization factors.
     * An array fieldName => normVector
     * normVector is a binary string.
     * Each byte corresponds to an indexed document in a segment and
     * encodes normalization factor (float value, encoded by
     * Zend_Search_Lucene_Search_Similarity::encodeNorm())
     *
     * @var array
     */
    private $_norms = array();

    /**
     * List of deleted documents.
     * bitset if bitset extension is loaded or array otherwise.
     *
     * @var mixed
     */
    private $_deleted;

    /**
     * $this->_deleted update flag
     *
     * @var boolean
     */
    private $_deletedDirty = false;


    /**
     * Zend_Search_Lucene_Index_SegmentInfo constructor needs Segmentname,
     * Documents count and Directory as a parameter.
     *
     * @param string $name
     * @param integer $docCount
     * @param Zend_Search_Lucene_Storage_Directory $directory
     */
    public function __construct($name, $docCount, $directory)
    {
        $this->_name = $name;
        $this->_docCount = $docCount;
        $this->_directory = $directory;
        $this->_termDictionary = null;

        $this->_segFiles = array();
        if ($this->_directory->fileExists($name . '.cfs')) {
            $cfsFile = $this->_directory->getFileObject($name . '.cfs');
            $segFilesCount = $cfsFile->readVInt();

            for ($count = 0; $count < $segFilesCount; $count++) {
                $dataOffset = $cfsFile->readLong();
                if ($count != 0) {
                    $this->_segFileSizes[$fileName] = $dataOffset - end($this->_segFiles);
                }
                $fileName = $cfsFile->readString();
                $this->_segFiles[$fileName] = $dataOffset;
            }
            if ($count != 0) {
                $this->_segFileSizes[$fileName] = $this->_directory->fileLength($name . '.cfs') - $dataOffset;
            }
        }

        $fnmFile = $this->openCompoundFile('.fnm');
        $fieldsCount = $fnmFile->readVInt();
        $fieldNames = array();
        $fieldNums  = array();
        $this->_fields = array();
        for ($count=0; $count < $fieldsCount; $count++) {
            $fieldName = $fnmFile->readString();
            $fieldBits = $fnmFile->readByte();
            $this->_fields[$count] = new Zend_Search_Lucene_Index_FieldInfo($fieldName,
                                                                            $fieldBits & 1,
                                                                            $count,
                                                                            $fieldBits & 2 );
            if ($fieldBits & 0x10) {
                // norms are omitted for the indexed field
                $this->_norms[$count] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
            }

            $fieldNums[$count]  = $count;
            $fieldNames[$count] = $fieldName;
        }
        array_multisort($fieldNames, SORT_ASC, SORT_REGULAR, $fieldNums);
        $this->_fieldsDicPositions = array_flip($fieldNums);

        try {
            $delFile = $this->openCompoundFile('.del');

            $byteCount = $delFile->readInt();
            $byteCount = ceil($byteCount/8);
            $bitCount  = $delFile->readInt();

            if ($bitCount == 0) {
                $delBytes = '';
            } else {
                $delBytes = $delFile->readBytes($byteCount);
            }

            if (extension_loaded('bitset')) {
                $this->_deleted = $delBytes;
            } else {
                $this->_deleted = array();
                for ($count = 0; $count < $byteCount; $count++) {
                    $byte = ord($delBytes{$count});
                    for ($bit = 0; $bit < 8; $bit++) {
                        if ($byte & (1<<$bit)) {
                            $this->_deleted[$count*8 + $bit] = 1;
                        }
                    }
                }
            }
        } catch(Zend_Search_Exception $e) {
            if (strpos($e->getMessage(), 'compound file doesn\'t contain') !== false ) {
                $this->_deleted = null;
            } else {
                throw $e;
            }
        }
    }

    /**
     * Opens index file stoted within compound index file
     *
     * @param string $extension
     * @param boolean $shareHandler
     * @throws Zend_Search_Lucene_Exception
     * @return Zend_Search_Lucene_Storage_File
     */
    public function openCompoundFile($extension, $shareHandler = true)
    {
        $filename = $this->_name . $extension;

        // Try to open common file first
        if ($this->_directory->fileExists($filename)) {
            return $this->_directory->getFileObject($filename, $shareHandler);
        }

        if( !isset($this->_segFiles[$filename]) ) {
            throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain '
                                       . $filename . ' file.' );
        }

        $file = $this->_directory->getFileObject($this->_name . '.cfs', $shareHandler);
        $file->seek($this->_segFiles[$filename]);
        return $file;
    }

    /**
     * Get compound file length
     *
     * @param string $extension
     * @return integer
     */
    public function compoundFileLength($extension)
    {
        $filename = $this->_name . $extension;

        // Try to get common file first
        if ($this->_directory->fileExists($filename)) {
            return $this->_directory->fileLength($filename);
        }

        if( !isset($this->_segFileSizes[$filename]) ) {
            throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain '
                                       . $filename . ' file.' );
        }

        return $this->_segFileSizes[$filename];
    }

    /**
     * Returns field index or -1 if field is not found
     *
     * @param string $fieldName
     * @return integer
     */
    public function getFieldNum($fieldName)
    {
        foreach( $this->_fields as $field ) {
            if( $field->name == $fieldName ) {
                return $field->number;
            }
        }

        return -1;
    }

    /**
     * Returns field info for specified field
     *
     * @param integer $fieldNum
     * @return ZSearchFieldInfo
     */
    public function getField($fieldNum)
    {
        return $this->_fields[$fieldNum];
    }

    /**
     * Returns array of fields.
     * if $indexed parameter is true, then returns only indexed fields.
     *
     * @param boolean $indexed
     * @return array
     */
    public function getFields($indexed = false)
    {
        $result = array();
        foreach( $this->_fields as $field ) {
            if( (!$indexed) || $field->isIndexed ) {
                $result[ $field->name ] = $field->name;
            }
        }
        return $result;
    }

    /**
     * Returns array of FieldInfo objects.
     *
     * @return array
     */
    public function getFieldInfos()
    {
        return $this->_fields;
    }

    /**
     * Returns the total number of documents in this segment (including deleted documents).
     *
     * @return integer
     */
    public function count()
    {
        return $this->_docCount;
    }

    /**
     * Returns number of deleted documents.
     *
     * @return integer
     */
    private function _deletedCount()
    {
        if ($this->_deleted === null) {
            return 0;
        }

        if (extension_loaded('bitset')) {
            return count(bitset_to_array($this->_deleted));
        } else {
            return count($this->_deleted);
        }
    }

    /**
     * Returns the total number of non-deleted documents in this segment.
     *
     * @return integer
     */
    public function numDocs()
    {
        if ($this->hasDeletions()) {
            return $this->_docCount - $this->_deletedCount();
        } else {
            return $this->_docCount;
        }
    }

    /**
     * Get field position in a fields dictionary
     *
     * @param integer $fieldNum
     * @return integer
     */
    private function _getFieldPosition($fieldNum) {
        // Treat values which are not in a translation table as a 'direct value'
        return isset($this->_fieldsDicPositions[$fieldNum]) ?
                           $this->_fieldsDicPositions[$fieldNum] : $fieldNum;
    }

    /**
     * Return segment name
     *
     * @return string
     */
    public function getName()
    {
        return $this->_name;
    }


    /**
     * TermInfo cache
     *
     * Size is 1024.
     * Numbers are used instead of class constants because of performance considerations
     *
     * @var array
     */
    private $_termInfoCache = array();

    private function _cleanUpTermInfoCache()
    {
        // Clean 256 term infos
        foreach ($this->_termInfoCache as $key => $termInfo) {
            unset($this->_termInfoCache[$key]);

            // leave 768 last used term infos
            if (count($this->_termInfoCache) == 768) {
                break;
            }
        }
    }

    /**
     * Scans terms dictionary and returns term info
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return Zend_Search_Lucene_Index_TermInfo
     */
    public function getTermInfo(Zend_Search_Lucene_Index_Term $term)
    {
        $termKey = $term->key();
        if (isset($this->_termInfoCache[$termKey])) {
            $termInfo = $this->_termInfoCache[$termKey];

            // Move termInfo to the end of cache
            unset($this->_termInfoCache[$termKey]);
            $this->_termInfoCache[$termKey] = $termInfo;

            return $termInfo;
        }


        if ($this->_termDictionary === null) {
            // Check, if index is already serialized
            if ($this->_directory->fileExists($this->_name . '.sti')) {
                // Prefetch dictionary index data
                $stiFile = $this->_directory->getFileObject($this->_name . '.sti');
                $stiFileData = $stiFile->readBytes($this->_directory->fileLength($this->_name . '.sti'));

                // Load dictionary index data
                list($this->_termDictionary, $this->_termDictionaryInfos) = unserialize($stiFileData);
            } else {
                // Prefetch dictionary index data
                $tiiFile = $this->openCompoundFile('.tii');
                $tiiFileData = $tiiFile->readBytes($this->compoundFileLength('.tii'));

                // Load dictionary index data
                list($this->_termDictionary, $this->_termDictionaryInfos) =
                            Zend_Search_Lucene_Index_DictionaryLoader::load($tiiFileData);

                $stiFileData = serialize(array($this->_termDictionary, $this->_termDictionaryInfos));
                $stiFile = $this->_directory->createFile($this->_name . '.sti');
                $stiFile->writeBytes($stiFileData);
            }

        }



        $searchField = $this->getFieldNum($term->field);

        if ($searchField == -1) {
            return null;
        }
        $searchDicField = $this->_getFieldPosition($searchField);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?