segmentwriter.php

来自「PHP 知识管理系统(基于树结构的知识管理系统), 英文原版的PHP源码。」· PHP 代码 · 共 601 行 · 第 1/2 页

PHP
601
字号
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */


/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';

/** Zend_Search_Lucene_Index_SegmentInfo */
require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';

/** Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter */
require_once 'Zend/Search/Lucene/Index/SegmentWriter/DocumentWriter.php';

/** Zend_Search_Lucene_Index_SegmentWriter_StreamWriter */
require_once 'Zend/Search/Lucene/Index/SegmentWriter/StreamWriter.php';


/**
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
abstract class Zend_Search_Lucene_Index_SegmentWriter
{
    /**
     * Expert: The fraction of terms in the "dictionary" which should be stored
     * in RAM.  Smaller values use more memory, but make searching slightly
     * faster, while larger values use less memory and make searching slightly
     * slower.  Searching is typically not dominated by dictionary lookup, so
     * tweaking this is rarely useful.
     *
     * @var integer
     */
    static public $indexInterval = 128;

    /** Expert: The fraction of TermDocs entries stored in skip tables.
     * Larger values result in smaller indexes, greater acceleration, but fewer
     * accelerable cases, while smaller values result in bigger indexes,
     * less acceleration and more
     * accelerable cases. More detailed experiments would be useful here.
     *
     * 0x7FFFFFFF indicates that we don't use skip data.
     * Java Lucene's default value is 16; the value set below is 0x7FFFFFFF
     * (skip data disabled).
     *
     * @var integer
     */
    static public $skipInterval = 0x7FFFFFFF;

    /**
     * Number of docs in a segment
     *
     * @var integer
     */
    protected $_docCount = 0;

    /**
     * Segment name
     *
     * @var string
     */
    protected $_name;

    /**
     * File system adapter.
     *
     * @var Zend_Search_Lucene_Storage_Directory
     */
    protected $_directory;

    /**
     * List of the index files.
     * Used for automatic compound file generation
     *
     * @var array
     */
    protected $_files = array();

    /**
     * Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
     *
     * @var array
     */
    protected $_fields = array();

    /**
     * Normalization factors.
     * An array fieldName => normVector
     * normVector is a binary string.
     * Each byte corresponds to an indexed document in a segment and
     * encodes normalization factor (float value, encoded by
     * Zend_Search_Lucene_Search_Similarity::encodeNorm())
     *
     * @var array
     */
    protected $_norms = array();


    /**
     * '.fdx'  file - Stored Fields, the field index.
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    protected $_fdxFile = null;

    /**
     * '.fdt'  file - Stored Fields, the field data.
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    protected $_fdtFile = null;


    /**
     * Create a writer for one segment.
     *
     * Only stores the storage directory and the segment name; no files are
     * created or opened at this point.
     *
     * @param Zend_Search_Lucene_Storage_Directory $directory directory the segment files are created in
     * @param string $name segment name, used as the prefix of the segment file names
     */
    public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name)
    {
        $this->_name      = $name;
        $this->_directory = $directory;
    }


    /**
     * Register a document field with this segment.
     *
     * A field name seen for the first time receives the next free field
     * number (the current size of the field list). For a name that is already
     * registered, the "indexed" and "store term vector" flags are OR-ed into
     * the existing entry and the previously assigned number is kept.
     *
     * @param Zend_Search_Lucene_Field $field
     * @return integer field number within this segment
     */
    public function addField(Zend_Search_Lucene_Field $field)
    {
        $fieldName = $field->name;

        if (isset($this->_fields[$fieldName])) {
            // Known field: merge the flags into the existing entry, keep its number.
            // Bitwise OR is used on purpose so the stored flag type matches the
            // original `|=` behaviour.
            $known = $this->_fields[$fieldName];
            $known->isIndexed       = $known->isIndexed       | $field->isIndexed;
            $known->storeTermVector = $known->storeTermVector | $field->storeTermVector;

            return $known->number;
        }

        // New field: its number is its position in the field list.
        $nextNumber = count($this->_fields);
        $this->_fields[$fieldName] = new Zend_Search_Lucene_Index_FieldInfo(
            $fieldName,
            $field->isIndexed,
            $nextNumber,
            $field->storeTermVector
        );

        return $nextNumber;
    }

    /**
     * Register a field described by an existing FieldInfo object.
     *
     * The passed object is only read. For an unknown field name a new
     * FieldInfo is created with a number assigned by this segment (the current
     * size of the field list); for a known name the "indexed" and
     * "store term vector" flags are OR-ed into the stored entry.
     *
     * @param Zend_Search_Lucene_Index_FieldInfo $fieldInfo
     * @return integer field number within this segment
     */
    public function addFieldInfo(Zend_Search_Lucene_Index_FieldInfo $fieldInfo)
    {
        $key       = $fieldInfo->name;
        $isKnown   = isset($this->_fields[$key]);

        if (!$isKnown) {
            // First occurrence: number the field by its position in the list.
            $assignedNumber      = count($this->_fields);
            $this->_fields[$key] = new Zend_Search_Lucene_Index_FieldInfo(
                $key,
                $fieldInfo->isIndexed,
                $assignedNumber,
                $fieldInfo->storeTermVector
            );

            return $assignedNumber;
        }

        // Already registered: widen the flags, number stays as assigned earlier.
        $registered = $this->_fields[$key];
        $registered->isIndexed       = $registered->isIndexed       | $fieldInfo->isIndexed;
        $registered->storeTermVector = $registered->storeTermVector | $fieldInfo->storeTermVector;

        return $registered->number;
    }

    /**
     * Get the fields registered with this segment.
     *
     * @return array Zend_Search_Lucene_Index_FieldInfo objects keyed by field name
     */
    public function getFieldInfos()
    {
        $fieldInfos = $this->_fields;

        return $fieldInfos;
    }

    /**
     * Append the stored fields of one document to the segment.
     *
     * On the first call the '.fdx' (field index) and '.fdt' (field data)
     * files are created and added to the segment file list. Each call then
     * writes the current '.fdt' position into '.fdx', followed by the field
     * count and one record per field in '.fdt', and advances the document
     * counter by one.
     *
     * Every field must already be registered with this segment (its number is
     * looked up by field name).
     *
     * @param array $storedFields array of Zend_Search_Lucene_Field objects
     */
    public function addStoredFields($storedFields)
    {
        $filesReady = isset($this->_fdxFile);
        if (!$filesReady) {
            $fdxName = $this->_name . '.fdx';
            $fdtName = $this->_name . '.fdt';

            // Create both files first, then record them for compound file generation.
            $this->_fdxFile = $this->_directory->createFile($fdxName);
            $this->_fdtFile = $this->_directory->createFile($fdtName);

            array_push($this->_files, $fdxName, $fdtName);
        }

        $indexFile = $this->_fdxFile;
        $dataFile  = $this->_fdtFile;

        // The index entry points at where this document's data starts.
        $indexFile->writeLong($dataFile->tell());
        $dataFile->writeVInt(count($storedFields));

        foreach ($storedFields as $storedField) {
            $dataFile->writeVInt($this->_fields[$storedField->name]->number);

            // Flag byte: 0x01 tokenized, 0x02 binary.
            // 0x04 (third bit, compressed with ZLIB) is never set here.
            $flags = 0x00;
            if ($storedField->isTokenized) {
                $flags |= 0x01;
            }
            if ($storedField->isBinary) {
                $flags |= 0x02;
            }
            $dataFile->writeByte($flags);

            if (!$storedField->isBinary) {
                $dataFile->writeString($storedField->getUtf8Value());
                continue;
            }

            // Binary values are written as a byte length followed by the raw bytes.
            $dataFile->writeVInt(strlen($storedField->value));
            $dataFile->writeBytes($storedField->value);
        }

        $this->_docCount++;
    }

    /**
     * Get the number of documents written to this segment so far.
     *
     * The counter is advanced by addStoredFields().
     *
     * @return integer
     */
    public function count()
    {
        $documentCount = $this->_docCount;

        return $documentCount;
    }

    /**
     * Write the Field Info ('.fnm') file of the segment.
     *
     * The file holds the field count followed by, for every registered field,
     * its name and a flag byte. For each indexed field a separate norms file
     * named '<segment>.f<fieldNumber>' is also created and filled from the
     * collected normalization vectors. All files created here are added to
     * the segment file list.
     */
    protected function _dumpFNM()
    {
        $fnmName = $this->_name . '.fnm';
        $fnm     = $this->_directory->createFile($fnmName);
        $fnm->writeVInt(count($this->_fields));

        foreach ($this->_fields as $fieldInfo) {
            $fnm->writeString($fieldInfo->name);

            // Flag byte: 0x01 indexed, 0x02 term vector stored.
            // Not supported yet: 0x04 (term positions are stored with the term
            // vectors) and 0x08 (term offsets are stored with the term vectors).
            $flags = 0x00;
            if ($fieldInfo->isIndexed) {
                $flags |= 0x01;
            }
            if ($fieldInfo->storeTermVector) {
                $flags |= 0x02;
            }
            $fnm->writeByte($flags);

            if (!$fieldInfo->isIndexed) {
                continue;
            }

            // Indexed fields get their own norms file.
            $normsName = $this->_name . '.f' . $fieldInfo->number;
            $normsFile = $this->_directory->createFile($normsName);
            $normsFile->writeBytes($this->_norms[$fieldInfo->name]);
            $this->_files[] = $normsName;
        }

        $this->_files[] = $fnmName;
    }



    /**
     * Term Dictionary file
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    private $_tisFile = null;

    /**
     * Term Dictionary index file
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    private $_tiiFile = null;

    /**
     * Frequencies file
     *
     * @var Zend_Search_Lucene_Storage_File
     */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?