📄 segmentwriter.php
字号:
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */

/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';

/** Zend_Search_Lucene_Index_SegmentInfo */
require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';


/**
 * Base class for segment writers.
 *
 * Collects field infos, stored fields and norms for one segment and writes
 * the corresponding '.fdx', '.fdt', '.fnm' and '.nrm' files through the
 * directory (file system adapter) passed to the constructor.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
abstract class Zend_Search_Lucene_Index_SegmentWriter
{
    /**
     * Expert: The fraction of terms in the "dictionary" which should be stored
     * in RAM. Smaller values use more memory, but make searching slightly
     * faster, while larger values use less memory and make searching slightly
     * slower. Searching is typically not dominated by dictionary lookup, so
     * tweaking this is rarely useful.
     *
     * @var integer
     */
    public static $indexInterval = 128;

    /**
     * Expert: The fraction of TermDocs entries stored in skip tables.
     * Larger values result in smaller indexes, greater acceleration, but fewer
     * accelerable cases, while smaller values result in bigger indexes,
     * less acceleration and more accelerable cases.
     * More detailed experiments would be useful here.
     *
     * 0x7FFFFFFF indicates that we don't use skip data
     *
     * Note: not used in current implementation
     *
     * @var integer
     */
    public static $skipInterval = 0x7FFFFFFF;

    /**
     * Expert: The maximum number of skip levels. Smaller values result in
     * slightly smaller indexes, but slower skipping in big posting lists.
     *
     * 0 indicates that we don't use skip data
     *
     * Note: not used in current implementation
     *
     * @var integer
     */
    public static $maxSkipLevels = 0;

    /**
     * Number of docs in a segment
     *
     * @var integer
     */
    protected $_docCount = 0;

    /**
     * Segment name
     *
     * @var string
     */
    protected $_name;

    /**
     * File system adapter.
     *
     * @var Zend_Search_Lucene_Storage_Directory
     */
    protected $_directory;

    /**
     * List of the index files (file names).
     * Used for automatic compound file generation
     *
     * @var array
     */
    protected $_files = array();

    /**
     * Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects
     * for this segment, keyed by field name.
     *
     * @var array
     */
    protected $_fields = array();

    /**
     * Normalization factors.
     * An array fieldName => normVector
     * normVector is a binary string.
     * Each byte corresponds to an indexed document in a segment and
     * encodes normalization factor (float value, encoded by
     * Zend_Search_Lucene_Search_Similarity::encodeNorm())
     *
     * @var array
     */
    protected $_norms = array();

    /**
     * '.fdx' file - Stored Fields, the field index.
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    protected $_fdxFile = null;

    /**
     * '.fdt' file - Stored Fields, the field data.
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    protected $_fdtFile = null;


    /**
     * Object constructor.
     *
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @param string $name
     */
    public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name)
    {
        $this->_directory = $directory;
        $this->_name      = $name;
    }


    /**
     * Add field to the segment
     *
     * A field name seen for the first time gets the next free field number.
     * For an already known name the isIndexed / storeTermVector flags are
     * OR-ed into the existing field info, so a flag that was set once is
     * never cleared by a later occurrence.
     *
     * Returns actual field number
     *
     * @param Zend_Search_Lucene_Field $field
     * @return integer
     */
    public function addField(Zend_Search_Lucene_Field $field)
    {
        if (!isset($this->_fields[$field->name])) {
            // Field numbers are assigned sequentially in order of first appearance
            $fieldNumber = count($this->_fields);
            $this->_fields[$field->name] =
                                new Zend_Search_Lucene_Index_FieldInfo($field->name,
                                                                       $field->isIndexed,
                                                                       $fieldNumber,
                                                                       $field->storeTermVector);

            return $fieldNumber;
        } else {
            $this->_fields[$field->name]->isIndexed       |= $field->isIndexed;
            $this->_fields[$field->name]->storeTermVector |= $field->storeTermVector;

            return $this->_fields[$field->name]->number;
        }
    }

    /**
     * Add fieldInfo to the segment
     *
     * Same registration rules as addField(): a new name gets the next free
     * field number (the number carried by $fieldInfo itself is not reused),
     * a known name gets its flags OR-ed with the ones supplied.
     *
     * Returns actual field number
     *
     * @param Zend_Search_Lucene_Index_FieldInfo $fieldInfo
     * @return integer
     */
    public function addFieldInfo(Zend_Search_Lucene_Index_FieldInfo $fieldInfo)
    {
        if (!isset($this->_fields[$fieldInfo->name])) {
            $fieldNumber = count($this->_fields);
            $this->_fields[$fieldInfo->name] =
                                new Zend_Search_Lucene_Index_FieldInfo($fieldInfo->name,
                                                                       $fieldInfo->isIndexed,
                                                                       $fieldNumber,
                                                                       $fieldInfo->storeTermVector);

            return $fieldNumber;
        } else {
            $this->_fields[$fieldInfo->name]->isIndexed       |= $fieldInfo->isIndexed;
            $this->_fields[$fieldInfo->name]->storeTermVector |= $fieldInfo->storeTermVector;

            return $this->_fields[$fieldInfo->name]->number;
        }
    }

    /**
     * Returns array of FieldInfo objects.
     *
     * @return array
     */
    public function getFieldInfos()
    {
        return $this->_fields;
    }

    /**
     * Add stored fields information
     *
     * Creates the '.fdx' and '.fdt' files on first use, then appends one
     * document record: the current '.fdt' position goes to '.fdx', followed
     * in '.fdt' by the number of stored fields and, per field, its field
     * number, a flag byte and the value. Increments the document counter.
     *
     * Every field is looked up in the registered field infos by name, so it
     * is expected to have been registered (addField()/addFieldInfo()) before.
     *
     * @param array $storedFields array of Zend_Search_Lucene_Field objects
     */
    public function addStoredFields($storedFields)
    {
        if (!isset($this->_fdxFile)) {
            $this->_fdxFile = $this->_directory->createFile($this->_name . '.fdx');
            $this->_fdtFile = $this->_directory->createFile($this->_name . '.fdt');

            $this->_files[] = $this->_name . '.fdx';
            $this->_files[] = $this->_name . '.fdt';
        }

        // Index entry: offset of this document's record inside the '.fdt' file
        $this->_fdxFile->writeLong($this->_fdtFile->tell());
        $this->_fdtFile->writeVInt(count($storedFields));
        foreach ($storedFields as $field) {
            $this->_fdtFile->writeVInt($this->_fields[$field->name]->number);
            $fieldBits = ($field->isTokenized ? 0x01 : 0x00) |
                         ($field->isBinary ?    0x02 : 0x00) |
                         0x00; /* 0x04 - third bit, compressed (ZLIB) */
            $this->_fdtFile->writeByte($fieldBits);
            if ($field->isBinary) {
                // Binary values are written as a byte count followed by the raw bytes
                $this->_fdtFile->writeVInt(strlen($field->value));
                $this->_fdtFile->writeBytes($field->value);
            } else {
                $this->_fdtFile->writeString($field->getUtf8Value());
            }
        }

        $this->_docCount++;
    }

    /**
     * Returns the total number of documents in this segment.
     *
     * @return integer
     */
    public function count()
    {
        return $this->_docCount;
    }

    /**
     * Return segment name
     *
     * @return string
     */
    public function getName()
    {
        return $this->_name;
    }

    /**
     * Dump Field Info (.fnm) segment file
     *
     * Also writes the '.nrm' file: the 'NRM' header, the format specifier
     * byte 0xFF and then the norm vector of every indexed field, in field
     * iteration order. Both file names are appended to the index files list.
     */
    protected function _dumpFNM()
    {
        $fnmFile = $this->_directory->createFile($this->_name . '.fnm');
        $fnmFile->writeVInt(count($this->_fields));

        $nrmFile = $this->_directory->createFile($this->_name . '.nrm');
        // Write header
        $nrmFile->writeBytes('NRM');
        // Write format specifier
        $nrmFile->writeByte((int)0xFF);

        foreach ($this->_fields as $field) {
            $fnmFile->writeString($field->name);

            // Flag bits: 0x01 - field is indexed, 0x02 - term vector is stored.
            // Not supported yet:
            //   0x04 - term positions are stored with the term vectors
            //   0x08 - term offsets are stored with the term vectors
            $fnmFile->writeByte(($field->isIndexed       ? 0x01 : 0x00) |
                                ($field->storeTermVector ? 0x02 : 0x00));

            if ($field->isIndexed) {
                // pre-2.1 index mode (not used now)
                // $normFileName = $this->_name . '.f' . $field->number;
                // $fFile = $this->_directory->createFile($normFileName);
                // $fFile->writeBytes($this->_norms[$field->name]);
                // $this->_files[] = $normFileName;

                $nrmFile->writeBytes($this->_norms[$field->name]);
            }
        }

        $this->_files[] = $this->_name . '.fnm';
        $this->_files[] = $this->_name . '.nrm';
    }


    /**
     * Term Dictionary file
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    private $_tisFile = null;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -