segmentwriter.php
来自「PHP 知识管理系统(基于树结构的知识管理系统), 英文原版的PHP源码。」· PHP 代码 · 共 601 行 · 第 1/2 页
PHP
601 行
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/** Zend_Search_Lucene_Index_SegmentInfo */
require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
/** Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter */
require_once 'Zend/Search/Lucene/Index/SegmentWriter/DocumentWriter.php';
/** Zend_Search_Lucene_Index_SegmentWriter_StreamWriter */
require_once 'Zend/Search/Lucene/Index/SegmentWriter/StreamWriter.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
abstract class Zend_Search_Lucene_Index_SegmentWriter
{
/**
* Expert: The fraction of terms in the "dictionary" which should be stored
* in RAM. Smaller values use more memory, but make searching slightly
* faster, while larger values use less memory and make searching slightly
* slower. Searching is typically not dominated by dictionary lookup, so
* tweaking this is rarely useful.
*
* @var integer
*/
static public $indexInterval = 128;
/** Expert: The fraction of TermDocs entries stored in skip tables.
* Larger values result in smaller indexes, greater acceleration, but fewer
* accelerable cases, while smaller values result in bigger indexes,
* less acceleration and more
* accelerable cases. More detailed experiments would be useful here.
*
* 0x7FFFFFFF indicates that we don't use skip data
* Default value is 16
*
* @var integer
*/
static public $skipInterval = 0x7FFFFFFF;
/**
* Number of docs in a segment
*
* @var integer
*/
protected $_docCount = 0;
/**
* Segment name
*
* @var string
*/
protected $_name;
/**
* File system adapter.
*
* @var Zend_Search_Lucene_Storage_Directory
*/
protected $_directory;
/**
* List of the index files.
* Used for automatic compound file generation
*
* @var array
*/
protected $_files = array();
/**
* Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
*
* @var array
*/
protected $_fields = array();
/**
* Normalization factors.
* An array fieldName => normVector
* normVector is a binary string.
* Each byte corresponds to an indexed document in a segment and
* encodes normalization factor (float value, encoded by
* Zend_Search_Lucene_Search_Similarity::encodeNorm())
*
* @var array
*/
protected $_norms = array();
/**
* '.fdx' file - Stored Fields, the field index.
*
* @var Zend_Search_Lucene_Storage_File
*/
protected $_fdxFile = null;
/**
* '.fdt' file - Stored Fields, the field data.
*
* @var Zend_Search_Lucene_Storage_File
*/
protected $_fdtFile = null;
/**
 * Creates a segment writer for the given directory and segment name.
 *
 * Both values are only remembered here; no file is created by the
 * constructor itself.
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory Directory the segment files are created in
 * @param string $name Segment name, used as the base name of the segment files
 */
public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name)
{
    $this->_name      = $name;
    $this->_directory = $directory;
}
/**
 * Registers a document field with the segment.
 *
 * A field name that is not known yet gets a new FieldInfo entry whose
 * number is the current count of registered fields. For a field name
 * that is already registered, the isIndexed and storeTermVector flags of
 * the existing entry are bitwise OR-ed with the flags of the given field
 * and the existing number is kept.
 *
 * @param Zend_Search_Lucene_Field $field
 * @return integer Number of the field within this segment
 */
public function addField(Zend_Search_Lucene_Field $field)
{
    $fieldName = $field->name;

    if (isset($this->_fields[$fieldName])) {
        // Known field: merge the flags into the existing entry
        $known = $this->_fields[$fieldName];
        $known->isIndexed       |= $field->isIndexed;
        $known->storeTermVector |= $field->storeTermVector;

        return $known->number;
    }

    // New field: numbers are assigned sequentially, starting from zero
    $fieldNumber = count($this->_fields);
    $this->_fields[$fieldName] = new Zend_Search_Lucene_Index_FieldInfo(
        $fieldName,
        $field->isIndexed,
        $fieldNumber,
        $field->storeTermVector
    );

    return $fieldNumber;
}
/**
 * Registers a field described by an existing FieldInfo object.
 *
 * The given object is not stored itself. For an unknown field name a new
 * FieldInfo is created from its name and flags, numbered with the current
 * count of registered fields. For a known field name the isIndexed and
 * storeTermVector flags of the registered entry are bitwise OR-ed with
 * the given ones and the registered number is kept.
 *
 * @param Zend_Search_Lucene_Index_FieldInfo $fieldInfo
 * @return integer Number of the field within this segment
 */
public function addFieldInfo(Zend_Search_Lucene_Index_FieldInfo $fieldInfo)
{
    $fieldName = $fieldInfo->name;

    if (isset($this->_fields[$fieldName])) {
        // Known field: merge the flags into the registered entry
        $registered = $this->_fields[$fieldName];
        $registered->isIndexed       |= $fieldInfo->isIndexed;
        $registered->storeTermVector |= $fieldInfo->storeTermVector;

        return $registered->number;
    }

    // New field: the incoming number is ignored, a segment-local one is assigned
    $fieldNumber = count($this->_fields);
    $this->_fields[$fieldName] = new Zend_Search_Lucene_Index_FieldInfo(
        $fieldName,
        $fieldInfo->isIndexed,
        $fieldNumber,
        $fieldInfo->storeTermVector
    );

    return $fieldNumber;
}
/**
 * Returns the fields registered with this segment.
 *
 * @return array Zend_Search_Lucene_Index_FieldInfo objects, keyed by field name
 */
public function getFieldInfos()
{
    $fieldInfos = $this->_fields;

    return $fieldInfos;
}
/**
 * Writes the stored fields of one document and counts the document.
 *
 * The '.fdx' and '.fdt' files are created on the first call and added to
 * the list of segment files. For each call the current '.fdt' position is
 * appended to '.fdx', followed in '.fdt' by the number of fields and, per
 * field: its segment field number, a flag byte (0x01 tokenized,
 * 0x02 binary) and its value.
 *
 * Every field must already be registered with this segment, since its
 * number is looked up by field name.
 *
 * @param array $storedFields array of Zend_Search_Lucene_Field objects
 */
public function addStoredFields($storedFields)
{
    if (!isset($this->_fdxFile)) {
        $fdxFileName = $this->_name . '.fdx';
        $fdtFileName = $this->_name . '.fdt';

        $this->_fdxFile = $this->_directory->createFile($fdxFileName);
        $this->_fdtFile = $this->_directory->createFile($fdtFileName);

        $this->_files[] = $fdxFileName;
        $this->_files[] = $fdtFileName;
    }

    // The index file records where this document's data starts in the data file
    $fdtOffset = $this->_fdtFile->tell();
    $this->_fdxFile->writeLong($fdtOffset);

    $this->_fdtFile->writeVInt(count($storedFields));

    foreach ($storedFields as $field) {
        $this->_fdtFile->writeVInt($this->_fields[$field->name]->number);

        $isBinary  = $field->isBinary;
        $fieldBits = 0x00;
        if ($field->isTokenized) {
            $fieldBits |= 0x01;
        }
        if ($isBinary) {
            $fieldBits |= 0x02;
        }
        /* 0x04 - third bit, compressed (ZLIB) - is never set here */
        $this->_fdtFile->writeByte($fieldBits);

        if ($isBinary) {
            // Binary values are written as a length prefix plus the raw bytes
            $this->_fdtFile->writeVInt(strlen($field->value));
            $this->_fdtFile->writeBytes($field->value);
        } else {
            $this->_fdtFile->writeString($field->getUtf8Value());
        }
    }

    $this->_docCount++;
}
/**
 * Returns the number of documents in this segment.
 *
 * The counter starts at zero and is incremented once per
 * addStoredFields() call.
 *
 * @return integer
 */
public function count()
{
    $docCount = $this->_docCount;

    return $docCount;
}
/**
 * Dumps the Field Info (.fnm) segment file.
 *
 * Writes the number of registered fields, then for each field its name
 * and a flag byte (0x01 indexed, 0x02 term vector stored). For every
 * indexed field a separate '.f<number>' file is also created and filled
 * with that field's norm vector. All created files are added to the list
 * of segment files, the norm files first and the '.fnm' file last.
 */
protected function _dumpFNM()
{
    $fnmFileName = $this->_name . '.fnm';
    $fnmFile     = $this->_directory->createFile($fnmFileName);

    $fnmFile->writeVInt(count($this->_fields));

    foreach ($this->_fields as $fieldInfo) {
        $fnmFile->writeString($fieldInfo->name);

        $flags = 0x00;
        if ($fieldInfo->isIndexed) {
            $flags |= 0x01;
        }
        if ($fieldInfo->storeTermVector) {
            $flags |= 0x02;
        }
        // not supported yet: 0x04 - term positions are stored with the term vectors
        // not supported yet: 0x08 - term offsets are stored with the term vectors
        $fnmFile->writeByte($flags);

        if (!$fieldInfo->isIndexed) {
            continue;
        }

        // Norms are kept per field name; the file name uses the field number
        $normFileName = $this->_name . '.f' . $fieldInfo->number;
        $normFile     = $this->_directory->createFile($normFileName);
        $normFile->writeBytes($this->_norms[$fieldInfo->name]);
        $this->_files[] = $normFileName;
    }

    $this->_files[] = $fnmFileName;
}
/**
* Term Dictionary file
*
* @var Zend_Search_Lucene_Storage_File
*/
private $_tisFile = null;
/**
* Term Dictionary index file
*
* @var Zend_Search_Lucene_Storage_File
*/
private $_tiiFile = null;
/**
* Frequencies file
*
* @var Zend_Search_Lucene_Storage_File
*/
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?