segmentinfo.php
来自「PHP 知识管理系统(基于树结构的知识管理系统), 英文原版的PHP源码。」· PHP 代码 · 共 972 行 · 第 1/2 页
PHP
972 行
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Index_DictionaryLoader */
require_once 'Zend/Search/Lucene/Index/DictionaryLoader.php';
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Index_SegmentInfo
{
/**
* Number of docs in a segment, as passed to the constructor.
* count() returns it unchanged, i.e. deleted documents are included.
*
* @var integer
*/
private $_docCount;
/**
* Segment name.
* Used as the base name of the segment files (e.g. $name . '.cfs', $name . '.sti').
*
* @var string
*/
private $_name;
/**
* Term Dictionary Index
*
* Array of arrays (Zend_Search_Lucene_Index_Term objects are represented as arrays because
* of performance considerations)
* [0] -> $termValue
* [1] -> $termFieldNum
*
* Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
*
* Set to null by the constructor and loaded by getTermInfo() on first use.
*
* @var array|null
*/
private $_termDictionary;
/**
* Term Dictionary Index TermInfos
*
* Array of arrays (Zend_Search_Lucene_Index_TermInfo objects are represented as arrays because
* of performance considerations)
* [0] -> $docFreq
* [1] -> $freqPointer
* [2] -> $proxPointer
* [3] -> $skipOffset
* [4] -> $indexPointer
*
* Loaded together with $_termDictionary.
*
* @var array
*/
private $_termDictionaryInfos;
/**
* Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment,
* indexed by field number.
*
* @var array
*/
private $_fields;
/**
* Field positions in a dictionary.
* (Term dictionary contains fields ordered by names)
*
* Maps a field number to the position of that field in the name-sorted field list.
*
* @var array
*/
private $_fieldsDicPositions;
/**
* Associative array where the key is the file name and the value is data offset
* in a compound segment file (.cfs).
*
* Stays empty when the segment has no .cfs file.
*
* @var array
*/
private $_segFiles;
/**
* Associative array where the key is the file name and the value is the size of that
* file's data within the compound segment file (.cfs).
*
* Only filled in by the constructor when the segment has a .cfs file.
*
* @var array
*/
private $_segFileSizes;
/**
* File system adapter.
*
* NOTE(review): the constructor documents its argument as Zend_Search_Lucene_Storage_Directory;
* the narrower type below may be out of date - confirm.
*
* @var Zend_Search_Lucene_Storage_Directory_Filesystem
*/
private $_directory;
/**
* Normalization factors.
* An array fieldNum => normVector (the constructor keys it by field number)
* normVector is a binary string.
* Each byte corresponds to an indexed document in a segment and
* encodes normalization factor (float value, encoded by
* Zend_Search_Lucene_Search_Similarity::encodeNorm())
*
* @var array
*/
private $_norms = array();
/**
* List of deleted documents.
* bitset if bitset extension is loaded or array otherwise.
*
* With the bitset extension this is the raw byte string read from the .del file;
* otherwise it is an array with an entry (value 1) for every deleted document number.
* null when the segment has no .del file.
*
* @var mixed
*/
private $_deleted;
/**
* $this->_deleted update flag
*
* NOTE(review): presumably marks that $_deleted was changed and still has to be
* written back; the code that sets or reads it is not in this part of the file.
*
* @var boolean
*/
private $_deletedDirty = false;
/**
 * Zend_Search_Lucene_Index_SegmentInfo constructor needs Segmentname,
 * Documents count and Directory as a parameter.
 *
 * Reads three pieces of segment metadata:
 *  - the table of contents of the compound file (.cfs), if the segment has one;
 *  - the field infos (.fnm);
 *  - the list of deleted documents (.del), if present.
 *
 * @param string $name
 * @param integer $docCount
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @throws Zend_Search_Lucene_Exception if the .fnm file can be found neither as a
 *                                      separate file nor inside the compound file
 * @throws Zend_Search_Exception        re-thrown when reading the .del file fails for
 *                                      any reason other than the file being absent
 */
public function __construct($name, $docCount, $directory)
{
    $this->_name           = $name;
    $this->_docCount       = $docCount;
    $this->_directory      = $directory;
    $this->_termDictionary = null;

    $this->_segFiles     = array();
    // Initialised as well, so the property is an array even for segments without a .cfs file
    $this->_segFileSizes = array();

    if ($this->_directory->fileExists($name . '.cfs')) {
        $cfsFile       = $this->_directory->getFileObject($name . '.cfs');
        $segFilesCount = $cfsFile->readVInt();

        for ($count = 0; $count < $segFilesCount; $count++) {
            $dataOffset = $cfsFile->readLong();
            if ($count != 0) {
                // The previous entry ends where the current one starts
                $this->_segFileSizes[$fileName] = $dataOffset - end($this->_segFiles);
            }
            $fileName = $cfsFile->readString();
            $this->_segFiles[$fileName] = $dataOffset;
        }
        if ($count != 0) {
            // The last entry extends to the end of the compound file
            $this->_segFileSizes[$fileName] = $this->_directory->fileLength($name . '.cfs') - $dataOffset;
        }
    }

    $fnmFile     = $this->openCompoundFile('.fnm');
    $fieldsCount = $fnmFile->readVInt();
    $fieldNames  = array();
    $fieldNums   = array();
    $this->_fields = array();
    for ($count = 0; $count < $fieldsCount; $count++) {
        $fieldName = $fnmFile->readString();
        $fieldBits = $fnmFile->readByte();
        // Bits 0x01 and 0x02 are forwarded as the 2nd and 4th FieldInfo arguments
        // (presumably "is indexed" and "store term vector" - confirm against FieldInfo)
        $this->_fields[$count] = new Zend_Search_Lucene_Index_FieldInfo($fieldName,
                                                                        $fieldBits & 1,
                                                                        $count,
                                                                        $fieldBits & 2 );
        if ($fieldBits & 0x10) {
            // norms are omitted for the indexed field
            $this->_norms[$count] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
        }

        $fieldNums[$count]  = $count;
        $fieldNames[$count] = $fieldName;
    }
    // Order the field numbers by field name; flipping the result gives
    // field number => position in the name-sorted list
    array_multisort($fieldNames, SORT_ASC, SORT_REGULAR, $fieldNums);
    $this->_fieldsDicPositions = array_flip($fieldNums);

    try {
        $delFile = $this->openCompoundFile('.del');

        // The first value is divided by 8 to get the length of the bit vector in bytes;
        // ceil() returns a float, so it is cast back to an integer length
        $byteCount = $delFile->readInt();
        $byteCount = (int) ceil($byteCount/8);
        $bitCount  = $delFile->readInt();

        if ($bitCount == 0) {
            $delBytes = '';
        } else {
            $delBytes = $delFile->readBytes($byteCount);
        }

        if (extension_loaded('bitset')) {
            $this->_deleted = $delBytes;
        } else {
            $this->_deleted = array();
            // Walk only the bytes actually held: $delBytes is empty when no bit is set,
            // so looping up to $byteCount would read uninitialized string offsets
            $delBytesLength = strlen($delBytes);
            for ($count = 0; $count < $delBytesLength; $count++) {
                // Square brackets: the {} string offset syntax no longer parses on PHP 8
                $byte = ord($delBytes[$count]);
                for ($bit = 0; $bit < 8; $bit++) {
                    if ($byte & (1<<$bit)) {
                        $this->_deleted[$count*8 + $bit] = 1;
                    }
                }
            }
        }
    } catch(Zend_Search_Exception $e) {
        // A missing .del file only means that nothing has been deleted;
        // every other failure is passed on to the caller
        if (strpos($e->getMessage(), 'compound file doesn\'t contain') !== false ) {
            $this->_deleted = null;
        } else {
            throw $e;
        }
    }
}
/**
 * Opens an index file of this segment, which may be stored within the compound index file
 *
 * A separate file named <segment name><extension> is preferred. Otherwise the
 * compound (.cfs) file is returned, positioned at the start of the requested data.
 *
 * @param string $extension     File extension including the leading dot (e.g. '.fnm')
 * @param boolean $shareHandler Passed through to the directory's getFileObject()
 * @throws Zend_Search_Lucene_Exception if the file is neither separate nor listed in the compound file
 * @return Zend_Search_Lucene_Storage_File
 */
public function openCompoundFile($extension, $shareHandler = true)
{
    $segmentFile = $this->_name . $extension;

    // Try to open common file first
    if ($this->_directory->fileExists($segmentFile)) {
        return $this->_directory->getFileObject($segmentFile, $shareHandler);
    }

    if (isset($this->_segFiles[$segmentFile])) {
        // Hand back the compound file with the pointer moved to the recorded data offset
        $cfsFile = $this->_directory->getFileObject($this->_name . '.cfs', $shareHandler);
        $cfsFile->seek($this->_segFiles[$segmentFile]);

        return $cfsFile;
    }

    throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain '
                                         . $segmentFile . ' file.' );
}
/**
 * Get the length of an index file of this segment
 *
 * A separate file named <segment name><extension> is measured through the directory.
 * Otherwise the size recorded for that entry of the compound file is returned.
 *
 * @param string $extension File extension including the leading dot (e.g. '.tii')
 * @throws Zend_Search_Lucene_Exception if the file is neither separate nor listed in the compound file
 * @return integer
 */
public function compoundFileLength($extension)
{
    $segmentFile = $this->_name . $extension;

    // Try to get common file first
    if ($this->_directory->fileExists($segmentFile)) {
        return $this->_directory->fileLength($segmentFile);
    }

    if (isset($this->_segFileSizes[$segmentFile])) {
        return $this->_segFileSizes[$segmentFile];
    }

    throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain '
                                         . $segmentFile . ' file.' );
}
/**
 * Returns field index or -1 if field is not found
 *
 * Names are compared as strings. A loose (==) comparison treats two numeric-looking
 * names as numbers, so e.g. '10' would also match a field called '1e1' or '10.0'.
 *
 * @param string $fieldName
 * @return integer
 */
public function getFieldNum($fieldName)
{
    // Cast once so that callers passing a non-string scalar keep working
    $fieldName = (string) $fieldName;

    foreach ($this->_fields as $field) {
        if ((string) $field->name === $fieldName) {
            return $field->number;
        }
    }

    return -1;
}
/**
 * Returns the field info stored under the given field number
 *
 * @param integer $fieldNum
 * @return Zend_Search_Lucene_Index_FieldInfo
 */
public function getField($fieldNum)
{
    $fieldInfo = $this->_fields[$fieldNum];

    return $fieldInfo;
}
/**
 * Returns the field names of this segment as an array name => name.
 * If $indexed is true, fields whose isIndexed flag is not set are left out.
 *
 * @param boolean $indexed
 * @return array
 */
public function getFields($indexed = false)
{
    $names = array();

    foreach ($this->_fields as $fieldInfo) {
        // Skip non-indexed fields only when the caller asked for indexed ones
        if ($indexed && !$fieldInfo->isIndexed) {
            continue;
        }

        $names[$fieldInfo->name] = $fieldInfo->name;
    }

    return $names;
}
/**
 * Returns the FieldInfo objects of this segment, indexed by field number.
 *
 * @return array
 */
public function getFieldInfos()
{
    $fieldInfos = $this->_fields;

    return $fieldInfos;
}
/**
 * Returns the document count the segment was created with;
 * deleted documents are included (see numDocs() for the live count).
 *
 * @return integer
 */
public function count()
{
    $totalDocs = $this->_docCount;

    return $totalDocs;
}
/**
 * Returns the number of documents marked as deleted.
 *
 * @return integer
 */
private function _deletedCount()
{
    // No deletions list at all
    if ($this->_deleted === null) {
        return 0;
    }

    // With the bitset extension the list is a bit string and has to be expanded first
    $deletedDocs = extension_loaded('bitset')
                 ? bitset_to_array($this->_deleted)
                 : $this->_deleted;

    return count($deletedDocs);
}
/**
 * Returns the number of documents in this segment that are not deleted.
 *
 * @return integer
 */
public function numDocs()
{
    if (!$this->hasDeletions()) {
        return $this->_docCount;
    }

    return $this->_docCount - $this->_deletedCount();
}
/**
 * Translates a field number into its position in the fields dictionary
 *
 * @param integer $fieldNum
 * @return integer
 */
private function _getFieldPosition($fieldNum) {
    if (isset($this->_fieldsDicPositions[$fieldNum])) {
        return $this->_fieldsDicPositions[$fieldNum];
    }

    // Treat values which are not in a translation table as a 'direct value'
    return $fieldNum;
}
/**
 * Returns the name this segment was constructed with
 *
 * @return string
 */
public function getName()
{
    $segmentName = $this->_name;

    return $segmentName;
}
/**
* TermInfo cache
*
* Keyed by $term->key(). getTermInfo() re-inserts an entry on every cache hit, so the
* array order runs from least recently used to most recently used.
*
* Size is 1024.
* NOTE(review): the code enforcing the 1024 limit is not in this part of the file - confirm.
* Numbers are used instead of class constants because of performance considerations
*
* @var array
*/
private $_termInfoCache = array();
/**
 * Shrinks the TermInfo cache to its 768 most recently used entries
 *
 * getTermInfo() moves an entry to the end of the array on every hit, so the
 * oldest entries are at the front and the tail of the array is what gets kept.
 * A cache holding 768 entries or fewer is left untouched.
 *
 * @return void
 */
private function _cleanUpTermInfoCache()
{
    // Nothing to evict - never empty a cache that is already within the limit
    if (count($this->_termInfoCache) <= 768) {
        return;
    }

    // leave 768 last used term infos (keys are preserved explicitly so that
    // numeric-looking keys would not be renumbered)
    $this->_termInfoCache = array_slice($this->_termInfoCache, -768, 768, true);
}
/**
* Scans terms dictionary and returns term info
*
* @param Zend_Search_Lucene_Index_Term $term
* @return Zend_Search_Lucene_Index_TermInfo
*/
public function getTermInfo(Zend_Search_Lucene_Index_Term $term)
{
$termKey = $term->key();
if (isset($this->_termInfoCache[$termKey])) {
$termInfo = $this->_termInfoCache[$termKey];
// Move termInfo to the end of cache
unset($this->_termInfoCache[$termKey]);
$this->_termInfoCache[$termKey] = $termInfo;
return $termInfo;
}
if ($this->_termDictionary === null) {
// Check, if index is already serialized
if ($this->_directory->fileExists($this->_name . '.sti')) {
// Prefetch dictionary index data
$stiFile = $this->_directory->getFileObject($this->_name . '.sti');
$stiFileData = $stiFile->readBytes($this->_directory->fileLength($this->_name . '.sti'));
// Load dictionary index data
list($this->_termDictionary, $this->_termDictionaryInfos) = unserialize($stiFileData);
} else {
// Prefetch dictionary index data
$tiiFile = $this->openCompoundFile('.tii');
$tiiFileData = $tiiFile->readBytes($this->compoundFileLength('.tii'));
// Load dictionary index data
list($this->_termDictionary, $this->_termDictionaryInfos) =
Zend_Search_Lucene_Index_DictionaryLoader::load($tiiFileData);
$stiFileData = serialize(array($this->_termDictionary, $this->_termDictionaryInfos));
$stiFile = $this->_directory->createFile($this->_name . '.sti');
$stiFile->writeBytes($stiFileData);
}
}
$searchField = $this->getFieldNum($term->field);
if ($searchField == -1) {
return null;
}
$searchDicField = $this->_getFieldPosition($searchField);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?