📄 multiterm.php
字号:
} else { //$this->_resVector = array_intersect_key($this->_resVector, $nextResVector);
/**
* This code is used as workaround for array_intersect_key() slowness problem.
*/
$updatedVector = array();
foreach ($this->_resVector as $id => $value) {
if (isset($nextResVector[$id])) {
$updatedVector[$id] = $value;
}
}
$this->_resVector = $updatedVector;
} if (count($this->_resVector) == 0) { // Empty result set, we don't need to check other terms break; } } // ksort($this->_resVector, SORT_NUMERIC);
// Docs are returned ordered. Used algorithm doesn't change elements order.
    }

    /**
     * Build the result vector for a query that is not a pure conjunction
     * (for example '+something -another').
     *
     * Documents of required terms are intersected, documents of prohibited
     * terms are removed, and optional terms are only used as the result set
     * when the query has no required term at all. Term frequencies of every
     * term are stored in $this->_termsFreqs for later scoring.
     *
     * @param Zend_Search_Lucene_Interface $reader
     */
    private function _calculateNonConjunctionResult(Zend_Search_Lucene_Interface $reader)
    {
        $requiredVectors      = array();
        $requiredVectorsSizes = array();
        // Unique secondary sort key, so array_multisort() never has to compare the vectors themselves
        $requiredVectorsIds   = array();
        $optionalDocs         = array();
        $prohibitedDocs       = array();

        foreach ($this->_terms as $termId => $term) {
            // Flip the term's document list so that its entries become array keys
            $docsOfTerm = array_flip($reader->termDocs($term));
            $sign       = $this->_signs[$termId];

            if ($sign === true) {
                // Required term
                $requiredVectors[]      = $docsOfTerm;
                $requiredVectorsSizes[] = count($docsOfTerm);
                $requiredVectorsIds[]   = $termId;
            } elseif ($sign === false) {
                // Prohibited term: union by key
                $prohibitedDocs += $docsOfTerm;
            } else {
                // Neither required nor prohibited: union by key
                $optionalDocs += $docsOfTerm;
            }

            $this->_termsFreqs[$termId] = $reader->termFreqs($term);
        }

        // Process the smallest required vectors first
        array_multisort(
            $requiredVectorsSizes, SORT_ASC, SORT_NUMERIC,
            $requiredVectorsIds, SORT_ASC, SORT_NUMERIC,
            $requiredVectors
        );

        $requiredDocs = null;
        foreach ($requiredVectors as $vector) {
            if ($requiredDocs === null) {
                $requiredDocs = $vector;
            } else {
                // Hand-written key intersection, kept as a workaround for
                // the array_intersect_key() slowness problem.
                $common = array();
                foreach ($requiredDocs as $docId => $position) {
                    if (isset($vector[$docId])) {
                        $common[$docId] = $position;
                    }
                }
                $requiredDocs = $common;
            }

            if (count($requiredDocs) == 0) {
                // Nothing can match any more, the remaining terms need not be checked
                break;
            }
        }

        // Optional terms only define the result set when nothing is required
        $this->_resVector = ($requiredDocs !== null) ? $requiredDocs : $optionalDocs;

        if (count($prohibitedDocs) != 0) {
            // Hand-written key difference, kept as a workaround for the
            // array_diff_key() slowness problem. Iterate over the smaller array.
            if (count($this->_resVector) < count($prohibitedDocs)) {
                $remaining = array();
                foreach ($this->_resVector as $docId => $position) {
                    if (!isset($prohibitedDocs[$docId])) {
                        $remaining[$docId] = $position;
                    }
                }
                $this->_resVector = $remaining;
            } else {
                $remaining = $this->_resVector;
                foreach ($prohibitedDocs as $docId => $position) {
                    unset($remaining[$docId]);
                }
                $this->_resVector = $remaining;
            }
        }

        ksort($this->_resVector, SORT_NUMERIC);
    }

    /**
     * Score a document for a conjunction query (all terms are required).
     *
     * The coordination factor is computed on first use and cached in
     * $this->_coord.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
    {
        if ($this->_coord === null) {
            $termCount    = count($this->_terms);
            $this->_coord = $reader->getSimilarity()->coord($termCount, $termCount);
        }

        $total = 0.0;
        foreach ($this->_terms as $termId => $term) {
            // Term frequency is not checked against 0 here:
            // scoring is performed only for matched documents.
            $tf     = $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]);
            $weight = $this->_weights[$termId]->getValue();
            $norm   = $reader->norm($docId, $term->field);

            $total += $tf * $weight * $norm;
        }

        return $total * $this->_coord * $this->getBoost();
    }

    /**
     * Score a document for a non conjunction query (not all terms are required).
     *
     * On first use a table of coordination factors is cached in $this->_coord,
     * indexed by the number of matched non-prohibited terms.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function _nonConjunctionScore($docId, $reader)
    {
        if ($this->_coord === null) {
            $this->_coord = array();

            // The largest possible overlap is the number of non-prohibited terms
            $maxCoord = 0;
            foreach ($this->_signs as $sign) {
                if ($sign !== false) {
                    $maxCoord++;
                }
            }

            for ($overlap = 0; $overlap <= $maxCoord; $overlap++) {
                $this->_coord[$overlap] = $reader->getSimilarity()->coord($overlap, $maxCoord);
            }
        }

        $total        = 0.0;
        $matchedTerms = 0;
        foreach ($this->_terms as $termId => $term) {
            if ($this->_signs[$termId] === false) {
                // Prohibited terms never contribute to the score
                continue;
            }
            if (!isset($this->_termsFreqs[$termId][$docId])) {
                // Term does not occur in this document
                continue;
            }

            $matchedTerms++;

            // Term frequency is not checked against 0 here:
            // scoring is performed only for matched documents.
            $tf     = $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]);
            $weight = $this->_weights[$termId]->getValue();
            $norm   = $reader->norm($docId, $term->field);

            $total += $tf * $weight * $norm;
        }

        return $total * $this->_coord[$matchedTerms] * $this->getBoost();
    }

    /**
     * Execute query in context of index reader
     * It also initializes necessary internal structures
     *
     * @param 
Zend_Search_Lucene_Interface $reader
     */
    public function execute(Zend_Search_Lucene_Interface $reader)
    {
        // A null signs array means every term is required (conjunction query)
        if ($this->_signs === null) {
            $this->_calculateConjunctionResult($reader);
        } else {
            $this->_calculateNonConjunctionResult($reader);
        }

        // Initialize weight if it's not done yet
        $this->_initWeight($reader);
    }

    /**
     * Get document ids likely matching the query
     *
     * It's an array with document ids as keys (performance considerations)
     *
     * @return array
     */
    public function matchedDocs()
    {
        return $this->_resVector;
    }

    /**
     * Score specified document
     *
     * Documents that are not part of the result vector score 0.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function score($docId, Zend_Search_Lucene_Interface $reader)
    {
        if (isset($this->_resVector[$docId])) {
            if ($this->_signs === null) {
                return $this->_conjunctionScore($docId, $reader);
            } else {
                return $this->_nonConjunctionScore($docId, $reader);
            }
        } else {
            return 0;
        }
    }

    /**
     * Return query terms
     *
     * All terms are returned when no signs are defined; otherwise only the
     * terms that are not prohibited, re-indexed from 0.
     *
     * @return array
     */
    public function getQueryTerms()
    {
        if ($this->_signs === null) {
            return $this->_terms;
        }

        $terms = array();

        foreach ($this->_signs as $id => $sign) {
            if ($sign !== false) {
                $terms[] = $this->_terms[$id];
            }
        }

        return $terms;
    }

    /**
     * Highlight query terms
     *
     * Prohibited terms are not highlighted.
     *
     * @param Zend_Search_Lucene_Document_Html $doc
     * @param integer &$colorIndex
     */
    public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
    {
        $words = array();

        if ($this->_signs === null) {
            foreach ($this->_terms as $term) {
                $words[] = $term->text;
            }
        } else {
            foreach ($this->_signs as $id => $sign) {
                if ($sign !== false) {
                    $words[] = $this->_terms[$id]->text;
                }
            }
        }

        $doc->highlight($words, $this->_getHighlightColor($colorIndex));
    }

    /**
     * Print a query
     *
     * Each term is rendered as [+|-][field:]text and the terms are joined with
     * single spaces. The result is wrapped as "(...)^boost" when the boost
     * differs from 1.
     *
     * @return string
     */
    public function __toString()
    {
        // It's used only for query visualisation, so we don't care about characters escaping
        $parts = array();

        foreach ($this->_terms as $id => $term) {
            $part = '';

            if ($this->_signs === null || $this->_signs[$id] === true) {
                $part .= '+';
            } elseif ($this->_signs[$id] === false) {
                $part .= '-';
            }

            if ($term->field !== null) {
                $part .= $term->field . ':';
            }

            $parts[] = $part . $term->text;
        }

        // Join by position instead of testing the key against 0: term keys are
        // not guaranteed to start at 0 or to be integers, which could produce
        // a leading space or drop separators.
        $query = implode(' ', $parts);

        if ($this->getBoost() != 1) {
            $query = '(' . $query . ')^' . $this->getBoost();
        }

        return $query;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -