📄 boolean.php
字号:
// NOTE(review): the statements down to the first closing brace are the tail of a method whose opening lies above this excerpt.
        if (count($prohibitedTerms) == 1) {
            // (boost factors are not significant for prohibited clauses)
            $subqueries[] = new Zend_Search_Lucene_Search_Query_Term(reset($prohibitedTerms));
            $signs[]      = false;

            // Clear prohibited terms list
            $prohibitedTerms = array();
        } else if (count($prohibitedTerms) > 1) {
            // prepare signs array
            $prohibitedSigns = array();
            foreach ($prohibitedTerms as $id => $term) {
                // all prohibited terms are grouped as optional into a multi-term query
                $prohibitedSigns[$id] = null;
            }

            // (boost factors are not significant for prohibited clauses)
            $subqueries[] = new Zend_Search_Lucene_Search_Query_MultiTerm($prohibitedTerms, $prohibitedSigns);
            // Clause sign is 'prohibited'
            $signs[]      = false;

            // Clear terms list
            $prohibitedTerms = array();
        }

        /** @todo Group terms with the same boost factors together */

        // Check that all terms are processed
        // Replace candidate for optimized query
        if (count($terms) == 0 && count($prohibitedTerms) == 0) {
            $optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
            $optimizedQuery->setBoost($this->getBoost());
        }

        return $optimizedQuery;
    }

    /**
     * Returns subqueries
     *
     * @return array
     */
    public function getSubqueries()
    {
        return $this->_subqueries;
    }

    /**
     * Return subqueries signs
     *
     * As used throughout this class: true marks a required subquery, false a
     * prohibited one, any other value an optional one. A null signs array
     * means that every subquery is required (conjunction query).
     *
     * @return array
     */
    public function getSigns()
    {
        return $this->_signs;
    }

    /**
     * Constructs an appropriate Weight implementation for this query.
     *
     * The created weight object is also stored in $this->_weight.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @return Zend_Search_Lucene_Search_Weight
     */
    public function createWeight(Zend_Search_Lucene_Interface $reader)
    {
        $this->_weight = new Zend_Search_Lucene_Search_Weight_Boolean($this, $reader);

        return $this->_weight;
    }

    /**
     * Calculate result vector for Conjunction query
     * (like '<subquery1> AND <subquery2> AND <subquery3>')
     *
     * The result is stored in $this->_resVector and keeps the keys (and their
     * order) of the smallest subquery result that survive the intersection.
     */
    private function _calculateConjunctionResult()
    {
        $this->_resVector = null;

        if (count($this->_subqueries) == 0) {
            // Nothing to intersect: the result is empty.
            $this->_resVector = array();
            return;
        }

        $resVectors      = array();
        $resVectorsSizes = array();
        $resVectorsIds   = array(); // is used to prevent arrays comparison
        foreach ($this->_subqueries as $subqueryId => $subquery) {
            $resVectors[]      = $subquery->matchedDocs();
            $resVectorsSizes[] = count(end($resVectors));
            $resVectorsIds[]   = $subqueryId;
        }

        // Sort result vectors by increasing subquery cardinality so that the
        // intersection starts from the smallest set. The ids column breaks
        // ties, which keeps array_multisort() from comparing the vectors.
        array_multisort($resVectorsSizes, SORT_ASC, SORT_NUMERIC,
                        $resVectorsIds,   SORT_ASC, SORT_NUMERIC,
                        $resVectors);

        foreach ($resVectors as $nextResVector) {
            if ($this->_resVector === null) {
                $this->_resVector = $nextResVector;
            } else {
                // Manual key intersection; used as a workaround for the
                // array_intersect_key() slowness problem.
                $updatedVector = array();
                foreach ($this->_resVector as $id => $value) {
                    if (isset($nextResVector[$id])) {
                        $updatedVector[$id] = $value;
                    }
                }
                $this->_resVector = $updatedVector;
            }

            if (count($this->_resVector) == 0) {
                // Empty result set, we don't need to check other terms
                break;
            }
        }

        // No ksort() is needed here: the used algorithm doesn't change
        // elements order.
    }

    /**
     * Calculate result vector for non Conjunction query
     * (like '<subquery1> AND <subquery2> AND NOT <subquery3> OR <subquery4>')
     *
     * If at least one subquery is required, the result is the intersection of
     * the required vectors; otherwise it is the union of the optional ones.
     * Prohibited subqueries are not evaluated here. The result is stored in
     * $this->_resVector, sorted by key.
     */
    private function _calculateNonConjunctionResult()
    {
        $requiredVectors      = array();
        $requiredVectorsSizes = array();
        $requiredVectorsIds   = array(); // is used to prevent arrays comparison

        $optional = array();

        foreach ($this->_subqueries as $subqueryId => $subquery) {
            if ($this->_signs[$subqueryId] === true) {
                // required
                $requiredVectors[]      = $subquery->matchedDocs();
                $requiredVectorsSizes[] = count(end($requiredVectors));
                $requiredVectorsIds[]   = $subqueryId;
            } elseif ($this->_signs[$subqueryId] === false) {
                // prohibited
                // Do nothing. matchedDocs() may include non-matching id's
                // Calculating prohibited vector may take significant time, but do not affect the result
                // Skipped.
            } else {
                // neither required, nor prohibited
                // array union
                $optional += $subquery->matchedDocs();
            }
        }

        // Sort required vectors by increasing subquery cardinality; the ids
        // column breaks ties so the vectors themselves are never compared.
        array_multisort($requiredVectorsSizes, SORT_ASC, SORT_NUMERIC,
                        $requiredVectorsIds,   SORT_ASC, SORT_NUMERIC,
                        $requiredVectors);

        $required = null;
        foreach ($requiredVectors as $nextResVector) {
            if ($required === null) {
                $required = $nextResVector;
            } else {
                // Manual key intersection; used as a workaround for the
                // array_intersect_key() slowness problem.
                $updatedVector = array();
                foreach ($required as $id => $value) {
                    if (isset($nextResVector[$id])) {
                        $updatedVector[$id] = $value;
                    }
                }
                $required = $updatedVector;
            }

            if (count($required) == 0) {
                // Empty result set, we don't need to check other terms
                break;
            }
        }

        if ($required !== null) {
            $this->_resVector = &$required;
        } else {
            $this->_resVector = &$optional;
        }

        ksort($this->_resVector, SORT_NUMERIC);
    }

    /**
     * Score calculator for conjunction queries (all subqueries are required)
     *
     * Returns 0 as soon as any subquery scores 0 for the document.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
    {
        if ($this->_coord === null) {
            // Lazily computed once per query: all subqueries must match.
            $this->_coord = $reader->getSimilarity()->coord(count($this->_subqueries),
                                                            count($this->_subqueries));
        }

        $score = 0;

        foreach ($this->_subqueries as $subquery) {
            $subscore = $subquery->score($docId, $reader);

            if ($subscore == 0) {
                return 0;
            }

            // Reuse the value computed above instead of scoring the subquery
            // a second time.
            $score += $subscore * $this->_coord;
        }

        // NOTE(review): the coord factor is applied per subquery and again to
        // the total; this is kept exactly as in the original code.
        return $score * $this->_coord * $this->getBoost();
    }

    /**
     * Score calculator for non conjunction queries (not all subqueries are required)
     *
     * Returns 0 if a prohibited subquery matches the document or a required
     * one does not.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function _nonConjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
    {
        if ($this->_coord === null) {
            // Lazily build a table of coord factors indexed by the number of
            // matched (non-prohibited) subqueries.
            $this->_coord = array();

            $maxCoord = 0;
            foreach ($this->_signs as $sign) {
                if ($sign !== false /* not prohibited */) {
                    $maxCoord++;
                }
            }

            for ($count = 0; $count <= $maxCoord; $count++) {
                $this->_coord[$count] = $reader->getSimilarity()->coord($count, $maxCoord);
            }
        }

        $score = 0;
        $matchedSubqueries = 0;
        foreach ($this->_subqueries as $subqueryId => $subquery) {
            $subscore = $subquery->score($docId, $reader);

            // Prohibited
            if ($this->_signs[$subqueryId] === false && $subscore != 0) {
                return 0;
            }

            // is required, but doesn't match
            if ($this->_signs[$subqueryId] === true && $subscore == 0) {
                return 0;
            }

            if ($subscore != 0) {
                $matchedSubqueries++;
                $score += $subscore;
            }
        }

        return $score * $this->_coord[$matchedSubqueries] * $this->getBoost();
    }

    /**
     * Execute query in context of index reader
     * It also initializes necessary internal structures
     *
     * @param Zend_Search_Lucene_Interface $reader
     */
    public function execute(Zend_Search_Lucene_Interface $reader)
    {
        // Initialize weight if it's not done yet
        $this->_initWeight($reader);

        foreach ($this->_subqueries as $subquery) {
            $subquery->execute($reader);
        }

        // A null signs array means that all subqueries are required.
        if ($this->_signs === null) {
            $this->_calculateConjunctionResult();
        } else {
            $this->_calculateNonConjunctionResult();
        }
    }

    /**
     * Get document ids likely matching the query
     *
     * It's an array with document ids as keys (performance considerations)
     *
     * @return array
     */
    public function matchedDocs()
    {
        return $this->_resVector;
    }

    /**
     * Score specified document
     *
     * Documents that are not present in the result vector score 0.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function score($docId, Zend_Search_Lucene_Interface $reader)
    {
        if (isset($this->_resVector[$docId])) {
            if ($this->_signs === null) {
                return $this->_conjunctionScore($docId, $reader);
            } else {
                return $this->_nonConjunctionScore($docId, $reader);
            }
        } else {
            return 0;
        }
    }

    /**
     * Return query terms
     *
     * Terms of prohibited subqueries are not included.
     *
     * @return array
     */
    public function getQueryTerms()
    {
        $terms = array();

        foreach ($this->_subqueries as $id => $subquery) {
            if ($this->_signs === null || $this->_signs[$id] !== false) {
                $terms = array_merge($terms, $subquery->getQueryTerms());
            }
        }

        return $terms;
    }

    /**
     * Highlight query terms
     *
     * Prohibited subqueries are skipped.
     *
     * @param Zend_Search_Lucene_Document_Html $doc
     * @param integer &$colorIndex
     */
    public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
    {
        foreach ($this->_subqueries as $id => $subquery) {
            if ($this->_signs === null || $this->_signs[$id] !== false) {
                $subquery->highlightMatchesDOM($doc, $colorIndex);
            }
        }
    }

    /**
     * Print a query
     *
     * @return string
     */
    public function __toString()
    {
        // It's used only for query visualisation, so we don't care about characters escaping

        $query = '';

        foreach ($this->_subqueries as $id => $subquery) {
            if ($id != 0) {
                $query .= ' ';
            }

            if ($this->_signs === null || $this->_signs[$id] === true) {
                $query .= '+';
            } else if ($this->_signs[$id] === false) {
                $query .= '-';
            }

            $query .= '(' . $subquery->__toString() . ')';

            if ($subquery->getBoost() != 1) {
                $query .= '^' . round($subquery->getBoost(), 4);
            }
        }

        return $query;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -