📄 multiterm.php
字号:
} else { //$this->_resVector = array_intersect_key($this->_resVector, $nextResVector); /** * This code is used as workaround for array_intersect_key() slowness problem. */ $updatedVector = array(); foreach ($this->_resVector as $id => $value) { if (isset($nextResVector[$id])) { $updatedVector[$id] = $value; } } $this->_resVector = $updatedVector; } if (count($this->_resVector) == 0) { // Empty result set, we don't need to check other terms break; } } // ksort($this->_resVector, SORT_NUMERIC); // Docs are returned ordered. Used algorithm doesn't change elements order. } /** * Calculate result vector for non Conjunction query * (like '+something -another') * * @param Zend_Search_Lucene_Interface $reader */ private function _calculateNonConjunctionResult(Zend_Search_Lucene_Interface $reader) { $requiredVectors = array(); $requiredVectorsSizes = array(); $requiredVectorsIds = array(); // is used to prevent arrays comparison $optional = array(); $prohibited = array(); foreach ($this->_terms as $termId => $term) { $termDocs = array_flip($reader->termDocs($term)); if ($this->_signs[$termId] === true) { // required $requiredVectors[] = $termDocs; $requiredVectorsSizes[] = count($termDocs); $requiredVectorsIds[] = $termId; } elseif ($this->_signs[$termId] === false) { // prohibited // array union $prohibited += $termDocs; } else { // neither required, nor prohibited // array union $optional += $termDocs; } $this->_termsFreqs[$termId] = $reader->termFreqs($term); } // sort resvectors in order of subquery cardinality increasing array_multisort($requiredVectorsSizes, SORT_ASC, SORT_NUMERIC, $requiredVectorsIds, SORT_ASC, SORT_NUMERIC, $requiredVectors); $required = null; foreach ($requiredVectors as $nextResVector) { if($required === null) { $required = $nextResVector; } else { //$required = array_intersect_key($required, $nextResVector); /** * This code is used as workaround for array_intersect_key() slowness problem. */ $updatedVector = array(); foreach ($required as $id => $value) { if (isset($nextResVector[$id])) { $updatedVector[$id] = $value; } } $required = $updatedVector; } if (count($required) == 0) { // Empty result set, we don't need to check other terms break; } } if ($required !== null) { $this->_resVector = $required; } else { $this->_resVector = $optional; } if (count($prohibited) != 0) { // $this->_resVector = array_diff_key($this->_resVector, $prohibited); /** * This code is used as workaround for array_diff_key() slowness problem. */ if (count($this->_resVector) < count($prohibited)) { $updatedVector = $this->_resVector; foreach ($this->_resVector as $id => $value) { if (isset($prohibited[$id])) { unset($updatedVector[$id]); } } $this->_resVector = $updatedVector; } else { $updatedVector = $this->_resVector; foreach ($prohibited as $id => $value) { unset($updatedVector[$id]); } $this->_resVector = $updatedVector; } } ksort($this->_resVector, SORT_NUMERIC); } /** * Score calculator for conjunction queries (all terms are required) * * @param integer $docId * @param Zend_Search_Lucene_Interface $reader * @return float */ public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader) { if ($this->_coord === null) { $this->_coord = $reader->getSimilarity()->coord(count($this->_terms), count($this->_terms) ); } $score = 0.0; foreach ($this->_terms as $termId=>$term) { /** * We don't need to check that term freq is not 0 * Score calculation is performed only for matched docs */ $score += $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]) * $this->_weights[$termId]->getValue() * $reader->norm($docId, $term->field); } return $score * $this->_coord * $this->getBoost(); } /** * Score calculator for non conjunction queries (not all terms are required) * * @param integer $docId * @param Zend_Search_Lucene_Interface $reader * @return float */ public function _nonConjunctionScore($docId, $reader) { if ($this->_coord === null) { $this->_coord = array(); $maxCoord = 0; foreach ($this->_signs as $sign) { if ($sign !== false /* not prohibited */) { $maxCoord++; } } for ($count = 0; $count <= $maxCoord; $count++) { $this->_coord[$count] = $reader->getSimilarity()->coord($count, $maxCoord); } } $score = 0.0; $matchedTerms = 0; foreach ($this->_terms as $termId=>$term) { // Check if term is if ($this->_signs[$termId] !== false && // not prohibited isset($this->_termsFreqs[$termId][$docId]) // matched ) { $matchedTerms++; /** * We don't need to check that term freq is not 0 * Score calculation is performed only for matched docs */ $score += $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]) * $this->_weights[$termId]->getValue() * $reader->norm($docId, $term->field); } } return $score * $this->_coord[$matchedTerms] * $this->getBoost(); } /** * Execute query in context of index reader * It also initializes necessary internal structures * * @param Zend_Search_Lucene_Interface $reader */ public function execute(Zend_Search_Lucene_Interface $reader) { if ($this->_signs === null) { $this->_calculateConjunctionResult($reader); } else { $this->_calculateNonConjunctionResult($reader); } // Initialize weight if it's not done yet $this->_initWeight($reader); } /** * Get document ids likely matching the query * * It's an array with document ids as keys (performance considerations) * * @return array */ public function matchedDocs() { return $this->_resVector; } /** * Score specified document * * @param integer $docId * @param Zend_Search_Lucene_Interface $reader * @return float */ public function score($docId, Zend_Search_Lucene_Interface $reader) { if (isset($this->_resVector[$docId])) { if ($this->_signs === null) { return $this->_conjunctionScore($docId, $reader); } else { return $this->_nonConjunctionScore($docId, $reader); } } else { return 0; } } /** * Return query terms * * @return array */ public function getQueryTerms() { if ($this->_signs === null) { return $this->_terms; } $terms = array(); foreach ($this->_signs as $id => $sign) { if ($sign !== false) { $terms[] = $this->_terms[$id]; } } return $terms; } /** * Highlight query terms * * @param integer &$colorIndex * @param Zend_Search_Lucene_Document_Html $doc */ public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex) { $words = array(); if ($this->_signs === null) { foreach ($this->_terms as $term) { $words[] = $term->text; } } else { foreach ($this->_signs as $id => $sign) { if ($sign !== false) { $words[] = $this->_terms[$id]->text; } } } $doc->highlight($words, $this->_getHighlightColor($colorIndex)); } /** * Print a query * * @return string */ public function __toString() { // It's used only for query visualisation, so we don't care about characters escaping $query = ''; foreach ($this->_terms as $id => $term) { if ($id != 0) { $query .= ' '; } if ($this->_signs === null || $this->_signs[$id] === true) { $query .= '+'; } else if ($this->_signs[$id] === false) { $query .= '-'; } if ($term->field !== null) { $query .= $term->field . ':'; } $query .= $term->text; } if ($this->getBoost() != 1) { $query = '(' . $query . ')^' . $this->getBoost(); } return $query; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -