⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 phrase.php

📁 很棒的在线教学系统
💻 PHP
📖 第 1 页 / 共 2 页
字号:
     * Score calculator for exact phrase queries (terms sequence is fixed)
     *
     * Counts how many times the whole phrase occurs in the document: for each
     * position of the term with the fewest positions in this document, every
     * other term must be present exactly at its expected relative position
     * (derived from the difference of the terms' entries in $this->_offsets).
     *
     * @param integer $docId
     * @return float
     */
    public function _exactPhraseFreq($docId)
    {
        $freq = 0;

        // An empty phrase cannot match anything. Returning early also avoids
        // indexing $this->_termsPositions with a null term id below.
        if (count($this->_terms) == 0) {
            return $freq;
        }

        // Term Id with lowest cardinality (fewest positions in this document)
        $lowCardTermId = null;
        $lowCardCount  = 0;

        // Calculate $lowCardTermId
        foreach ($this->_terms as $termId => $term) {
            $positionsCount = count($this->_termsPositions[$termId][$docId]);

            if ($lowCardTermId === null || $positionsCount < $lowCardCount) {
                $lowCardTermId = $termId;
                $lowCardCount  = $positionsCount;
            }
        }

        // Per-term lookup tables (position => index), built lazily so that a
        // membership test is a hash lookup instead of a linear in_array() scan
        // repeated for every candidate position.
        $positionSets = array();

        // Walk through positions of the term with lowest cardinality
        foreach ($this->_termsPositions[$lowCardTermId][$docId] as $lowCardPos) {
            $phraseFound = true;

            // Walk through other terms
            foreach ($this->_terms as $termId => $term) {
                if ($termId == $lowCardTermId) {
                    continue;
                }

                if (!isset($positionSets[$termId])) {
                    $positionSets[$termId] = array_flip($this->_termsPositions[$termId][$docId]);
                }

                $expectedPosition = $lowCardPos +
                                        ($this->_offsets[$termId] -
                                         $this->_offsets[$lowCardTermId]);

                if (!isset($positionSets[$termId][$expectedPosition])) {
                    // Phrase wasn't found at this position.
                    $phraseFound = false;
                    break;
                }
            }

            if ($phraseFound) {
                $freq++;
            }
        }

        return $freq;
    }

    /**
     * Score calculator for sloppy phrase queries (terms sequence is fixed)
     *
     * Builds every candidate combination of term positions whose neighbouring
     * terms are displaced by no more than $this->_slop, then, for each
     * candidate, finds the smallest total displacement over all shifts in
     * [-slop, +slop]. Candidates within the slop contribute
     * $reader->getSimilarity()->sloppyFreq($minDistance) to the result.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function _sloppyPhraseFreq($docId, Zend_Search_Lucene_Interface $reader)
    {
        $freq = 0;

        $phraseQueue = array();
        $phraseQueue[0] = array(); // empty phrase
        $lastTerm = null;

        // Walk through the terms to create phrases.
        foreach ($this->_terms as $termId => $term) {
            // Only the phrases that existed before this term are extended.
            $queueSize = count($phraseQueue);
            $firstPass = true;

            // Walk through the term positions.
            // Each term position produces a set of phrases.
            foreach ($this->_termsPositions[$termId][$docId] as $termPosition) {
                if ($firstPass) {
                    // The first position is written into the existing phrases in place.
                    for ($count = 0; $count < $queueSize; $count++) {
                        $phraseQueue[$count][$termId] = $termPosition;
                    }
                } else {
                    // Further positions fork a copy of each existing phrase,
                    // unless the displacement from the previous term already
                    // exceeds the slop.
                    for ($count = 0; $count < $queueSize; $count++) {
                        if ($lastTerm !== null &&
                            abs( $termPosition - $phraseQueue[$count][$lastTerm] -
                                 ($this->_offsets[$termId] - $this->_offsets[$lastTerm])) > $this->_slop) {
                            continue;
                        }

                        $newPhraseId = count($phraseQueue);
                        $phraseQueue[$newPhraseId]          = $phraseQueue[$count];
                        $phraseQueue[$newPhraseId][$termId] = $termPosition;
                    }
                }

                $firstPass = false;
            }
            $lastTerm = $termId;
        }

        // Offset of the first term; invariant for all candidate phrases.
        $firstOffset = reset($this->_offsets);

        foreach ($phraseQueue as $phrasePos) {
            $minDistance = null;

            // Phrase start implied by the first term's position (shift applied below).
            $anchor = reset($phrasePos) - $firstOffset;

            for ($shift = -$this->_slop; $shift <= $this->_slop; $shift++) {
                $distance = 0;
                $start = $anchor + $shift;

                foreach ($this->_terms as $termId => $term) {
                    $distance += abs($phrasePos[$termId] - $this->_offsets[$termId] - $start);

                    if ($distance > $this->_slop) {
                        // Already too far away for this shift; no need to go on.
                        break;
                    }
                }

                if ($minDistance === null || $distance < $minDistance) {
                    $minDistance = $distance;
                }
            }

            if ($minDistance <= $this->_slop) {
                $freq += $reader->getSimilarity()->sloppyFreq($minDistance);
            }
        }

        return $freq;
    }

    /**
     * Execute query in context of index reader
     * It also initializes necessary internal structures
     *
     * Fills $this->_termsPositions for every term and stores in
     * $this->_resVector the documents that contain all of the terms
     * (document ids are the array keys).
     *
     * @param Zend_Search_Lucene_Interface $reader
     */
    public function execute(Zend_Search_Lucene_Interface $reader)
    {
        $this->_resVector = null;

        if (count($this->_terms) == 0) {
            $this->_resVector = array();
        }

        $resVectors      = array();
        $resVectorsSizes = array();
        $resVectorsIds   = array(); // is used to prevent arrays comparison
        foreach ($this->_terms as $termId => $term) {
            // Flip so that document ids become keys (cheap isset() lookups below).
            $resVectors[]      = array_flip($reader->termDocs($term));
            $resVectorsSizes[] = count(end($resVectors));
            $resVectorsIds[]   = $termId;

            $this->_termsPositions[$termId] = $reader->termPositions($term);
        }
        // sort resvectors in order of subquery cardinality increasing
        array_multisort($resVectorsSizes, SORT_ASC, SORT_NUMERIC,
                        $resVectorsIds,   SORT_ASC, SORT_NUMERIC,
                        $resVectors);

        foreach ($resVectors as $nextResVector) {
            if ($this->_resVector === null) {
                $this->_resVector = $nextResVector;
            } else {
                /**
                 * Manual key intersection; it is used as workaround for
                 * array_intersect_key() slowness problem.
                 */
                $updatedVector = array();
                foreach ($this->_resVector as $id => $value) {
                    if (isset($nextResVector[$id])) {
                        $updatedVector[$id] = $value;
                    }
                }
                $this->_resVector = $updatedVector;
            }

            if (count($this->_resVector) == 0) {
                // Empty result set, we don't need to check other terms
                break;
            }
        }

        // No ksort() is needed here: docs are returned ordered and the
        // intersection above doesn't change elements order.

        // Initialize weight if it's not done yet
        $this->_initWeight($reader);
    }

    /**
     * Get document ids likely matching the query
     *
     * It's an array with document ids as keys (performance considerations)
     *
     * @return array
     */
    public function matchedDocs()
    {
        return $this->_resVector;
    }

    /**
     * Score specified document
     *
     * Returns 0 for documents that are not in the result vector or whose
     * phrase frequency is zero; otherwise the product of the similarity's
     * tf($freq), the query weight value, the field norm and the query boost.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function score($docId, Zend_Search_Lucene_Interface $reader)
    {
        if (!isset($this->_resVector[$docId])) {
            return 0;
        }

        if ($this->_slop == 0) {
            $freq = $this->_exactPhraseFreq($docId);
        } else {
            $freq = $this->_sloppyPhraseFreq($docId, $reader);
        }

        if ($freq == 0) {
            // Included in result, but calculated freq is zero
            return 0;
        }

        $tf = $reader->getSimilarity()->tf($freq);
        $weight = $this->_weight->getValue();
        // The norm is taken for the field of the first term of the phrase.
        $norm = $reader->norm($docId, reset($this->_terms)->field);

        return $tf * $weight * $norm * $this->getBoost();
    }

    /**
     * Return query terms
     *
     * @return array
     */
    public function getQueryTerms()
    {
        return $this->_terms;
    }
  /**
     * Highlight query terms
     *
     * Gathers the text of every term of the phrase and hands the list to the
     * document together with the color returned by _getHighlightColor().
     *
     * @param Zend_Search_Lucene_Document_Html $doc
     * @param integer &$colorIndex
     */
    public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
    {
        $termTexts = array();

        foreach ($this->_terms as $phraseTerm) {
            array_push($termTexts, $phraseTerm->text);
        }

        $doc->highlight($termTexts, $this->_getHighlightColor($colorIndex));
    }

    /**
     * Print a query
     *
     * Produces `field:"term1 term2 ..."`, followed by `~slop` when the slop
     * is non-zero. The field prefix is emitted only when the term with id 0
     * exists and has a non-null field.
     *
     * @return string
     */
    public function __toString()
    {
        // It's used only for query visualisation, so we don't care about characters escaping
        $hasField = isset($this->_terms[0]) && $this->_terms[0]->field !== null;
        $prefix   = $hasField ? $this->_terms[0]->field . ':' : '';

        // Every term except the one with id 0 is preceded by a single space.
        $phrase = '';
        foreach ($this->_terms as $termIndex => $phraseTerm) {
            $phrase .= ($termIndex != 0 ? ' ' : '') . $phraseTerm->text;
        }

        $suffix = ($this->_slop != 0) ? '~' . $this->_slop : '';

        return $prefix . '"' . $phrase . '"' . $suffix;
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -