📄 similarity.php

📁 很棒的在线教学系统
💻 PHP
📖 第 1 页 / 共 2 页
字号:
上一页 12
                                        229 => 8.388608E7,                                        230 => 1.00663296E8,                                        231 => 1.17440512E8,                                        232 => 1.34217728E8,                                        233 => 1.6777216E8,                                        234 => 2.01326592E8,                                        235 => 2.34881024E8,                                        236 => 2.68435456E8,                                        237 => 3.3554432E8,                                        238 => 4.02653184E8,                                        239 => 4.69762048E8,                                        240 => 5.3687091E8,                                        241 => 6.7108864E8,                                        242 => 8.0530637E8,                                        243 => 9.395241E8,                                        244 => 1.07374182E9,                                        245 => 1.34217728E9,                                        246 => 1.61061274E9,                                        247 => 1.87904819E9,                                        248 => 2.14748365E9,                                        249 => 2.68435456E9,                                        250 => 3.22122547E9,                                        251 => 3.75809638E9,                                        252 => 4.2949673E9,                                        253 => 5.3687091E9,                                        254 => 6.4424509E9,                                        255 => 7.5161928E9 );    /**     * Set the default Similarity implementation used by indexing and search     * code.     
*
     * @param Zend_Search_Lucene_Search_Similarity $similarity Instance to store as the shared default.
     */
    public static function setDefault(Zend_Search_Lucene_Search_Similarity $similarity)
    {
        self::$_defaultImpl = $similarity;
    }

    /**
     * Fetch the Similarity implementation shared by indexing and search code.
     *
     * If the stored default is not a Zend_Search_Lucene_Search_Similarity
     * instance, a Zend_Search_Lucene_Search_Similarity_Default object is
     * created, remembered as the new default, and returned.
     *
     * @return Zend_Search_Lucene_Search_Similarity
     */
    public static function getDefault()
    {
        // Fast path: a usable implementation is already in place.
        if (self::$_defaultImpl instanceof Zend_Search_Lucene_Search_Similarity) {
            return self::$_defaultImpl;
        }

        self::$_defaultImpl = new Zend_Search_Lucene_Search_Similarity_Default();

        return self::$_defaultImpl;
    }

    /**
     * Computes the normalization value for a field, given the total number of
     * terms contained in that field.  These values, together with field boosts,
     * are stored in an index and multiplied into scores for hits on each field
     * by the search code.
     *
     * Matches in longer fields are less precise, so implementations of this
     * method usually return smaller values when 'numTokens' is large, and
     * larger values when 'numTokens' is small.
     *
     * Note that these values are computed under
     * IndexWriter::addDocument(Document) and then stored using
     * encodeNorm(float).  They therefore have limited precision, and documents
     * must be re-indexed if this method is altered.
     *
     * @param string  $fieldName Name of the field.
     * @param integer $numTokens Total number of tokens contained in fields
     *                           named 'fieldName' of the document.
     * @return float Normalization factor for hits on this field of this document.
     */
    abstract public function lengthNorm($fieldName, $numTokens);

    /**
     * Computes the normalization value for a query given the sum of the squared
     * weights of each of the query terms.  This value is then multiplied into the
     * weight of each query term.
*
     * This does not affect ranking; it only attempts to make scores from
     * different queries comparable.
     *
     * @param float $sumOfSquaredWeights Sum of the squares of the query term weights.
     * @return float Normalization factor for query weights.
     */
    abstract public function queryNorm($sumOfSquaredWeights);

    /**
     * Decodes a normalization factor stored in an index.
     *
     * Only the low eight bits of $byte are used as the norm table index.
     *
     * @param integer $byte
     * @return float
     */
    public static function decodeNorm($byte)
    {
        return self::$_normTable[$byte & 0xFF];
    }

    /**
     * Encodes a normalization factor for storage in an index.
     *
     * The encoding uses a five-bit exponent and three-bit mantissa, thus
     * representing values from around 7x10^9 to 2x10^-9 with about one
     * significant decimal digit of accuracy.  Zero is also represented.
     * Negative numbers are rounded up to zero.  Values too large to represent
     * are rounded down to the largest representable value.  Positive values too
     * small to represent are rounded up to the smallest positive representable
     * value.
     *
     * @param float $f
     * @return integer
     */
    public static function encodeNorm($f)
    {
        return self::_floatToByte($f);
    }

    /**
     * Float to byte conversion.
     *
     * Binary-searches the norm table for $f and returns the index of the
     * closest entry, subject to the rounding rules documented on encodeNorm().
     *
     * @param float $f Value to encode.
     * @return integer Norm table index in the range 0..255.
     */
    private static function _floatToByte($f)
    {
        // Zero and negative values are all stored as byte 0.
        if ($f <= 0.0) {
            return 0;
        }

        // Binary search for an exact match in the norm table.
        $lowIndex  = 0;
        $highIndex = 255;
        while ($highIndex >= $lowIndex) {
            $mid   = ($highIndex + $lowIndex) >> 1;
            $delta = $f - self::$_normTable[$mid];

            if ($delta < 0) {
                $highIndex = $mid - 1;
            } elseif ($delta > 0) {
                $lowIndex = $mid + 1;
            } else {
                return $mid; // Exact match.
            }
        }

        // No exact match: $highIndex is now the last index whose table value
        // is below $f.
        //
        // A strictly positive $f must never be stored as byte 0, because that
        // byte is what zero/negative input encodes to; plain nearest-value
        // rounding would otherwise collapse very small positive values to 0.
        // NOTE(review): assumes table entry 0 is the zero value (the head of
        // the table is not visible from this part of the file) - confirm.
        if ($highIndex < 1) {
            return 1;
        }

        // Round to the closest neighbouring table value.  Index 255 has no
        // upper neighbour, so values above the table maximum clamp to 255.
        if ($highIndex < 255
            && $f - self::$_normTable[$highIndex] > self::$_normTable[$highIndex + 1] - $f
        ) {
            return $highIndex + 1;
        }

        return $highIndex;
    }

    /**
     * Computes a score factor based on a term or phrase's frequency in a
     * document.  This value is multiplied by the idf(Term, Searcher)
     * factor for each term in the query and these products are then summed to
     * form the initial score for a document.
     *
     * Terms and phrases repeated in a document indicate the topic of the
     * document, so implementations of this method usually return larger values
     * when 'freq' is large, and smaller values when 'freq' is small.
     *
     * @param float $freq Frequency of a term within a document.
     * @return float Score factor based on the term's within-document frequency.
     */
    abstract public function tf($freq);

    /**
     * Computes the amount of a sloppy phrase match, based on an edit distance.
     * This value is summed for each sloppy phrase match in a document to form
     * the frequency that is passed to tf(float).
     *
     * A phrase match with a small edit distance to a document passage more
     * closely matches the document, so implementations of this method usually
     * return larger values when the edit distance is small and smaller values
     * when it is large.
     *
     * @param integer $distance Edit distance of this sloppy phrase match.
     * @return float Frequency increment for this match.
     */
    abstract public function sloppyFreq($distance);

    /**
     * Computes a score factor for a simple term or a phrase.
*
     * A single term is scored as
     *   idfFreq($reader->docFreq($term), $reader->count()).
     * When an array of terms is given, that per-term factor is computed for
     * every element and the results are summed.
     *
     * @param mixed $input The term in question, or an array of terms.
     * @param Zend_Search_Lucene_Interface $reader Reader for the document collection being searched.
     * @return float Score factor for the term(s).
     */
    public function idf($input, Zend_Search_Lucene_Interface $reader)
    {
        if (is_array($input)) {
            // Phrase case: accumulate the factor of every term.
            $total = 0.0;
            foreach ($input as $singleTerm) {
                $total += $this->idfFreq($reader->docFreq($singleTerm), $reader->count());
            }

            return $total;
        }

        return $this->idfFreq($reader->docFreq($input), $reader->count());
    }

    /**
     * Computes a score factor based on a term's document frequency (the number
     * of documents which contain the term).  This value is multiplied by the
     * tf(int) factor for each term in the query and these products are
     * then summed to form the initial score for a document.
     *
     * Terms that occur in fewer documents are better indicators of topic, so
     * implementations of this method usually return larger values for rare
     * terms, and smaller values for common terms.
     *
     * @param integer $docFreq Number of documents which contain the term.
     * @param integer $numDocs Total number of documents in the collection.
     * @return float Score factor based on the term's document frequency.
     */
    abstract public function idfFreq($docFreq, $numDocs);

    /**
     * Computes a score factor based on the fraction of all query terms that a
     * document contains.  This value is multiplied into scores.
*
     * A document containing a large portion of the query terms is a better
     * match for the query, so implementations of this method usually return
     * larger values when the ratio between these parameters is large and
     * smaller values when the ratio between them is small.
     *
     * @param integer $overlap    Number of query terms matched in the document.
     * @param integer $maxOverlap Total number of terms in the query.
     * @return float Score factor based on term overlap with the query.
     */
    abstract public function coord($overlap, $maxOverlap);
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -