📄 similarity.php
字号:
229 => 8.388608E7, 230 => 1.00663296E8, 231 => 1.17440512E8, 232 => 1.34217728E8, 233 => 1.6777216E8, 234 => 2.01326592E8, 235 => 2.34881024E8, 236 => 2.68435456E8, 237 => 3.3554432E8, 238 => 4.02653184E8, 239 => 4.69762048E8, 240 => 5.3687091E8, 241 => 6.7108864E8, 242 => 8.0530637E8, 243 => 9.395241E8, 244 => 1.07374182E9, 245 => 1.34217728E9, 246 => 1.61061274E9, 247 => 1.87904819E9, 248 => 2.14748365E9, 249 => 2.68435456E9, 250 => 3.22122547E9, 251 => 3.75809638E9, 252 => 4.2949673E9, 253 => 5.3687091E9, 254 => 6.4424509E9, 255 => 7.5161928E9 );

    /**
     * Set the default Similarity implementation used by indexing and search
     * code.
     *
     * @param Zend_Search_Lucene_Search_Similarity $similarity
     */
    public static function setDefault(Zend_Search_Lucene_Search_Similarity $similarity)
    {
        self::$_defaultImpl = $similarity;
    }

    /**
     * Return the default Similarity implementation used by indexing and search
     * code.
     *
     * If no implementation has been set yet, a
     * Zend_Search_Lucene_Search_Similarity_Default instance is created,
     * remembered and returned.
     *
     * @return Zend_Search_Lucene_Search_Similarity
     */
    public static function getDefault()
    {
        if (!self::$_defaultImpl instanceof Zend_Search_Lucene_Search_Similarity) {
            self::$_defaultImpl = new Zend_Search_Lucene_Search_Similarity_Default();
        }

        return self::$_defaultImpl;
    }

    /**
     * Computes the normalization value for a field given the total number of
     * terms contained in a field. These values, together with field boosts, are
     * stored in an index and multiplied into scores for hits on each field by
     * the search code.
     *
     * Matches in longer fields are less precise, so implementations of this
     * method usually return smaller values when 'numTokens' is large,
     * and larger values when 'numTokens' is small.
     *
     * Note that these values are computed under
     * IndexWriter::addDocument(Document) and stored then using
     * encodeNorm(float). Thus they have limited precision, and documents
     * must be re-indexed if this method is altered.
     *
     * fieldName - name of field
     * numTokens - the total number of tokens contained in fields named
     *             'fieldName' of 'doc'.
     * Returns a normalization factor for hits on this field of this document
     *
     * @param string $fieldName
     * @param integer $numTokens
     * @return float
     */
    abstract public function lengthNorm($fieldName, $numTokens);

    /**
     * Computes the normalization value for a query given the sum of the squared
     * weights of each of the query terms. This value is then multiplied into
     * the weight of each query term.
     *
     * This does not affect ranking, but rather just attempts to make scores
     * from different queries comparable.
     *
     * sumOfSquaredWeights - the sum of the squares of query term weights
     * Returns a normalization factor for query weights
     *
     * @param float $sumOfSquaredWeights
     * @return float
     */
    abstract public function queryNorm($sumOfSquaredWeights);

    /**
     * Decodes a normalization factor stored in an index.
     *
     * Only the low 8 bits of $byte are used to look the value up in the
     * norm table.
     *
     * @param integer $byte
     * @return float
     */
    public static function decodeNorm($byte)
    {
        return self::$_normTable[$byte & 0xFF];
    }

    /**
     * Encodes a normalization factor for storage in an index.
     *
     * The encoding uses a five-bit exponent and three-bit mantissa, thus
     * representing values from around 7x10^9 to 2x10^-9 with about one
     * significant decimal digit of accuracy. Zero is also represented.
     * Negative numbers are rounded up to zero. Values too large to represent
     * are rounded down to the largest representable value. Positive values too
     * small to represent are rounded up to the smallest positive representable
     * value.
     *
     * @param float $f
     * @return integer
     */
    public static function encodeNorm($f)
    {
        return self::_floatToByte($f);
    }

    /**
     * Float to byte conversion.
     *
     * Binary-searches the norm table for $f and returns the index of the
     * matching entry, or of the closest entry when there is no exact match.
     * The search assumes the table is sorted in ascending order (the visible
     * part of the table is).
     *
     * NOTE(review): the closest-value step can return index 0 for a very small
     * positive $f if the first table entry is 0 (the start of the table is not
     * visible here). That would disagree with the "rounded up to the smallest
     * positive representable value" wording on encodeNorm() - confirm which
     * behaviour is intended before changing it, since stored norms depend on it.
     *
     * @param float $f
     * @return integer index into the norm table (0..255)
     */
    private static function _floatToByte($f)
    {
        // round negatives up to zero
        if ($f <= 0.0) {
            return 0;
        }

        // search for appropriate value
        $lowIndex  = 0;
        $highIndex = 255;
        while ($highIndex >= $lowIndex) {
            $mid   = ($highIndex + $lowIndex) >> 1;
            $delta = $f - self::$_normTable[$mid];

            if ($delta < 0) {
                $highIndex = $mid - 1;
            } elseif ($delta > 0) {
                $lowIndex = $mid + 1;
            } else {
                return $mid; // We got it!
            }
        }

        // No exact match: $highIndex now points at the entry just below $f.
        // Round to the closest value, never stepping past the last entry.
        if ($highIndex !== 255
            && $f - self::$_normTable[$highIndex] > self::$_normTable[$highIndex + 1] - $f
        ) {
            return $highIndex + 1;
        }

        return $highIndex;
    }

    /**
     * Computes a score factor based on a term or phrase's frequency in a
     * document. This value is multiplied by the idf(Term, Searcher)
     * factor for each term in the query and these products are then summed to
     * form the initial score for a document.
     *
     * Terms and phrases repeated in a document indicate the topic of the
     * document, so implementations of this method usually return larger values
     * when 'freq' is large, and smaller values when 'freq'
     * is small.
     *
     * freq - the frequency of a term within a document
     * Returns a score factor based on a term's within-document frequency
     *
     * @param float $freq
     * @return float
     */
    abstract public function tf($freq);

    /**
     * Computes the amount of a sloppy phrase match, based on an edit distance.
     * This value is summed for each sloppy phrase match in a document to form
     * the frequency that is passed to tf(float).
     *
     * A phrase match with a small edit distance to a document passage more
     * closely matches the document, so implementations of this method usually
     * return larger values when the edit distance is small and smaller values
     * when it is large.
     *
     * distance - the edit distance of this sloppy phrase match
     * Returns the frequency increment for this match
     *
     * @param integer $distance
     * @return float
     */
    abstract public function sloppyFreq($distance);

    /**
     * Computes a score factor for a simple term or a phrase.
     *
     * For a single term this returns
     *     idfFreq($reader->docFreq($input), $reader->count());
     * for an array of terms it returns the sum of that value over every term
     * (0.0 for an empty array).
     *
     * input  - the term in question or array of terms
     * reader - reader the document collection being searched
     * Returns a score factor for the term
     *
     * @param mixed $input
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function idf($input, Zend_Search_Lucene_Interface $reader)
    {
        if (!is_array($input)) {
            return $this->idfFreq($reader->docFreq($input), $reader->count());
        }

        $idf = 0.0;
        foreach ($input as $term) {
            $idf += $this->idfFreq($reader->docFreq($term), $reader->count());
        }

        return $idf;
    }

    /**
     * Computes a score factor based on a term's document frequency (the number
     * of documents which contain the term). This value is multiplied by the
     * tf(int) factor for each term in the query and these products are
     * then summed to form the initial score for a document.
     *
     * Terms that occur in fewer documents are better indicators of topic, so
     * implementations of this method usually return larger values for rare
     * terms, and smaller values for common terms.
     *
     * docFreq - the number of documents which contain the term
     * numDocs - the total number of documents in the collection
     * Returns a score factor based on the term's document frequency
     *
     * @param integer $docFreq
     * @param integer $numDocs
     * @return float
     */
    abstract public function idfFreq($docFreq, $numDocs);

    /**
     * Computes a score factor based on the fraction of all query terms that a
     * document contains. This value is multiplied into scores.
     *
     * The presence of a large portion of the query terms indicates a better
     * match with the query, so implementations of this method usually return
     * larger values when the ratio between these parameters is large and
     * smaller values when the ratio between them is small.
     *
     * overlap    - the number of query terms matched in the document
     * maxOverlap - the total number of terms in the query
     * Returns a score factor based on term overlap with the query
     *
     * @param integer $overlap
     * @param integer $maxOverlap
     * @return float
     */
    abstract public function coord($overlap, $maxOverlap);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -