📄 fuzzyquery.h
字号:
/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
*
* Distributable under the terms of either the Apache License (Version 2.0) or
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#ifndef _lucene_search_FuzzyQuery_
#define _lucene_search_FuzzyQuery_
#if defined(_LUCENE_PRAGMA_ONCE)
# pragma once
#endif
#include "CLucene/index/IndexReader.h"
#include "CLucene/index/Term.h"
#include "MultiTermQuery.h"
#ifndef NO_FUZZY_QUERY
CL_NS_DEF(search)
/**
* FuzzyQuery implements the fuzzy search query.
*
* It derives from MultiTermQuery and additionally carries a minimum
* similarity threshold and the length of a required (non-fuzzy) prefix.
*/
class FuzzyQuery: public MultiTermQuery {
private:
// Required minimum similarity; see the constructor documentation for its meaning.
float_t minimumSimilarity;
// Number of leading characters that must match exactly (non-fuzzy prefix).
size_t prefixLength;
protected:
/**
* Copy constructor. It is protected, so code outside this class hierarchy
* cannot copy-construct a FuzzyQuery directly.
* NOTE(review): presumably used by clone() - confirm in the implementation.
*/
FuzzyQuery(const FuzzyQuery& clone);
public:
/**
* Value used for the constructor's <code>minimumSimilarity</code> argument
* when the caller does not supply one. Only declared here; its value is
* defined outside this header.
*/
static float_t defaultMinSimilarity;
/**
* Create a new FuzzyQuery that will match terms with a similarity
* of at least <code>minimumSimilarity</code> to <code>term</code>.
* If a <code>prefixLength</code> > 0 is specified, a common prefix
* of that length is also required.
*
* @param term the term to search for
* @param minimumSimilarity a value between 0 and 1 to set the required similarity
* between the query term and the matching terms. For example, for a
* <code>minimumSimilarity</code> of <code>0.5</code> a term of the same length
* as the query term is considered similar to the query term if the edit distance
* between both terms is less than <code>length(term)*0.5</code>.
* Defaults to <code>defaultMinSimilarity</code>.
* @param prefixLength length of common (non-fuzzy) prefix. Defaults to 0.
* The parameter is an unsigned <code>size_t</code>, so it cannot be negative.
* @throws IllegalArgumentException if minimumSimilarity is > 1 or < 0
* or if prefixLength > <code>term.text().length()</code>.
* NOTE(review): the exception contract is taken from the original comment;
* the validation code is not part of this header - confirm the exact error
* type raised by the implementation.
*/
FuzzyQuery(CL_NS(index)::Term* term, float_t minimumSimilarity=defaultMinSimilarity, size_t prefixLength=0);
/** Destructor. */
~FuzzyQuery();
/**
* Builds a textual representation of this query.
* @param field field name passed in by the caller
* NOTE(review): the result is a non-const TCHAR*, so the caller presumably
* owns and must release the buffer - confirm against the implementation.
*/
TCHAR* toString(const TCHAR* field) const;
/** Returns the name "FuzzyQuery". */
static const TCHAR* getClassName();
/**
* Returns the name of this query type.
* NOTE(review): presumably the same string as getClassName() - confirm.
*/
const TCHAR* getQueryName() const;
/**
* Returns a copy of this query as a Query*.
* NOTE(review): presumably heap-allocated and owned by the caller - confirm.
*/
Query* clone() const;
/** Compares this query with <code>other</code> for equality. */
bool equals(Query * other) const;
/** Returns a hash code for this query. */
size_t hashCode() const;
/**
* Returns the minimum similarity that is required for this query to match.
* @return float value between 0.0 and 1.0
*/
float_t getMinSimilarity() const;
/**
* Returns the prefix length, i.e. the number of characters at the start
* of a term that must be identical (not fuzzy) to the query term if the query
* is to match that term.
*/
size_t getPrefixLength() const;
protected:
/**
* Creates the term enumeration used to expand this query against
* <code>reader</code>.
* NOTE(review): presumably returns a FuzzyTermEnum built from this query's
* term, minimum similarity and prefix length - confirm in the implementation.
*/
FilteredTermEnum* getEnum(CL_NS(index)::IndexReader* reader);
};
/** FuzzyTermEnum is a subclass of FilteredTermEnum for enumerating all
* terms that are similar to the specified filter term.
*
* Term enumerations are always ordered by Term.compareTo(). Each term in
* the enumeration is greater than all that precede it.
*
* NOTE(review): the class holds raw pointers (searchTerm, text, prefix, e)
* and declares a destructor but no copy constructor or assignment operator.
* If the destructor releases those pointers, copying an instance would be
* unsafe - confirm against the implementation and the base class.
*/
class FuzzyTermEnum: public FilteredTermEnum {
private:
// Most recently computed distance/similarity value; see difference().
float_t distance;
// End-of-enumeration flag; presumably what endEnum() reports - confirm.
bool _endEnum;
// The term this enumeration searches for.
CL_NS(index)::Term* searchTerm;
// Character buffer and its length. NOTE(review): presumably the part of the
// search term's text that follows the prefix - confirm in the implementation.
TCHAR* text;
size_t textLen;
// Required non-fuzzy prefix and its length.
TCHAR* prefix;
size_t prefixLength;
// Similarity threshold passed to the constructor.
float_t minimumSimilarity;
// Scale factor applied in difference(); computed outside this header.
double scale_factor;
/**
* Scratch array kept between calls so that editDistance does not have to
* create a new array every time it is called.
* Note: despite the wording of the original comment, this is a per-instance
* member, not a static one.
* NOTE(review): eWidth and eHeight are presumably the current dimensions
* of <code>e</code> - confirm in the implementation.
*/
int32_t* e;
int32_t eWidth;
int32_t eHeight;
/******************************
* Compute Levenshtein distance
******************************/
/**
Levenshtein distance, also known as edit distance, is a measure of similarity
between two strings where the distance is measured as the number of character
deletions, insertions or substitutions required to transform one string to
the other string.
<p>This method takes in four parameters; two strings and their respective
lengths to compute the Levenshtein distance between the two strings.
The result is returned as an integer.

@param s first string
@param t second string
@param n length of <code>s</code>
@param m length of <code>t</code>
@return the edit distance between <code>s</code> and <code>t</code>
*/
int32_t editDistance(const TCHAR* s, const TCHAR* t, const int32_t n, const int32_t m) ;
protected:
/**
The termCompare method in FuzzyTermEnum uses Levenshtein distance to
calculate the distance between the given term and the comparing term.
*/
bool termCompare(CL_NS(index)::Term* term) ;
/// Returns whether the enumeration has reached its end.
bool endEnum();
public:
/**
* Creates a fuzzy term enumeration over <code>reader</code> for
* <code>term</code>.
*
* When the optional arguments are omitted, <code>minSimilarity</code>
* defaults to <code>FuzzyQuery::defaultMinSimilarity</code> and
* <code>prefixLength</code> defaults to 0 (empty prefix).
*
* @param reader index reader to enumerate terms from
* @param term the term to search for
* @param minSimilarity required minimum similarity
* @param prefixLength length of the required common (non-fuzzy) prefix
* @throws IOException
* NOTE(review): the IOException contract is carried over from the original
* comment; the implementation is not part of this header - confirm.
*/
FuzzyTermEnum(const CL_NS(index)::IndexReader* reader, CL_NS(index)::Term* term, float_t minSimilarity=FuzzyQuery::defaultMinSimilarity, size_t prefixLength=0);
/** Destructor */
~FuzzyTermEnum();
/** Close the enumeration */
void close();
/** Returns the difference between the distance and the fuzzy threshold
* multiplied by the scale factor
*/
float_t difference();
/** Returns the class name of this object; forwards to getClassName(). */
const char* getObjectName(){ return FuzzyTermEnum::getClassName(); }
/** Returns the literal name "FuzzyTermEnum". */
static const char* getClassName(){ return "FuzzyTermEnum"; }
};
CL_NS_END
#endif
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -