📄 queryparser.cpp
字号:
/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
*
* Distributable under the terms of either the Apache License (Version 2.0) or
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/StdHeader.h"
#include "QueryParser.h"
#include "QueryParserConstants.h"
#include "CLucene/analysis/AnalysisHeader.h"
#include "CLucene/util/Reader.h"
#include "CLucene/search/SearchHeader.h"
#include "CLucene/index/Term.h"
#include "CLucene/search/TermQuery.h"
#include "CLucene/search/PhraseQuery.h"
#include "CLucene/search/RangeQuery.h"
#include "CLucene/search/PrefixQuery.h"
#include "CLucene/search/WildcardQuery.h"
#include "CLucene/search/FuzzyQuery.h"
#include "CLucene/search/PrefixQuery.h"
#include "TokenList.h"
#include "QueryToken.h"
#include "QueryParserBase.h"
#include "Lexer.h"
CL_NS_USE(util)
CL_NS_USE(index)
CL_NS_USE(analysis)
CL_NS_USE(search)
CL_NS_DEF(queryParser)
QueryParser::QueryParser(const TCHAR* _field, Analyzer* _analyzer) : analyzer(_analyzer){
	//Func - Constructor.
	//       Creates a QueryParser for the default field named by _field and
	//       stores the supplied analyzer pointer.
	//Pre  - _field != NULL
	//Post - The instance has been created: it has no active token list,
	//       lowercaseWildcardTerms is true, and field holds the result of
	//       stringDuplicate(_field).

	CND_PRECONDITION(_field != NULL, "_field is NULL");

	//No token list is attached until parse(Reader*) installs one
	tokens = NULL;
	lowercaseWildcardTerms = true;

	//todo: xxx can we have fixed length fields? must change throughout library
	//The duplicated name is released again by the destructor
	field = stringDuplicate( _field);
}
QueryParser::~QueryParser() {
	//Func - Destructor
	//Pre  - true
	//Post - The instance has been destroyed; the field name stored by the
	//       constructor has been released.

	//bvk: the token list is intentionally NOT deleted here. The former
	//     _CLDELETE(tokens) call was moved to the parser function, because
	//     a memory leak would occur if a QueryParser object were set up
	//     once and Parse were called on it several times.
	_CLDELETE_CARRAY(field);
}
//static
Query* QueryParser::parse(const TCHAR* query, const TCHAR* field, Analyzer* analyzer){
	//Func - Convenience entry point: parses query with a parser that is
	//       created just for this one call.
	//Pre  - query    != NULL and holds the query text to parse
	//       field    != NULL and names the default field for query terms
	//       analyzer is the Analyzer handed to the temporary parser
	//Post - The Query produced by parse(const TCHAR*) has been returned

	CND_PRECONDITION(query != NULL, "query is NULL");
	CND_PRECONDITION(field != NULL, "field is NULL");

	//The temporary parser lives only until this full expression completes
	return QueryParser(field, analyzer).parse(query);
}
Query* QueryParser::parse(const TCHAR* query){
	//Func - Parses a query string by wrapping it in a StringReader and
	//       delegating to parse(Reader*)
	//Pre  - query != NULL and contains the query text to be parsed
	//Post - The Query returned by parse(Reader*) has been returned and the
	//       temporary reader has been deleted

	CND_PRECONDITION(query != NULL, "query is NULL");

	//Wrap the query text in a reader that the lexer can consume
	Reader* queryReader = _CLNEW StringReader(query);
	CND_CONDITION(queryReader != NULL, "Could not allocate memory for StringReader r");

	//Receives the result of the delegated parse
	Query* parsed = NULL;
	try{
		parsed = parse(queryReader);
	}_CLFINALLY (
		//The reader is only needed while parsing; dispose of it here
		_CLDELETE(queryReader);
	);

	//A parse that did not throw is expected to have produced a Query
	CND_CONDITION(parsed != NULL, "ret is NULL");
	return parsed;
}
Query* QueryParser::parse(Reader* reader){
	//Func - Lexes the query text supplied by reader and parses the resulting
	//       tokens into a Query
	//Pre  - reader != NULL and supplies the query text
	//Post - A parsed Query instance has been returned, or an exception has
	//       been thrown (CL_ERR_Parse with "No query given." when the text
	//       yields no tokens). In either case this->tokens is NULL again.

	CND_PRECONDITION(reader != NULL, "reader is NULL");

	//The token list lives on this stack frame; this->tokens merely points
	//at it for the duration of the parse so the Match* methods can use it.
	TokenList _tokens;
	this->tokens = &_tokens;

	Query* ret = NULL;
	try{
		//Instantiate a lexer and let it fill the token list
		Lexer lexer(reader);
		lexer.Lex(tokens);

		//Peek at the first token: an immediate EOF means an empty query
		if (tokens->peek()->Type == CL_NS(queryParser)::EOF_){
			_CLTHROWA(CL_ERR_Parse, "No query given.");
		}

		ret = MatchQuery(field);
	}_CLFINALLY (
		//_tokens is about to go out of scope, so the member must never be
		//left pointing at it - not on success, and not when the lexer or
		//one of the Match* methods throws.
		this->tokens = NULL;
	);

	//Return after the clause rather than from inside the try block, so the
	//reset above has always been executed by the time we get here
	return ret;
}
int32_t QueryParser::MatchConjunction(){
//Func - matches for CONJUNCTION
// CONJUNCTION ::= <AND> | <OR>
//Pre - tokens != NULL
//Post - if the first token is an AND or an OR then
// the token is extracted and deleted and CONJ_AND or CONJ_OR is returned
// otherwise CONJ_NONE is returned
CND_PRECONDITION(tokens != NULL, "tokens is NULL");
switch(tokens->peek()->Type){
case CL_NS(queryParser)::AND_ :
//Delete the first token of tokenlist
ExtractAndDeleteToken();
return CONJ_AND;
case CL_NS(queryParser)::OR :
//Delete the first token of tokenlist
ExtractAndDeleteToken();
return CONJ_OR;
default :
return CONJ_NONE;
}
}
int32_t QueryParser::MatchModifier(){
//Func - matches for MODIFIER
// MODIFIER ::= <PLUS> | <MINUS> | <NOT>
//Pre - tokens != NULL
//Post - if the first token is a PLUS the token is extracted and deleted and MOD_REQ is returned
// if the first token is a MINUS or NOT the token is extracted and deleted and MOD_NOT is returned
// otherwise MOD_NONE is returned
CND_PRECONDITION(tokens != NULL, "tokens is NULL");
switch(tokens->peek()->Type){
case CL_NS(queryParser)::PLUS :
//Delete the first token of tokenlist
ExtractAndDeleteToken();
return MOD_REQ;
case CL_NS(queryParser)::MINUS :
case CL_NS(queryParser)::NOT :
//Delete the first token of tokenlist
ExtractAndDeleteToken();
return MOD_NOT;
default :
return MOD_NONE;
}
}
Query* QueryParser::MatchQuery(const TCHAR* field){
	//Func - matches for QUERY
	//       QUERY ::= [MODIFIER] CLAUSE (<CONJUNCTION> [MODIFIER] CLAUSE)*
	//Pre  - tokens != NULL
	//       field  != NULL (it is forwarded to MatchClause, which asserts it)
	//Post - Clauses have been matched until an EOF token (which is consumed
	//       and deleted) or an RPAREN token (which is left in the list) was
	//       reached. If exactly one clause was collected, its Query is
	//       returned directly; otherwise a new BooleanQuery holding all
	//       collected clauses is returned.

	CND_PRECONDITION(tokens != NULL, "tokens is NULL");

	//NOTE(review): the list is constructed with 'false' and the single-clause
	//branch below deletes its entry by hand, so the list presumably does not
	//own its entries. If so, clauses collected so far would leak when a later
	//MatchClause call throws - confirm against CLVector and AddClause.
	CLVector<BooleanClause*> clauses(false);

	//bvk: an earlier revision tracked a separate 'firstQuery' here. That was
	//dropped: if the first clause turns out to be null (for example an empty
	//phrase), a BooleanQuery simply ends up being created below.

	//match for [MODIFIER] CLAUSE
	int32_t mods = MatchModifier();
	Query* q = MatchClause(field);
	AddClause(&clauses, CONJ_NONE, mods, q);

	//match for (<CONJUNCTION> [MODIFIER] CLAUSE)*
	while(true){
		QueryToken* p = tokens->peek();

		if(p->Type == CL_NS(queryParser)::EOF_){
			//End of input: consume and dispose of the EOF token
			QueryToken* qt = MatchQueryToken(CL_NS(queryParser)::EOF_);
			_CLDELETE(qt);
			break;
		}

		if(p->Type == CL_NS(queryParser)::RPAREN){
			//The closing parenthesis is not consumed here
			break;
		}

		//match for a conjunction (AND OR NOT)
		int32_t conj = MatchConjunction();
		//match for a modifier
		mods = MatchModifier();

		q = MatchClause(field);
		if ( q != NULL )
			AddClause(&clauses, conj, mods, q);
	}

	//finalize query
	if(clauses.size() == 1){ //bvk: removed this && firstQuery != NULL
		BooleanClause* c = clauses[0];

		//Validate the clause BEFORE touching it; checking after c->query has
		//already been read could never catch the NULL it is meant to detect
		CND_CONDITION(c != NULL, "c is NULL");

		Query* single = c->query;

		//Tell the boolean clause not to delete its query, since the query
		//itself is what gets handed back to the caller
		c->deleteQuery=false;

		//Clear the clauses list, then dispose of the now-empty wrapper
		clauses.clear();
		_CLDELETE(c);

		return single;
	}else{
		BooleanQuery* query = _CLNEW BooleanQuery();
		//Condition check to see if query has been allocated properly
		CND_CONDITION(query != NULL, "No memory could be allocated for query");

		//Hand every collected clause over to the boolean query
		for( uint32_t i=0;i<clauses.size();i++ ){
			//Condition check to see if clauses[i] is valid
			CND_CONDITION(clauses[i] != NULL, "clauses[i] is NULL");
			query->add(clauses[i]);
		}

		return query;
	}
}
Query* QueryParser::MatchClause(const TCHAR* field){
//Func - matches for CLAUSE
// CLAUSE ::= [TERM <COLONQueryParser::>] ( TERM | (<LPAREN> QUERY <RPAREN>))
//Pre - field != NULL
//Post -
CND_PRECONDITION(field != NULL, "field is NULL");
Query* q = NULL;
const TCHAR* sfield = field;
bool delField = false;
QueryToken *DelToken = NULL;
//match for [TERM <COLON>]
QueryToken* term = tokens->extract();
if(term->Type == CL_NS(queryParser)::TERM && tokens->peek()->Type == CL_NS(queryParser)::COLON){
DelToken = MatchQueryToken(CL_NS(queryParser)::COLON);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -