⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 queryparser.cpp

📁 clucene是c++版的全文检索引擎,完全移植于lucene,采用 stl 编写.
💻 CPP
📖 第 1 页 / 共 2 页
字号:
/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
* 
* Distributable under the terms of either the Apache License (Version 2.0) or 
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/StdHeader.h"
#include "QueryParser.h"

#include "QueryParserConstants.h"
#include "CLucene/analysis/AnalysisHeader.h"
#include "CLucene/util/Reader.h"
#include "CLucene/search/SearchHeader.h"
#include "CLucene/index/Term.h"

#include "CLucene/search/TermQuery.h"
#include "CLucene/search/PhraseQuery.h"
#include "CLucene/search/RangeQuery.h"
#include "CLucene/search/PrefixQuery.h"
#include "CLucene/search/WildcardQuery.h"
#include "CLucene/search/FuzzyQuery.h"
#include "CLucene/search/PrefixQuery.h"

#include "TokenList.h"
#include "QueryToken.h"
#include "QueryParserBase.h"
#include "Lexer.h"

CL_NS_USE(util)
CL_NS_USE(index)
CL_NS_USE(analysis)
CL_NS_USE(search)

CL_NS_DEF(queryParser)

    QueryParser::QueryParser(const TCHAR* _field, Analyzer* _analyzer) : analyzer(_analyzer){
	//Func - Constructor.
	//       Sets up a QueryParser whose default field is _field and which
	//       stores _analyzer for later use
	//Pre  - _field != NULL
	//Post - The instance owns a duplicate of _field, has no token list
	//       attached (tokens == NULL) and has lowercaseWildcardTerms == true

		CND_PRECONDITION(_field != NULL, "_field is NULL");

		//No token list is attached at construction time
		tokens = NULL;
		lowercaseWildcardTerms = true;

		//todo: xxx can we have fixed length fields? must change throughout library
		//Keep a private copy of the field name; the destructor releases it
		field = stringDuplicate( _field);
	}

	QueryParser::~QueryParser() {
	//Func - Destructor
	//Pre  - true
	//Post - The field name copy created by the constructor has been released
	//       and the instance has been destroyed

		//Release the duplicated field name
        _CLDELETE_CARRAY(field);
		//_CLDELETE(tokens); bvk: moved to the parser function.
		//tokens is deliberately not deleted here: a memory leak would occur
		//if we set up a QueryParser object and called Parse several times
	}

    //static
    Query* QueryParser::parse(const TCHAR* query, const TCHAR* field, Analyzer* analyzer){
	//Func - Convenience entry point: parses query with a temporary QueryParser
	//       constructed from field and analyzer
	//Pre  - query != NULL and holds the query text to parse
	//       field != NULL and holds the default field for query terms
	//       analyzer is the Analyzer handed to the temporary parser
	//Post - The Query returned by the temporary parser's parse(query) has
	//       been returned to the caller

		CND_PRECONDITION(query != NULL, "query is NULL");
		CND_PRECONDITION(field != NULL, "field is NULL");

		//The parser object only lives for the duration of this call
		QueryParser oneShot(field, analyzer);
		Query* parsed = oneShot.parse(query);
		return parsed;
	}

    Query* QueryParser::parse(const TCHAR* query){
	//Func - Parses a query supplied as a string
	//Pre  - query != NULL and holds the query text to be parsed
	//Post - The Query produced by parse(Reader*) for that text has been
	//       returned; the temporary StringReader is deleted in the
	//       _CLFINALLY section

		CND_PRECONDITION(query != NULL, "query is NULL");

		//Receives the result of the parse
		Query* ret = NULL;

		//Wrap the query text in a StringReader so the Reader overload can consume it
		Reader* r = _CLNEW StringReader(query);
		CND_CONDITION(r != NULL, "Could not allocate memory for StringReader r");

		try{
			//Delegate the actual parsing to the Reader overload
			ret = parse(r);
		}_CLFINALLY (
			_CLDELETE(r);
		);

		//A NULL result is treated as a failed condition
		CND_CONDITION(ret != NULL, "ret is NULL");

		return ret;
	}

	Query* QueryParser::parse(Reader* reader){
	//Func - Lexes the query text supplied by reader and returns the parsed Query
	//Pre  - reader contains a valid reference to a Reader and manages the query string
	//Post - A parsed Query instance has been returned, or an exception has
	//       been thrown (CL_ERR_Parse "No query given." when the first token
	//       is EOF_). On every exit path this->tokens has been reset to NULL

		//The token list lives on the stack for this call only; the member
		//pointer merely lends it to the Match* functions
		TokenList tokenList;
		this->tokens = &tokenList;

		//Instantiate a lexer
		Lexer lexer(reader);

		//Pointer for the return value
		Query* ret = NULL;

		//Lexing and matching may throw. The _CLFINALLY section (same idiom as
		//in parse(const TCHAR*)) resets this->tokens both on normal completion
		//and when an exception propagates, so the member never keeps pointing
		//at the destroyed stack object
		try{
			//Lex the tokens
			lexer.Lex(tokens);

			//Peek at the first token: EOF_ means the query string failed
			//to yield any tokens
			if (tokens->peek()->Type == CL_NS(queryParser)::EOF_){
				_CLTHROWA(CL_ERR_Parse, "No query given.");
			}

			//Build the Query from the token list
			ret = MatchQuery(field);
		}_CLFINALLY (
			this->tokens = NULL;
		);

		return ret;
	}

	int32_t QueryParser::MatchConjunction(){
	//Func - matches for CONJUNCTION
	//       CONJUNCTION ::= <AND> | <OR>
	//Pre  - tokens != NULL
	//Post - When the next token is an AND or an OR it has been extracted and
	//       deleted and CONJ_AND or CONJ_OR respectively is returned.
	//       Otherwise the token list is left untouched and CONJ_NONE is returned

		CND_PRECONDITION(tokens != NULL, "tokens is NULL");

		//Look at the next token without consuming it
		QueryToken* next = tokens->peek();

		if (next->Type == CL_NS(queryParser)::AND_){
			//Consume the AND token
			ExtractAndDeleteToken();
			return CONJ_AND;
		}

		if (next->Type == CL_NS(queryParser)::OR){
			//Consume the OR token
			ExtractAndDeleteToken();
			return CONJ_OR;
		}

		//Anything else is not a conjunction
		return CONJ_NONE;
	}

	int32_t QueryParser::MatchModifier(){
	//Func - matches for MODIFIER
	//       MODIFIER ::= <PLUS> | <MINUS> | <NOT>
	//Pre  - tokens != NULL
	//Post - When the next token is a PLUS it has been extracted and deleted
	//       and MOD_REQ is returned.
	//       When the next token is a MINUS or a NOT it has been extracted and
	//       deleted and MOD_NOT is returned.
	//       Otherwise the token list is left untouched and MOD_NONE is returned
		CND_PRECONDITION(tokens != NULL, "tokens is NULL");

		//Look at the next token without consuming it
		QueryToken* next = tokens->peek();

		if (next->Type == CL_NS(queryParser)::PLUS){
			//Consume the PLUS token
			ExtractAndDeleteToken();
			return MOD_REQ;
		}

		//MINUS and NOT are treated alike
		if (next->Type == CL_NS(queryParser)::MINUS || next->Type == CL_NS(queryParser)::NOT){
			//Consume the MINUS / NOT token
			ExtractAndDeleteToken();
			return MOD_NOT;
		}

		//Anything else is not a modifier
		return MOD_NONE;
	}

	Query* QueryParser::MatchQuery(const TCHAR* field){
	//Func - matches for QUERY
	//       QUERY ::= [MODIFIER] CLAUSE (<CONJUNCTION> [MODIFIER] CLAUSE)*
	//Pre  - tokens != NULL
	//       field is the default field name; it is passed on to MatchClause,
	//       which requires it to be non-NULL
	//Post - Tokens have been consumed up to and including EOF_, or up to but
	//       not including an RPAREN.
	//       When exactly one clause was collected, the Query of that clause is
	//       returned directly and the clause itself has been deleted.
	//       Otherwise (zero or several clauses) a new BooleanQuery to which
	//       all collected clauses were added is returned

		CND_PRECONDITION(tokens != NULL, "tokens is NULL");

		//NOTE(review): the 'false' argument presumably tells the vector not to
		//delete its elements on destruction - confirm against CLVector
		CLVector<BooleanClause*> clauses(false);

		//bvk: a separate firstQuery pointer used to be tracked here; it was
		//removed because a NULL first query (for example when the first clause
		//is an empty phrase) still ended up creating a BooleanQuery

		//match for MODIFIER
		int32_t mods = MatchModifier();

		//match for CLAUSE
		//NOTE(review): unlike in the loop below, q is handed to AddClause
		//without a NULL check - verify that AddClause tolerates a NULL query
		Query* q = MatchClause(field);
		AddClause(&clauses, CONJ_NONE, mods, q);

		// match for CLAUSE*
		while(true){
			QueryToken* p = tokens->peek();
			if(p->Type == CL_NS(queryParser)::EOF_){
				//End of input: consume and discard the EOF_ token
				QueryToken* qt = MatchQueryToken(CL_NS(queryParser)::EOF_);
				_CLDELETE(qt);
				break;
			}

			if(p->Type == CL_NS(queryParser)::RPAREN){
				//The RPAREN is deliberately not consumed here
				//MatchQueryToken(CL_NS(queryParser)::RPAREN);
				break;
			}

			//match for a conjunction (AND OR)
			int32_t conj = MatchConjunction();
			//match for a modifier
			mods = MatchModifier();

			q = MatchClause(field);
			if ( q != NULL )
				AddClause(&clauses, conj, mods, q);
		}

		// finalize query
		if(clauses.size() == 1){ //bvk: removed this && firstQuery != NULL
			BooleanClause* c = clauses[0];

			//Condition check to be sure clauses[0] is valid; it must run
			//before c is dereferenced
			CND_CONDITION(c != NULL, "c is NULL");

			Query* single = c->query;

			//Tell the boolean clause not to delete its query
			c->deleteQuery=false;
			//Clear the clauses list
			clauses.clear();
			_CLDELETE(c);

			return single;
		}else{
			BooleanQuery* query = _CLNEW BooleanQuery();
			//Condition check to see if query has been allocated properly
			CND_CONDITION(query != NULL, "No memory could be allocated for query");

			//iterate through all the clauses
			for( uint32_t i=0;i<clauses.size();i++ ){
				//Condition check to see if clauses[i] is valid
				CND_CONDITION(clauses[i] != NULL, "clauses[i] is NULL");
				//Add it to query
				query->add(clauses[i]);
			}
			return query;
		}
	}

	Query* QueryParser::MatchClause(const TCHAR* field){
	//Func - matches for CLAUSE
	//       CLAUSE ::= [TERM <COLONQueryParser::>] ( TERM | (<LPAREN> QUERY <RPAREN>))
	//Pre  - field != NULL
	//Post -

		CND_PRECONDITION(field != NULL, "field is NULL");

		Query* q = NULL;
		const TCHAR* sfield = field;
		bool delField = false;

		QueryToken *DelToken = NULL;

		//match for [TERM <COLON>]
		QueryToken* term = tokens->extract();
		if(term->Type == CL_NS(queryParser)::TERM && tokens->peek()->Type == CL_NS(queryParser)::COLON){
			DelToken = MatchQueryToken(CL_NS(queryParser)::COLON);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -