querylexer.php
来自「PHP 知识管理系统(基于树结构的知识管理系统), 英文原版的PHP源码。」· PHP 代码 · 共 509 行 · 第 1/2 页
PHP
509 行
$addQuotedLexemeAction = new Zend_Search_Lucene_FSMAction($this, 'addQuotedLexeme');
$addNumberLexemeAction = new Zend_Search_Lucene_FSMAction($this, 'addNumberLexeme');
$addLexemeCharAction = new Zend_Search_Lucene_FSMAction($this, 'addLexemeChar');
/** Syntax lexeme */
$this->addEntryAction(self::ST_SYNT_LEXEME, $syntaxLexemeAction);
// Two lexemes in succession
$this->addTransitionAction(self::ST_SYNT_LEXEME, self::ST_SYNT_LEXEME, $syntaxLexemeAction);
/** Lexeme */
$this->addEntryAction(self::ST_LEXEME, $addLexemeCharAction);
$this->addTransitionAction(self::ST_LEXEME, self::ST_LEXEME, $addLexemeCharAction);
// ST_ESCAPED_CHAR => ST_LEXEME transition is covered by ST_LEXEME entry action
$this->addTransitionAction(self::ST_LEXEME, self::ST_WHITE_SPACE, $addLexemeAction);
$this->addTransitionAction(self::ST_LEXEME, self::ST_SYNT_LEXEME, $addLexemeAction);
$this->addTransitionAction(self::ST_LEXEME, self::ST_QUOTED_LEXEME, $addLexemeAction);
$this->addTransitionAction(self::ST_LEXEME, self::ST_LEXEME_MODIFIER, $addLexemeAction);
$this->addTransitionAction(self::ST_LEXEME, self::ST_NUMBER, $addLexemeAction);
$this->addTransitionAction(self::ST_LEXEME, self::ST_MANTISSA, $addLexemeAction);
/** Quoted lexeme */
// We don't need an entry action (skip the quote)
$this->addTransitionAction(self::ST_QUOTED_LEXEME, self::ST_QUOTED_LEXEME, $addLexemeCharAction);
$this->addTransitionAction(self::ST_ESCAPED_QCHAR, self::ST_QUOTED_LEXEME, $addLexemeCharAction);
// Closing quote changes state to ST_WHITE_SPACE; other states are not used
$this->addTransitionAction(self::ST_QUOTED_LEXEME, self::ST_WHITE_SPACE, $addQuotedLexemeAction);
/** Lexeme modifier */
$this->addEntryAction(self::ST_LEXEME_MODIFIER, $lexemeModifierAction);
/** Number */
$this->addEntryAction(self::ST_NUMBER, $addLexemeCharAction);
$this->addEntryAction(self::ST_MANTISSA, $addLexemeCharAction);
$this->addTransitionAction(self::ST_NUMBER, self::ST_NUMBER, $addLexemeCharAction);
// ST_NUMBER => ST_MANTISSA transition is covered by ST_MANTISSA entry action
$this->addTransitionAction(self::ST_MANTISSA, self::ST_MANTISSA, $addLexemeCharAction);
$this->addTransitionAction(self::ST_NUMBER, self::ST_WHITE_SPACE, $addNumberLexemeAction);
$this->addTransitionAction(self::ST_NUMBER, self::ST_SYNT_LEXEME, $addNumberLexemeAction);
$this->addTransitionAction(self::ST_NUMBER, self::ST_LEXEME_MODIFIER, $addNumberLexemeAction);
$this->addTransitionAction(self::ST_MANTISSA, self::ST_WHITE_SPACE, $addNumberLexemeAction);
$this->addTransitionAction(self::ST_MANTISSA, self::ST_SYNT_LEXEME, $addNumberLexemeAction);
$this->addTransitionAction(self::ST_MANTISSA, self::ST_LEXEME_MODIFIER, $addNumberLexemeAction);
}
/**
 * Classify a single query character as an input symbol of the state machine.
 *
 * The character classes are tried in a fixed order and the first one that
 * contains the character wins. A character matching none of the classes and
 * none of the special single characters is reported as self::IN_CHAR.
 *
 * @param string $char
 * @return integer
 */
private function _translateInput($char)
{
    // Each entry pairs a character-set constant with the input symbol it
    // maps to. The order of the entries is the order of precedence.
    $charClasses = array(
        array(self::QUERY_WHITE_SPACE_CHARS,    self::IN_WHITE_SPACE),
        array(self::QUERY_SYNT_CHARS,           self::IN_SYNT_CHAR),
        array(self::QUERY_MUTABLE_CHARS,        self::IN_MUTABLE_CHAR),
        array(self::QUERY_LEXEMEMODIFIER_CHARS, self::IN_LEXEME_MODIFIER),
        array(self::QUERY_ASCIIDIGITS_CHARS,    self::IN_ASCII_DIGIT),
    );

    foreach ($charClasses as $charClass) {
        if (strpos($charClass[0], $char) !== false) {
            return $charClass[1];
        }
    }

    // Characters that form an input symbol on their own.
    if ($char === '"') {
        return self::IN_QUOTE;
    }
    if ($char === '.') {
        return self::IN_DECIMAL_POINT;
    }
    if ($char === '\\') {
        return self::IN_ESCAPE_CHAR;
    }

    // Anything else is an ordinary lexeme character.
    return self::IN_CHAR;
}
/**
 * Tokenize a query string into a list of lexemes.
 *
 * The string is split into characters of the given encoding, each character
 * is translated into an input symbol and fed to the state machine, whose
 * actions collect the recognized lexemes.
 *
 * @param string $inputString  query string to tokenize
 * @param string $encoding     character encoding of $inputString (passed to iconv)
 * @return array               lexemes collected by the state machine actions
 * @throws Zend_Search_Lucene_Search_QueryParserException if the string can't be
 *         decoded with $encoding or the query ends in the middle of a lexeme
 */
public function tokenize($inputString, $encoding)
{
    $this->reset();

    $this->_lexemes     = array();
    $this->_queryString = array();
    // A previous call that was aborted by an exception may have left a
    // partially accumulated lexeme behind; never let it leak into this run.
    $this->_currentLexeme = '';

    $strLength = iconv_strlen($inputString, $encoding);
    if ($strLength === false) {
        // iconv_strlen() reports undecodable input by returning false.
        // Without this check the loop below would not run at all and the
        // query would silently be treated as empty.
        throw new Zend_Search_Lucene_Search_QueryParserException('Query string can\'t be decoded using the specified encoding.');
    }

    // Workaround for iconv_substr bug
    $inputString .= ' ';

    // Split the query into characters (not bytes) of the given encoding.
    for ($count = 0; $count < $strLength; $count++) {
        $this->_queryString[$count] = iconv_substr($inputString, $count, 1, $encoding);
    }

    // The position is kept in a property because actions read it and
    // addQuerySyntaxLexeme() may advance it to consume a second character.
    for ($this->_queryStringPosition = 0;
         $this->_queryStringPosition < count($this->_queryString);
         $this->_queryStringPosition++) {
        $this->process($this->_translateInput($this->_queryString[$this->_queryStringPosition]));
    }

    // Feed a trailing white space so that a lexeme still being accumulated
    // at the end of the query gets completed.
    $this->process(self::IN_WHITE_SPACE);

    if ($this->getState() != self::ST_WHITE_SPACE) {
        throw new Zend_Search_Lucene_Search_QueryParserException('Unexpected end of query');
    }

    $this->_queryString = null;

    return $this->_lexemes;
}
/*********************************************************************
* Actions implementation
*
* Actions modify the list of recognized lexemes
*********************************************************************/
/**
 * Add a query syntax lexeme for the character at the current position.
 *
 * Characters listed in self::QUERY_DOUBLECHARLEXEME_CHARS must be
 * immediately followed by the same character; both are consumed and form a
 * single two-character lexeme. A field indicator is not stored as a lexeme
 * of its own: it turns the preceding word lexeme into a field lexeme.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException
 */
public function addQuerySyntaxLexeme()
{
    $symbol = $this->_queryString[$this->_queryStringPosition];

    // Two-character lexemes
    $needsPair = strpos(self::QUERY_DOUBLECHARLEXEME_CHARS, $symbol) !== false;
    if ($needsPair) {
        // Consume the next character of the query string as well
        $this->_queryStringPosition++;

        // The next character has to exist and repeat the current one
        $pairFound = $this->_queryStringPosition != count($this->_queryString)
                  && $this->_queryString[$this->_queryStringPosition] == $symbol;
        if (!$pairFound) {
            throw new Zend_Search_Lucene_Search_QueryParserException('Two chars lexeme expected. ' . $this->_positionMsg());
        }

        // The lexeme text is the doubled character
        $symbol = $symbol . $symbol;
    }

    $syntaxToken = new Zend_Search_Lucene_Search_QueryToken(
        Zend_Search_Lucene_Search_QueryToken::TC_SYNTAX_ELEMENT,
        $symbol,
        $this->_queryStringPosition
    );

    // Ordinary syntax elements are simply appended to the lexeme list
    if ($syntaxToken->type != Zend_Search_Lucene_Search_QueryToken::TT_FIELD_INDICATOR) {
        $this->_lexemes[] = $syntaxToken;
        return;
    }

    // Field indicator ':': drop it and re-tag the previous lexeme, which
    // has to be a word, as a field name
    $fieldToken = array_pop($this->_lexemes);
    if ($fieldToken === null || $fieldToken->type != Zend_Search_Lucene_Search_QueryToken::TT_WORD) {
        throw new Zend_Search_Lucene_Search_QueryParserException('Field mark \':\' must follow field name. ' . $this->_positionMsg());
    }

    $fieldToken->type = Zend_Search_Lucene_Search_QueryToken::TT_FIELD;
    $this->_lexemes[] = $fieldToken;
}
/**
 * Add a lexeme modifier.
 *
 * The character at the current position is stored as a syntax element
 * token located at that position.
 */
public function addLexemeModifier()
{
    $position = $this->_queryStringPosition;

    $modifierToken = new Zend_Search_Lucene_Search_QueryToken(
        Zend_Search_Lucene_Search_QueryToken::TC_SYNTAX_ELEMENT,
        $this->_queryString[$position],
        $position
    );

    $this->_lexemes[] = $modifierToken;
}
/**
 * Add the accumulated characters as a word lexeme.
 *
 * The token is located one position before the current one, and the
 * accumulation buffer is emptied afterwards.
 */
public function addLexeme()
{
    // NOTE(review): presumably the character being processed already
    // follows the lexeme, hence the "- 1" -- confirm against the FSM setup.
    $wordToken = new Zend_Search_Lucene_Search_QueryToken(
        Zend_Search_Lucene_Search_QueryToken::TC_WORD,
        $this->_currentLexeme,
        $this->_queryStringPosition - 1
    );
    $this->_lexemes[] = $wordToken;

    // Start the next lexeme from an empty buffer
    $this->_currentLexeme = '';
}
/**
 * Add the accumulated characters as a phrase (quoted) lexeme.
 *
 * The token is located at the current position, and the accumulation
 * buffer is emptied afterwards.
 */
public function addQuotedLexeme()
{
    $phraseToken = new Zend_Search_Lucene_Search_QueryToken(
        Zend_Search_Lucene_Search_QueryToken::TC_PHRASE,
        $this->_currentLexeme,
        $this->_queryStringPosition
    );
    $this->_lexemes[] = $phraseToken;

    // Start the next lexeme from an empty buffer
    $this->_currentLexeme = '';
}
/**
 * Add the accumulated characters as a number lexeme.
 *
 * The token is located one position before the current one, and the
 * accumulation buffer is emptied afterwards.
 */
public function addNumberLexeme()
{
    $numberToken = new Zend_Search_Lucene_Search_QueryToken(
        Zend_Search_Lucene_Search_QueryToken::TC_NUMBER,
        $this->_currentLexeme,
        $this->_queryStringPosition - 1
    );
    $this->_lexemes[] = $numberToken;

    // Start the next lexeme from an empty buffer
    $this->_currentLexeme = '';
}
/**
 * Extend the lexeme being accumulated by the character at the current
 * position of the query string.
 */
public function addLexemeChar()
{
    $char = $this->_queryString[$this->_queryStringPosition];

    $this->_currentLexeme = $this->_currentLexeme . $char;
}
/**
 * Build the text fragment that reports the current query string position.
 *
 * It is appended to the messages of the exceptions thrown by this class.
 *
 * @return string
 */
private function _positionMsg()
{
    $position = $this->_queryStringPosition;

    return 'Position is ' . $position . '.';
}
/*********************************************************************
* Syntax errors actions
*********************************************************************/
/**
 * Report a lexeme modifier that is followed by a character not allowed
 * after it.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException always
 */
public function lexModifierErrException()
{
    $message = 'Lexeme modifier character can be followed only by number, white space or query syntax element. '
             . $this->_positionMsg();

    throw new Zend_Search_Lucene_Search_QueryParserException($message);
}
/**
 * Report an unescaped quote found inside a lexeme.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException always
 */
public function quoteWithinLexemeErrException()
{
    $message = 'Quote within lexeme must be escaped by \'\\\' char. '
             . $this->_positionMsg();

    throw new Zend_Search_Lucene_Search_QueryParserException($message);
}
/**
 * Report a malformed number.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException always
 */
public function wrongNumberErrException()
{
    // The trailing space keeps the position text from being glued to the
    // sentence ("Wrong number syntax.Position is N."), matching the other
    // error messages of this class.
    throw new Zend_Search_Lucene_Search_QueryParserException('Wrong number syntax. ' . $this->_positionMsg());
}
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?