lexer.php
来自「太烦了」· PHP 代码 · 共 568 行 · 第 1/2 页
PHP
568 行
<?php
/* vim: set expandtab tabstop=4 shiftwidth=4: */
// +----------------------------------------------------------------------+
// | Copyright (c) 2002-2004 Brent Cook                                   |
// +----------------------------------------------------------------------+
// | This library is free software; you can redistribute it and/or        |
// | modify it under the terms of the GNU Lesser General Public           |
// | License as published by the Free Software Foundation; either         |
// | version 2.1 of the License, or (at your option) any later version.   |
// |                                                                      |
// | This library is distributed in the hope that it will be useful,      |
// | but WITHOUT ANY WARRANTY; without even the implied warranty of       |
// | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    |
// | Lesser General Public License for more details.                      |
// |                                                                      |
// | You should have received a copy of the GNU Lesser General Public     |
// | License along with this library; if not, write to the Free Software  |
// | Foundation, Inc., 59 Temple Place, Suite 330,Boston,MA 02111-1307 USA|
// +----------------------------------------------------------------------+
// | Authors: Brent Cook <busterbcook@yahoo.com>                          |
// |          Jason Pell <jasonpell@hotmail.com>                          |
// +----------------------------------------------------------------------+
//
// $Id: Lexer.php,v 1.20 2004/05/07 12:33:35 busterb Exp $
//

include PEAR_DIR . 'SQL/ctype.php';

// {{{ token definitions
// variables: 'ident', 'sys_var'
// values: 'real_val', 'text_val', 'int_val', null
// }}}

/**
 * A lexical analyser inspired by the msql lexer
 *
 * The lexer walks over $string one character at a time.  $tokStart marks
 * where the current token begins, $tokPtr is the read cursor and $tokLen
 * counts the characters consumed for the current token.  An optional
 * lookahead buffer allows tokens to be pushed back with pushBack().
 *
 * @author  Brent Cook <busterbcook@yahoo.com>
 * @version 0.5
 * @access  public
 * @package SQL_Parser
 */
class Lexer
{
    // array of valid tokens for the lexer to recognize
    // format is 'token literal'=>TOKEN_VALUE
    var $symbols = array();

// {{{ instance variables
    // Read cursor: offset of the next character get() will return.
    var $tokPtr = 0;
    // Offset at which the current token starts; advanced by skip().
    var $tokStart = 0;
    // Number of characters consumed by get() for the current token.
    var $tokLen = 0;
    // Text of the token most recently produced.
    var $tokText = '';
    // Count of line endings seen so far.
    var $lineNo = 0;
    // Value of $tokPtr recorded right after the last line ending.
    var $lineBegin = 0;
    // The input being tokenised and its length in bytes.
    var $string = '';
    var $stringLen = 0;

    // Will not be altered by skip()
    var $tokAbsStart = 0;
    var $skipText = '';

    // Provide lookahead capability.
    var $lookahead = 0;
    // Specify how many tokens to save in tokenStack, so the
    // token stream can be pushed back.
    var $tokenStack = array();
    var $stackPtr = 0;
// }}}

// {{{ incidental functions
    /**
     * Creates a lexer for the given input.
     *
     * @param string $string    text to tokenise
     * @param int    $lookahead number of tokens that may be pushed back
     *                          (0 disables the push-back buffer)
     */
    function __construct($string = '', $lookahead = 0)
    {
        $this->string = $string;
        $this->stringLen = strlen($string);
        $this->lookahead = $lookahead;
    }

    /**
     * PHP 4 style constructor, kept so that existing code calling
     * $this->Lexer(...) or parent::Lexer(...) keeps working.  PHP 8 no
     * longer treats a method named after the class as a constructor, so
     * the real initialisation lives in __construct().
     *
     * @param string $string    text to tokenise
     * @param int    $lookahead number of tokens that may be pushed back
     */
    function Lexer($string = '', $lookahead = 0)
    {
        $this->__construct($string, $lookahead);
    }

    /**
     * Consumes one character as part of the current token.
     *
     * The cursor and token length are advanced even at end of input, so
     * a following unget() restores the previous position.
     *
     * @return string|null the character read, or null past the end of input
     */
    function get()
    {
        ++$this->tokPtr;
        ++$this->tokLen;
        // Square-bracket offsets: the curly-brace form is a parse error
        // on PHP 8.
        return ($this->tokPtr <= $this->stringLen)
            ? $this->string[$this->tokPtr - 1]
            : null;
    }

    /**
     * Undoes the most recent get(): steps the cursor and the token
     * length back by one.
     */
    function unget()
    {
        --$this->tokPtr;
        --$this->tokLen;
    }

    /**
     * Moves the token start forward by one and reads the character at
     * the cursor without counting it towards the token length.
     *
     * @return string the character at the cursor (the cursor then advances),
     *                or '' when the cursor sits exactly at the end of input
     */
    function skip()
    {
        ++$this->tokStart;
        return ($this->tokPtr != $this->stringLen)
            ? $this->string[$this->tokPtr++]
            : '';
    }

    /**
     * Rewinds the cursor to the start of the current token and resets
     * the token length.
     */
    function revert()
    {
        $this->tokPtr = $this->tokStart;
        $this->tokLen = 0;
    }

    /**
     * Tells whether a character can start a comparison operator.
     *
     * @param string $c character to test
     * @return bool true for '<', '>', '=' or '!'
     */
    function isCompop($c)
    {
        return (($c == '<') || ($c == '>') || ($c == '=') || ($c == '!'));
    }
// }}}

// {{{ pushBack()
/*
 * Push back a token, so the very next call to lex() will return that token.
 * Calls to this function will be ignored if there is no lookahead specified
 * to the constructor, or the pushBack() function has already been called the
 * maximum number of tokens that can be looked ahead.
 */
function pushBack()
{
    if ($this->lookahead > 0 && count($this->tokenStack) > 0 && $this->stackPtr > 0) {
        $this->stackPtr--;
    }
}
// }}}

// {{{ lex()
/*
 * Return the next token.  Without lookahead this is simply nextToken().
 * With lookahead, tokens are remembered in $tokenStack (together with
 * their tokText and skipText) so that pushBack() can replay them.
 */
function lex()
{
    if ($this->lookahead > 0) {
        // The stackPtr, should always be the same as the count of
        // elements in the tokenStack.  The stackPtr, can be thought
        // of as pointing to the next token to be added.  If however
        // a pushBack() call is made, the stackPtr, will be less than the
        // count, to indicate that we should take that token from the
        // stack, instead of calling nextToken for a new token.
        if ($this->stackPtr < count($this->tokenStack)) {
            $this->tokText = $this->tokenStack[$this->stackPtr]['tokText'];
            $this->skipText = $this->tokenStack[$this->stackPtr]['skipText'];
            $token = $this->tokenStack[$this->stackPtr]['token'];

            // We have read the token, so now iterate again.
            $this->stackPtr++;
            return $token;
        } else {
            // If $tokenStack is full (equal to lookahead), pop the oldest
            // element off, to make room for the new one.
            if ($this->stackPtr == $this->lookahead) {
                // For some reason array_shift and
                // array_pop screw up the indexing, so we do it manually.
                for ($i = 0; $i < (count($this->tokenStack) - 1); $i++) {
                    $this->tokenStack[$i] = $this->tokenStack[$i + 1];
                }

                // Indicate that we should put the element in
                // at the stackPtr position.
                $this->stackPtr--;
            }

            $token = $this->nextToken();
            $this->tokenStack[$this->stackPtr] = array(
                'token' => $token,
                'tokText' => $this->tokText,
                'skipText' => $this->skipText);
            $this->stackPtr++;
            return $token;
        }
    } else {
        return $this->nextToken();
    }
}
// }}}

// {{{ nextToken()
/*
 * State-machine scanner: starts at $this->tokStart and reads characters
 * until a token has been recognised.  Returns nothing when the input
 * string is empty.
 */
function nextToken()
{
    if ($this->string == '') return;
    $state = 0;
    $this->tokAbsStart = $this->tokStart;

    while (true){
        //echo "State: $state, Char: $c\n";
        switch($state) {
            // {{{ State 0 : Start of token
            case 0:
                $this->tokPtr = $this->tokStart;
                $this->tokText = '';
                $this->tokLen = 0;
                $c = $this->get();

                if (is_null($c)) { // End Of Input
                    $state = 1000;
                    break;
                }

                while (($c == ' ') || ($c == "\t")
                        || ($c == "\n") || ($c == "\r")) {
                    if ($c == "\n" || $c == "\r") {
                        // Handle MAC/Unix/Windows line endings.
                        if($c == "\r") {
                            $c = $this->skip();

                            // If not DOS newline
                            if($c != "\n")
                                $this->unget();
                        }
                        ++$this->lineNo;
                        $this->lineBegin = $this->tokPtr;
                    }

                    $c = $this->skip();
                    $this->tokLen = 1;
                }

                // Escape quotes and backslashes
                if ($c == '\\') {
                    $t = $this->get();
                    if ($t == '\'' || $t == '\\' || $t == '"') {
                        $this->tokText = $t;
                        $this->tokStart = $this->tokPtr;
                        return $this->tokText;
                    } else {
                        $this->unget();

                        // Unknown token.  Revert to single char
                        $state = 999;
                        break;
                    }
                }

                if (($c == '\'') || ($c == '"')) { // text string
                    $quote = $c;
                    $state = 12;
                    break;
                }

                if ($c == '_') { // system variable
                    $state = 18;
                    break;
                }

                if (ctype_alpha(ord($c))) { // keyword or ident
                    $state = 1;
                    break;
                }

                if (ctype_digit(ord($c))) { // real or int number
                    $state = 5;
                    break;
                }

                if ($c == '.') {
                    $t = $this->get();
                    if ($t == '.') { // ellipsis
                        if ($this->get() == '.') {
                            $this->tokText = '...';
                            $this->tokStart = $this->tokPtr;
                            return $this->tokText;
                        } else {
                            $state = 999;
                            break;
                        }
                    } else if (ctype_digit(ord($t))) { // real number
                        $this->unget();
                        $state = 7;
                        break;
                    } else { // period
                        $this->unget();
                    }
                }

                if ($c == '#') { // Comments
                    $state = 14;
                    break;
                }

                if ($c == '-') {
                    $t = $this->get();
                    if ($t == '-') {
                        $state = 14;
                        break;
                    } else { // negative number
                        $this->unget();
                        $state = 5;
                        break;
                    }
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?