lexer.php

来自「太烦了」· PHP 代码 · 共 568 行 · 第 1/2 页

PHP
568
字号
<?php
/* vim: set expandtab tabstop=4 shiftwidth=4: */
// +----------------------------------------------------------------------+
// | Copyright (c) 2002-2004 Brent Cook                                        |
// +----------------------------------------------------------------------+
// | This library is free software; you can redistribute it and/or        |
// | modify it under the terms of the GNU Lesser General Public           |
// | License as published by the Free Software Foundation; either         |
// | version 2.1 of the License, or (at your option) any later version.   |
// |                                                                      |
// | This library is distributed in the hope that it will be useful,      |
// | but WITHOUT ANY WARRANTY; without even the implied warranty of       |
// | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    |
// | Lesser General Public License for more details.                      |
// |                                                                      |
// | You should have received a copy of the GNU Lesser General Public     |
// | License along with this library; if not, write to the Free Software  |
// | Foundation, Inc., 59 Temple Place, Suite 330,Boston,MA 02111-1307 USA|
// +----------------------------------------------------------------------+
// | Authors: Brent Cook <busterbcook@yahoo.com>                          |
// |          Jason Pell <jasonpell@hotmail.com>                          |
// +----------------------------------------------------------------------+
//
// $Id: Lexer.php,v 1.20 2004/05/07 12:33:35 busterb Exp $
//

include PEAR_DIR . 'SQL/ctype.php';

// {{{ token definitions
// variables: 'ident', 'sys_var'
// values:    'real_val', 'text_val', 'int_val', null
// }}}

/**
 * A lexical analyser inspired by the msql lexer
 *
 * The lexer walks over $string one character at a time (get()/unget()/
 * skip()) and hands out tokens through lex().  When a lookahead greater
 * than zero is requested, the most recent tokens are kept in $tokenStack
 * so that pushBack() can make lex() return them again.
 *
 * @author  Brent Cook <busterbcook@yahoo.com>
 * @version 0.5
 * @access  public
 * @package SQL_Parser
 */
class Lexer
{
    // array of valid tokens for the lexer to recognize
    // format is 'token literal'=>TOKEN_VALUE
    var $symbols = array();

    // {{{ instance variables
    // Offset of the next character get()/skip() will read from $string.
    var $tokPtr = 0;
    // Offset at which the token currently being scanned starts.
    var $tokStart = 0;
    // Number of characters consumed so far for the current token.
    var $tokLen = 0;
    // Text of the most recently recognised token.
    var $tokText = '';
    // Count of line endings seen so far.
    var $lineNo = 0;
    // Value of $tokPtr recorded when the last line ending was consumed.
    var $lineBegin = 0;
    // The input being tokenised and its length in bytes.
    var $string = '';
    var $stringLen = 0;

    // Will not be altered by skip()
    var $tokAbsStart = 0;
    // Saved and restored together with each token by lex().
    var $skipText = '';

    // Provide lookahead capability.
    var $lookahead = 0;
    // Specify how many tokens to save in tokenStack, so the
    // token stream can be pushed back.
    var $tokenStack = array();
    var $stackPtr = 0;
    // }}}

    // {{{ incidental functions
    /**
     * Create a lexer for the given input.
     *
     * Declared as __construct() because PHP 8 no longer treats a method
     * named after its class as the constructor.
     *
     * @param string $string    text to tokenise
     * @param int    $lookahead number of tokens that may be pushed back
     *                          (0 disables the token stack)
     */
    function __construct($string = '', $lookahead = 0)
    {
        $this->string = $string;
        $this->stringLen = strlen($string);
        $this->lookahead = $lookahead;
    }

    /**
     * PHP 4 style constructor, kept so that existing code which calls
     * Lexer() explicitly (e.g. from a subclass constructor) keeps working.
     *
     * @param string $string    text to tokenise
     * @param int    $lookahead number of tokens that may be pushed back
     */
    function Lexer($string = '', $lookahead = 0)
    {
        $this->__construct($string, $lookahead);
    }

    /**
     * Consume one character of the current token.
     *
     * Always advances $tokPtr and $tokLen, even past the end of input,
     * so that a following unget() restores the previous position.
     *
     * @return string|null the character read, or null at end of input
     */
    function get()
    {
        ++$this->tokPtr;
        ++$this->tokLen;
        // Square brackets: the {} string offset syntax was removed in PHP 8.
        return ($this->tokPtr <= $this->stringLen) ? $this->string[$this->tokPtr - 1] : null;
    }

    /**
     * Step back over the character most recently returned by get().
     */
    function unget()
    {
        --$this->tokPtr;
        --$this->tokLen;
    }

    /**
     * Read a character that does not belong to the current token.
     *
     * Moves $tokStart forward by one on every call; $tokPtr only moves
     * when a character is actually returned.
     *
     * @return string the character read, or '' when $tokPtr is at the
     *                end of input
     */
    function skip()
    {
        ++$this->tokStart;
        return ($this->tokPtr != $this->stringLen) ? $this->string[$this->tokPtr++] : '';
    }

    /**
     * Rewind the read position to the start of the current token.
     */
    function revert()
    {
        $this->tokPtr = $this->tokStart;
        $this->tokLen = 0;
    }

    /**
     * Tell whether $c is one of the characters '<', '>', '=' or '!'.
     *
     * @param string $c character to test
     * @return bool
     */
    function isCompop($c)
    {
        return (($c == '<') || ($c == '>') || ($c == '=') || ($c == '!'));
    }
    // }}}

    // {{{ pushBack()
    /**
     * Push back a token, so the very next call to lex() will return that token.
     * Calls to this function will be ignored if there is no lookahead specified
     * to the constructor, or the pushBack() function has already been called the
     * maximum number of tokens that can be looked ahead.
     */
    function pushBack()
    {
        if ($this->lookahead > 0 && count($this->tokenStack) > 0 && $this->stackPtr > 0) {
            $this->stackPtr--;
        }
    }
    // }}}

    // {{{ lex()
    /**
     * Return the next token.
     *
     * Without lookahead this is simply nextToken().  With lookahead, tokens
     * that were pushed back are replayed from $tokenStack (restoring
     * $tokText and $skipText) before any new token is scanned.
     *
     * @return mixed whatever nextToken() produced for the token
     */
    function lex()
    {
        if ($this->lookahead > 0) {
            // The stackPtr, should always be the same as the count of
            // elements in the tokenStack.  The stackPtr, can be thought
            // of as pointing to the next token to be added.  If however
            // a pushBack() call is made, the stackPtr, will be less than the
            // count, to indicate that we should take that token from the
            // stack, instead of calling nextToken for a new token.
            if ($this->stackPtr < count($this->tokenStack)) {
                $this->tokText = $this->tokenStack[$this->stackPtr]['tokText'];
                $this->skipText = $this->tokenStack[$this->stackPtr]['skipText'];
                $token = $this->tokenStack[$this->stackPtr]['token'];

                // We have read the token, so now iterate again.
                $this->stackPtr++;
                return $token;
            } else {
                // If $tokenStack is full (equal to lookahead), pop the oldest
                // element off, to make room for the new one.
                if ($this->stackPtr == $this->lookahead) {
                    // For some reason array_shift and
                    // array_pop screw up the indexing, so we do it manually.
                    for ($i = 0; $i < (count($this->tokenStack) - 1); $i++) {
                        $this->tokenStack[$i] = $this->tokenStack[$i + 1];
                    }

                    // Indicate that we should put the element in
                    // at the stackPtr position.
                    $this->stackPtr--;
                }

                $token = $this->nextToken();
                $this->tokenStack[$this->stackPtr] =
                    array('token' => $token,
                          'tokText' => $this->tokText,
                          'skipText' => $this->skipText);
                $this->stackPtr++;
                return $token;
            }
        } else {
            return $this->nextToken();
        }
    }
    // }}}

    // {{{ nextToken()
    function nextToken()
    {
        if ($this->string == '') return;
        $state = 0;
        $this->tokAbsStart = $this->tokStart;

        while (true) {
            //echo "State: $state, Char: $c\n";
            switch ($state) {
                // {{{ State 0 : Start of token
                case 0:
                    $this->tokPtr = $this->tokStart;
                    $this->tokText = '';
                    $this->tokLen = 0;
                    $c = $this->get();

                    if (is_null($c)) { // End Of Input
                        $state = 1000;
                        break;
                    }

                    while (($c == ' ') || ($c == "\t")
                        || ($c == "\n") || ($c == "\r")) {
                        if ($c == "\n" || $c == "\r") {
                            // Handle MAC/Unix/Windows line endings.
                            // NOTE(review): when "\r" is the last character,
                            // skip() returns '' without advancing and the
                            // unget() below steps back onto the "\r" - this
                            // looks like it can loop forever; confirm.
                            if ($c == "\r") {
                                $c = $this->skip();

                                // If not DOS newline
                                if ($c != "\n")
                                    $this->unget();
                            }
                            ++$this->lineNo;
                            $this->lineBegin = $this->tokPtr;
                        }

                        $c = $this->skip();
                        $this->tokLen = 1;
                    }

                    // Escape quotes and backslashes
                    if ($c == '\\') {
                        $t = $this->get();
                        if ($t == '\'' || $t == '\\' || $t == '"') {
                            $this->tokText = $t;
                            $this->tokStart = $this->tokPtr;
                            return $this->tokText;
                        } else {
                            $this->unget();

                            // Unknown token.  Revert to single char
                            $state = 999;
                            break;
                        }
                    }

                    if (($c == '\'') || ($c == '"')) { // text string
                        $quote = $c;
                        $state = 12;
                        break;
                    }

                    if ($c == '_') { // system variable
                        $state = 18;
                        break;
                    }

                    if (ctype_alpha(ord($c))) { // keyword or ident
                        $state = 1;
                        break;
                    }

                    if (ctype_digit(ord($c))) { // real or int number
                        $state = 5;
                        break;
                    }

                    if ($c == '.') {
                        $t = $this->get();
                        if ($t == '.') { // ellipsis
                            if ($this->get() == '.') {
                                $this->tokText = '...';
                                $this->tokStart = $this->tokPtr;
                                return $this->tokText;
                            } else {
                                $state = 999;
                                break;
                            }
                        } else if (ctype_digit(ord($t))) { // real number
                            $this->unget();
                            $state = 7;
                            break;
                        } else { // period
                            $this->unget();
                        }
                    }

                    if ($c == '#') { // Comments
                        $state = 14;
                        break;
                    }

                    if ($c == '-') {
                        $t = $this->get();
                        if ($t == '-') {
                            $state = 14;
                            break;
                        } else { // negative number
                            $this->unget();
                            $state = 5;
                            break;
                        }

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?