parser.php
来自「一款可以和GOOGLE媲美的开源统计系统,运用AJAX.功能强大. 无色提示:」· PHP 代码 · 共 767 行 · 第 1/2 页
PHP
767 行
<?php
/**
 *  base include file for SimpleTest
 *  @package    SimpleTest
 *  @subpackage MockObjects
 *  @version    $Id: parser.php 163 2008-01-14 04:40:16Z matt $
 */

/**#@+
 * Lexer mode stack constants
 */
// Defines LEXER_ENTER .. LEXER_SPECIAL as 1 .. 5 unless a constant of the
// same name has already been defined.
foreach (array('LEXER_ENTER', 'LEXER_MATCHED',
        'LEXER_UNMATCHED', 'LEXER_EXIT',
        'LEXER_SPECIAL') as $i => $constant) {
    if (! defined($constant)) {
        define($constant, $i + 1);
    }
}
/**#@-*/

/**
 *    Compounded regular expression. Any of
 *    the contained patterns could match and
 *    when one does, its label is returned.
 *    @package SimpleTest
 *    @subpackage WebTester
 */
class ParallelRegex {
    var $_patterns;
    var $_labels;
    var $_regex;
    var $_case;

    /**
     *    Constructor. Starts with no patterns.
     *    @param boolean $case    True for case sensitive, false
     *                            for insensitive.
     *    @access public
     */
    function __construct($case) {
        $this->_case = $case;
        $this->_patterns = array();
        $this->_labels = array();
        $this->_regex = null;
    }

    /**
     *    Legacy class-named constructor. Kept so that existing
     *    code calling $this->ParallelRegex(...) keeps working;
     *    it only forwards to __construct().
     *    @param boolean $case    True for case sensitive, false
     *                            for insensitive.
     *    @access public
     */
    function ParallelRegex($case) {
        $this->__construct($case);
    }

    /**
     *    Adds a pattern with an optional label. Invalidates
     *    the cached compound regex.
     *    @param string $pattern      Perl style regex, but ( and )
     *                                lose the usual meaning.
     *    @param string $label        Label of regex to be returned
     *                                on a match.
     *    @access public
     */
    function addPattern($pattern, $label = true) {
        $count = count($this->_patterns);
        $this->_patterns[$count] = $pattern;
        $this->_labels[$count] = $label;
        $this->_regex = null;
    }

    /**
     *    Attempts to match all patterns at once against
     *    a string.
     *    @param string $subject      String to match against.
     *    @param string $match        First matched portion of
     *                                subject. Set to '' when nothing
     *                                matches; left untouched when no
     *                                patterns have been added.
     *    @return mixed               False if there are no patterns
     *                                or nothing matched, otherwise the
     *                                label of the pattern that matched
     *                                (true when no group captured text).
     *    @access public
     */
    function match($subject, &$match) {
        if (count($this->_patterns) == 0) {
            return false;
        }
        if (! preg_match($this->_getCompoundedRegex(), $subject, $matches)) {
            $match = '';
            return false;
        }
        $match = $matches[0];
        for ($i = 1, $count = count($matches); $i < $count; $i++) {
            // Compare against '' rather than testing truthiness, so a
            // captured token of "0" still selects its label.
            if ($matches[$i] !== '') {
                return $this->_labels[$i - 1];
            }
        }
        return true;
    }

    /**
     *    Compounds the patterns into a single
     *    regular expression separated with the
     *    "or" operator. Caches the regex.
     *    Will automatically escape (, ) and / tokens.
     *    @return string      Compound regex including delimiters
     *                        and flags.
     *    @access private
     */
    function _getCompoundedRegex() {
        if ($this->_regex === null) {
            // Build the groups in a local array. The stored patterns must
            // stay raw, otherwise a recompile after a later addPattern()
            // would wrap and escape them a second time.
            $groups = array();
            foreach ($this->_patterns as $pattern) {
                $groups[] = '(' . str_replace(
                        array('/', '(', ')'),
                        array('\/', '\(', '\)'),
                        $pattern) . ')';
            }
            $this->_regex = "/" . implode("|", $groups) . "/" . $this->_getPerlMatchingFlags();
        }
        return $this->_regex;
    }

    /**
     *    Accessor for perl regex mode flags to use.
     *    @return string       Perl regex flags.
     *    @access private
     */
    function _getPerlMatchingFlags() {
        return ($this->_case ? "msS" : "msSi");
    }
}

/**
 *    States for a stack machine.
 *    @package SimpleTest
 *    @subpackage WebTester
 */
class SimpleStateStack {
    var $_stack;

    /**
     *    Constructor. Starts in named state.
     *    @param string $start        Starting state name.
     *    @access public
     */
    function __construct($start) {
        $this->_stack = array($start);
    }

    /**
     *    Legacy class-named constructor. Kept so that existing
     *    code calling $this->SimpleStateStack(...) keeps working;
     *    it only forwards to __construct().
     *    @param string $start        Starting state name.
     *    @access public
     */
    function SimpleStateStack($start) {
        $this->__construct($start);
    }

    /**
     *    Accessor for current state.
     *    @return string       State.
     *    @access public
     */
    function getCurrent() {
        return $this->_stack[count($this->_stack) - 1];
    }

    /**
     *    Adds a state to the stack and sets it
     *    to be the current state.
     *    @param string $state        New state.
     *    @access public
     */
    function enter($state) {
        array_push($this->_stack, $state);
    }

    /**
     *    Leaves the current state and reverts
     *    to the previous one. The starting state
     *    is never popped.
     *    @return boolean    False if we drop off
     *                       the bottom of the list.
     *    @access public
     */
    function leave() {
        if (count($this->_stack) == 1) {
            return false;
        }
        array_pop($this->_stack);
        return true;
    }
}

/**
 *    Accepts text and breaks it into tokens.
 *    Some optimisation to make sure the
 *    content is only scanned by the PHP regex
 *    parser once. Lexer modes must not start
 *    with leading underscores.
 *    @package SimpleTest
 *    @subpackage WebTester
 */
class SimpleLexer {
    var $_regexes;
    var $_parser;
    var $_mode;
    var $_mode_handlers;
    var $_case;

    /**
     *    Sets up the lexer in case insensitive matching
     *    by default.
     *    @param SimpleSaxParser $parser  Handling strategy by
     *                                    reference.
* @param string $start            Starting handler.
     *    @param boolean $case            True for case sensitive.
     *    @access public
     */
    function SimpleLexer(&$parser, $start = "accept", $case = false) {
        $this->_case = $case;
        $this->_regexes = array();
        $this->_parser = &$parser;
        // Plain assignment: "= &new" is rejected by the parser from
        // PHP 7 onwards and the reference was never needed here.
        $this->_mode = new SimpleStateStack($start);
        $this->_mode_handlers = array($start => $start);
    }

    /**
     *    Adds a token search pattern for a particular
     *    parsing mode. The pattern does not change the
     *    current mode.
     *    @param string $pattern      Perl style regex, but ( and )
     *                                lose the usual meaning.
     *    @param string $mode         Should only apply this
     *                                pattern when dealing with
     *                                this type of input.
     *    @access public
     */
    function addPattern($pattern, $mode = "accept") {
        if (! isset($this->_regexes[$mode])) {
            $this->_regexes[$mode] = new ParallelRegex($this->_case);
        }
        $this->_regexes[$mode]->addPattern($pattern);
        if (! isset($this->_mode_handlers[$mode])) {
            $this->_mode_handlers[$mode] = $mode;
        }
    }

    /**
     *    Adds a pattern that will enter a new parsing
     *    mode. Useful for entering parenthesis, strings,
     *    tags, etc.
     *    @param string $pattern      Perl style regex, but ( and )
     *                                lose the usual meaning.
     *    @param string $mode         Should only apply this
     *                                pattern when dealing with
     *                                this type of input.
     *    @param string $new_mode     Change parsing to this new
     *                                nested mode.
     *    @access public
     */
    function addEntryPattern($pattern, $mode, $new_mode) {
        if (! isset($this->_regexes[$mode])) {
            $this->_regexes[$mode] = new ParallelRegex($this->_case);
        }
        // The pattern's label is the name of the mode to enter.
        $this->_regexes[$mode]->addPattern($pattern, $new_mode);
        if (! isset($this->_mode_handlers[$new_mode])) {
            $this->_mode_handlers[$new_mode] = $new_mode;
        }
    }

    /**
     *    Adds a pattern that will exit the current mode
     *    and re-enter the previous one.
     *    @param string $pattern      Perl style regex, but ( and )
     *                                lose the usual meaning.
     *    @param string $mode         Mode to leave.
     *    @access public
     */
    function addExitPattern($pattern, $mode) {
        if (! isset($this->_regexes[$mode])) {
            $this->_regexes[$mode] = new ParallelRegex($this->_case);
        }
        // "__exit" is the reserved label recognised by _isModeEnd().
        $this->_regexes[$mode]->addPattern($pattern, "__exit");
        if (! isset($this->_mode_handlers[$mode])) {
            $this->_mode_handlers[$mode] = $mode;
        }
    }

    /**
     *    Adds a pattern that has a special mode. Acts as an entry
     *    and exit pattern in one go, effectively calling a special
     *    parser handler for this token only.
     *    @param string $pattern      Perl style regex, but ( and )
     *                                lose the usual meaning.
     *    @param string $mode         Should only apply this
     *                                pattern when dealing with
     *                                this type of input.
     *    @param string $special      Use this mode for this one token.
     *    @access public
     */
    function addSpecialPattern($pattern, $mode, $special) {
        if (! isset($this->_regexes[$mode])) {
            $this->_regexes[$mode] = new ParallelRegex($this->_case);
        }
        // Special modes are labelled with a single leading underscore.
        $this->_regexes[$mode]->addPattern($pattern, "_$special");
        if (! isset($this->_mode_handlers[$special])) {
            $this->_mode_handlers[$special] = $special;
        }
    }

    /**
     *    Adds a mapping from a mode to another handler.
     *    @param string $mode        Mode to be remapped.
     *    @param string $handler     New target handler.
     *    @access public
     */
    function mapHandler($mode, $handler) {
        $this->_mode_handlers[$mode] = $handler;
    }

    /**
     *    Splits the page text into tokens. Will fail
     *    if the handlers report an error or if no
     *    content is consumed. If successful then each
     *    unparsed and parsed token invokes a call to the
     *    held listener.
     *    @param string $raw        Raw HTML text.
     *    @return boolean           True on success, else false.
     *    @access public
     */
    function parse($raw) {
        if (! isset($this->_parser)) {
            return false;
        }
        $length = strlen($raw);
        // _reduce() is defined outside this section; here it is expected
        // to yield array(remaining, unmatched, matched, mode) while
        // tokens are found, and a non-array value otherwise.
        while (is_array($parsed = $this->_reduce($raw))) {
            list($raw, $unmatched, $matched, $mode) = $parsed;
            if (! $this->_dispatchTokens($unmatched, $matched, $mode)) {
                return false;
            }
            if ($raw === '') {
                return true;
            }
            // Nothing was consumed on this pass, so stop rather than
            // loop forever.
            if (strlen($raw) == $length) {
                return false;
            }
            $length = strlen($raw);
        }
        if (! $parsed) {
            return false;
        }
        // Whatever is left over is passed on as unmatched text.
        return $this->_invokeParser($raw, LEXER_UNMATCHED);
    }

    /**
     *    Sends the matched token and any leading unmatched
     *    text to the parser changing the lexer to a new
     *    mode if one is listed.
     *    @param string $unmatched    Unmatched leading portion.
     *    @param string $matched      Actual token match.
     *    @param string $mode         Mode after match. A boolean
     *                                false mode causes no change.
     *    @return boolean             False if there was any error
     *                                from the parser.
     *    @access private
     */
    function _dispatchTokens($unmatched, $matched, $mode = false) {
        if (! $this->_invokeParser($unmatched, LEXER_UNMATCHED)) {
            return false;
        }
        // Any boolean mode means a plain match with no mode change.
        if (is_bool($mode)) {
            return $this->_invokeParser($matched, LEXER_MATCHED);
        }
        if ($this->_isModeEnd($mode)) {
            if (! $this->_invokeParser($matched, LEXER_EXIT)) {
                return false;
            }
            return $this->_mode->leave();
        }
        if ($this->_isSpecialMode($mode)) {
            // Enter the special mode for this one token only.
            $this->_mode->enter($this->_decodeSpecial($mode));
            if (! $this->_invokeParser($matched, LEXER_SPECIAL)) {
                return false;
            }
            return $this->_mode->leave();
        }
        $this->_mode->enter($mode);
        return $this->_invokeParser($matched, LEXER_ENTER);
    }

    /**
     *    Tests to see if the new mode is actually to leave
     *    the current mode and pop an item from the matching
     *    mode stack.
     *    @param string $mode    Mode to test.
     *    @return boolean        True if this is the exit mode.
     *    @access private
     */
    function _isModeEnd($mode) {
        return ($mode === "__exit");
    }

    /**
     *    Test to see if the mode is one where this mode
     *    is entered for this token only and automatically
     *    leaves immediately afterwards.
     *    @param string $mode    Mode to test.
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?