parser.php
来自「php 开发的内容管理系统」· PHP 代码 · 共 2,095 行 · 第 1/5 页
PHP
2,095 行
<?php
/**
 * File for Parser and related classes
 *
 * NOTE(review): this file had been collapsed onto a handful of physical lines,
 * which made every end-of-line comment ('//' and '#') swallow the code that
 * followed it. The line structure has been restored; keep one statement per
 * line so that single-line comments terminate where intended.
 *
 * @package MediaWiki
 * @subpackage Parser
 */

/**
 * Update this version number when the ParserOutput format
 * changes in an incompatible way, so the parser cache
 * can automatically discard old data.
 */
define( 'MW_PARSER_VERSION', '1.6.1' );

/**
 * Variable substitution O(N^2) attack
 *
 * Without countermeasures, it would be possible to attack the parser by saving
 * a page filled with a large number of inclusions of large pages. The size of
 * the generated page would be proportional to the square of the input size.
 * Hence, we limit the number of inclusions of any given page, thus bringing any
 * attack back to O(N).
 */
define( 'MAX_INCLUDE_REPEAT', 100 );
define( 'MAX_INCLUDE_SIZE', 1000000 ); // 1 Million

define( 'RLH_FOR_UPDATE', 1 );

# Allowed values for $mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG' , 3 );

# Flags for setFunctionHook
define( 'SFH_NO_HASH', 1 );

# string parameter for extractTags which will cause it
# to strip HTML comments in addition to regular
# <XML>-style tags. This should not be anything we
# may want to use in wikisyntax
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# Constants needed for external link processing
define( 'HTTP_PROTOCOLS', 'http:\/\/|https:\/\/' );
# Everything except bracket, space, or control characters
define( 'EXT_LINK_URL_CLASS', '[^][<>"\\x00-\\x20\\x7F]' );
# Including space, but excluding newlines
define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x0a\\x0d]' );
define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );
define( 'EXT_LINK_BRACKETED', '/\[(\b(' . wfUrlProtocols() . ')'.
	EXT_LINK_URL_CLASS.'+) *('.EXT_LINK_TEXT_CLASS.'*?)\]/S' );
define( 'EXT_IMAGE_REGEX',
	'/^('.HTTP_PROTOCOLS.')'. # Protocol
	'('.EXT_LINK_URL_CLASS.'+)\\/'. # Hostname and path
	'('.EXT_IMAGE_FNAME_CLASS.'+)\\.((?i)'.EXT_IMAGE_EXTENSIONS.')$/S' # Filename
);

// State constants for the definition list colon extraction
define( 'MW_COLON_STATE_TEXT', 0 );
define( 'MW_COLON_STATE_TAG', 1 );
define( 'MW_COLON_STATE_TAGSTART', 2 );
define( 'MW_COLON_STATE_CLOSETAG', 3 );
define( 'MW_COLON_STATE_TAGSLASH', 4 );
define( 'MW_COLON_STATE_COMMENT', 5 );
define( 'MW_COLON_STATE_COMMENTDASH', 6 );
define( 'MW_COLON_STATE_COMMENTDASHDASH', 7 );

/**
 * PHP Parser
 *
 * Processes wiki markup
 *
 * <pre>
 * There are three main entry points into the Parser class:
 * parse()
 *   produces HTML output
 * preSaveTransform().
 *   produces altered wiki markup.
 * transformMsg()
 *   performs brace substitution on MediaWiki messages
 *
 * Globals used:
 *    objects:   $wgLang, $wgContLang
 *
 * NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
 *
 * settings:
 *  $wgUseTex*, $wgUseDynamicDates*, $wgInterwikiMagic*,
 *  $wgNamespacesWithSubpages, $wgAllowExternalImages*,
 *  $wgLocaltimezone, $wgAllowSpecialInclusion*
 *
 *  * only within ParserOptions
 * </pre>
 *
 * @package MediaWiki
 */
class Parser
{
	/**#@+
	 * @private
	 */
	# Persistent:
	var $mTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables;

	# Cleared with clearState():
	var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
	var $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
	var $mInterwikiLinkHolders, $mLinkHolders, $mUniqPrefix;
	var $mTemplates,     // cache of already loaded templates, avoids
	                     // multiple SQL queries for the same string
	    $mTemplatePath;  // stores an unsorted hash of all the templates already loaded
	                     // in this path. Used for loop detection.

	# Temporary
	# These are variables reset at least once per parse regardless of $clearState
	var $mOptions,       // ParserOptions object
	    $mTitle,         // Title context, used for self-link rendering and similar things
	    $mOutputType,    // Output type, one of the OT_xxx constants
	    $mRevisionId;    // ID to display in {{REVISIONID}} tags

	/**#@-*/

	/**
	 * Constructor
	 *
	 * Initialises the persistent hook tables and marks the parser as not yet
	 * initialised, so that firstCallInit() runs on the first clearState().
	 *
	 * NOTE(review): mFirstCall is assigned here but is not among the property
	 * declarations visible above -- confirm whether it is declared elsewhere.
	 *
	 * @public
	 */
	function Parser() {
		$this->mTagHooks = array();
		$this->mFunctionHooks = array();
		$this->mFunctionSynonyms = array( 0 => array(), 1 => array() );
		$this->mFirstCall = true;
	}

	/**
	 * Do various kinds of initialisation on the first call of the parser
	 *
	 * Registers the <pre> tag hook and the core parser functions, then
	 * initialises the variables. Returns immediately when it has already run
	 * (mFirstCall is false).
	 */
	function firstCallInit() {
		if ( !$this->mFirstCall ) {
			return;
		}

		wfProfileIn( __METHOD__ );
		global $wgAllowDisplayTitle, $wgAllowSlowParserFunctions;

		$this->setHook( 'pre', array( $this, 'renderPreTag' ) );

		$this->setFunctionHook( MAG_NS, array( 'CoreParserFunctions', 'ns' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_URLENCODE, array( 'CoreParserFunctions', 'urlencode' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_LCFIRST, array( 'CoreParserFunctions', 'lcfirst' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_UCFIRST, array( 'CoreParserFunctions', 'ucfirst' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_LC, array( 'CoreParserFunctions', 'lc' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_UC, array( 'CoreParserFunctions', 'uc' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_LOCALURL, array( 'CoreParserFunctions', 'localurl' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_LOCALURLE, array( 'CoreParserFunctions', 'localurle' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_FULLURL, array( 'CoreParserFunctions', 'fullurl' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_FULLURLE, array( 'CoreParserFunctions', 'fullurle' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_FORMATNUM, array( 'CoreParserFunctions', 'formatnum' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_GRAMMAR, array( 'CoreParserFunctions', 'grammar' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_PLURAL, array( 'CoreParserFunctions', 'plural' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_NUMBEROFPAGES, array( 'CoreParserFunctions', 'numberofpages' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_NUMBEROFUSERS, array( 'CoreParserFunctions', 'numberofusers' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_NUMBEROFARTICLES, array( 'CoreParserFunctions', 'numberofarticles' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_NUMBEROFFILES, array( 'CoreParserFunctions', 'numberoffiles' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_NUMBEROFADMINS, array( 'CoreParserFunctions', 'numberofadmins' ), SFH_NO_HASH );
		$this->setFunctionHook( MAG_LANGUAGE, array( 'CoreParserFunctions', 'language' ), SFH_NO_HASH );

		if ( $wgAllowDisplayTitle ) {
			$this->setFunctionHook( MAG_DISPLAYTITLE, array( 'CoreParserFunctions', 'displaytitle' ), SFH_NO_HASH );
		}
		if ( $wgAllowSlowParserFunctions ) {
			$this->setFunctionHook( MAG_PAGESINNAMESPACE, array( 'CoreParserFunctions', 'pagesinnamespace' ), SFH_NO_HASH );
		}

		$this->initialiseVariables();

		$this->mFirstCall = false;
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Clear Parser state
	 *
	 * Runs the one-time initialisation if it has not happened yet, resets all
	 * per-parse members, generates a fresh marker prefix and finally fires
	 * the 'ParserClearState' hook.
	 *
	 * @private
	 */
	function clearState() {
		if ( $this->mFirstCall ) {
			$this->firstCallInit();
		}
		$this->mOutput = new ParserOutput;
		$this->mAutonumber = 0;
		$this->mLastSection = '';
		$this->mDTopen = false;
		$this->mIncludeCount = array();
		$this->mStripState = array();
		$this->mArgStack = array();
		$this->mInPre = false;
		$this->mInterwikiLinkHolders = array(
			'texts' => array(),
			'titles' => array()
		);
		$this->mLinkHolders = array(
			'namespaces' => array(),
			'dbkeys' => array(),
			'queries' => array(),
			'texts' => array(),
			'titles' => array()
		);
		$this->mRevisionId = null;

		/**
		 * Prefix for temporary replacement strings for the multipass parser.
		 * \x07 should never appear in input as it's disallowed in XML.
		 * Using it at the front also gives us a little extra robustness
		 * since it shouldn't match when butted up against identifier-like
		 * string constructs.
		 */
		$this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString();

		# Clear these on every parse, bug 4549
		$this->mTemplates = array();
		$this->mTemplatePath = array();

		$this->mShowToc = true;
		$this->mForceTocPosition = false;

		wfRunHooks( 'ParserClearState', array( &$this ) );
	}

	/**
	 * Accessor for mUniqPrefix.
	 *
	 * @public
	 * @return string the marker prefix generated by the last clearState()
	 */
	function UniqPrefix() {
		return $this->mUniqPrefix;
	}

	/**
	 * Convert wikitext to HTML
	 * Do not call this function recursively.
	 *
	 * @private
	 * @param string $text Text we want to parse
	 * @param Title &$title A title object
	 * @param array $options
	 * @param boolean $linestart
	 * @param boolean $clearState
	 * @param int $revid number to pass in {{REVISIONID}}
	 * @return ParserOutput a ParserOutput; or, when a
	 *   'ParserBeforeInternalParse' hook returns false, the stripped text
	 *   string as it stands at that point
	 */
	function parse( $text, &$title, $options, $linestart = true, $clearState = true, $revid = null ) {
		/**
		 * First pass--just handle <nowiki> sections, pass the rest off
		 * to internalParse() which does all the real work.
		 */

		global $wgUseTidy, $wgAlwaysUseTidy, $wgContLang;
		wfProfileIn( __METHOD__ );

		if ( $clearState ) {
			$this->clearState();
		}

		$this->mOptions = $options;
		$this->mTitle =& $title;
		$this->mRevisionId = $revid;
		$this->mOutputType = OT_HTML;

		//$text = $this->strip( $text, $this->mStripState );
		// VOODOO MAGIC FIX! Sometimes the above segfaults in PHP5.
		// Passing the strip state through a local reference avoids that.
		$x =& $this->mStripState;

		wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$x ) );
		$text = $this->strip( $text, $x );
		wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$x ) );

		# Hook to suspend the parser in this state
		if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$x ) ) ) {
			wfProfileOut( __METHOD__ );
			return $text;
		}

		$text = $this->internalParse( $text );

		$text = $this->unstrip( $text, $this->mStripState );

		# Clean up special characters, only run once, next-to-last before doBlockLevels
		# The replacements must insert a non-breaking space entity; with a plain
		# space in the replacement these rules would leave the text unchanged.
		$fixtags = array(
			# french spaces, last one Guillemet-left
			# only if there is something before the space
			'/(.) (?=\\?|:|;|!|\\302\\273)/' => '\\1&nbsp;\\2',
			# french spaces, Guillemet-right
			'/(\\302\\253) /' => '\\1&nbsp;',
		);
		$text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );

		# only once and last
		$text = $this->doBlockLevels( $text, $linestart );

		$this->replaceLinkHolders( $text );

		# the position of the parserConvert() call should not be changed. it
		# assumes that the links are all replaced and the only thing left
		# is the <nowiki> mark.
		# Side-effects: this calls $this->mOutput->setTitleText()
		$text = $wgContLang->parserConvert( $text, $this );

		$text = $this->unstripNoWiki( $text, $this->mStripState );

		wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) );

		$text = Sanitizer::normalizeCharReferences( $text );

		if ( ( $wgUseTidy && $this->mOptions->mTidy ) || $wgAlwaysUseTidy ) {
			$text = Parser::tidy( $text );
		} else {
			# attempt to sanitize at least some nesting problems
			# (bug #2702 and quite a few others)
			$tidyregs = array(
				# ''Something [http://www.cool.com cool''] -->
				# <i>Something</i><a href="http://www.cool.com"..><i>cool</i></a>
				'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
				'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
				# fix up an anchor inside another anchor, only
				# at least for a single single nested link (bug 3695)
				'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
				'\\1\\2</a>\\3</a>\\1\\4</a>',
				# fix div inside inline elements- doBlockLevels won't wrap a line which
				# contains a div, so fix it up here; replace
				# div with escaped text (the angle brackets are emitted as
				# entities so the div is shown as text, not rendered)
				'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
				'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
				# remove empty italic or bold tag pairs, some
				# introduced by rules above
				'/<([bi])><\/\\1>/' => ''
			);

			$text = preg_replace(
				array_keys( $tidyregs ),
				array_values( $tidyregs ),
				$text );
		}

		wfRunHooks( 'ParserAfterTidy', array( &$this, &$text ) );

		$this->mOutput->setText( $text );
		wfProfileOut( __METHOD__ );

		return $this->mOutput;
	}

	/**
	 * Get a random string
	 *
	 * Concatenates the hexadecimal form of two mt_rand() draws, each in the
	 * range 0..0x7fffffff.
	 *
	 * @private
	 * @static
	 * @return string
	 */
	function getRandomString() {
		return dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) );
	}

	/**
	 * Accessor for mTitle (returned by reference).
	 */
	function &getTitle() { return $this->mTitle; }

	/**
	 * Accessor for mOptions.
	 */
	function getOptions() { return $this->mOptions; }

	/**
	 * Pick the language object for parser functions: $wgLang when the
	 * options' getInterfaceMessage() is true, $wgContLang otherwise.
	 */
	function getFunctionLang() {
		global $wgLang, $wgContLang;
		return $this->mOptions->getInterfaceMessage() ? $wgLang : $wgContLang;
	}

	/**
	 * Replaces all occurrences of HTML-style comments and the given tags
	 * in the text with a random marker and returns the next text. The output
	 * parameter $matches will be an associative array filled with data in
	 * the form:
	 *   'UNIQ-xxxxx' => array(
	 *     'element',
	 *     'tag content',
	 *     array( 'param' => 'x' ),
	 *     '<element param="x">tag content</element>' ) )
	 *
	 * @param $elements list of element names. Comments are always extracted.
	 * @param $text Source text string.
	 * @param $uniq_prefix
	 *
	 * @private
	 * @static
	 */
	function extractTagsAndParams($elements, $text, &$matches, $uniq_prefix = ''){
		$rand = Parser::getRandomString();
		$n = 1;
		$stripped = '';
		$matches = array();

		$taglist = implode( '|', $elements );
		$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";

		while ( '' != $text ) {
			$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
			$stripped .= $p[0];
			if( count( $p ) < 5 ) {
				break;
			}
			if( count( $p ) > 5 ) {
				// comment
				$element    = $p[4];
				$attributes = '';
				$close      = '';
				$inside     = $p[5];
			} else {
				// tag
				$element    = $p[1];
				$attributes = $p[2];
				$close      = $p[3];
				$inside     = $p[4];
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?