dumphtml.inc
来自「php 开发的内容管理系统」· INC 代码 · 共 651 行 · 第 1/2 页
INC
651 行
<?php
/**
 * Static HTML dump support: the DumpHTML class writes wiki pages as HTML
 * files below a destination directory.
 *
 * @package MediaWiki
 * @subpackage Maintenance
 */

# Progress is reported once every REPORTING_INTERVAL processed items
define( 'REPORTING_INTERVAL', 10 );

require_once( 'includes/ImagePage.php' );
require_once( 'includes/CategoryPage.php' );
require_once( 'includes/RawPage.php' );

class DumpHTML {
	# Destination directory
	var $dest;

	# Show interlanguage links?
	var $interwiki = true;

	# Depth of HTML directory tree
	var $depth = 3;

	# Directory that commons images are copied into
	var $sharedStaticPath;

	# Relative path to image directory
	var $imageRel = 'upload';

	# Copy commons images instead of symlinking
	var $forceCopy = false;

	# Make links assuming the script path is in the same directory as
	# the destination
	var $alternateScriptPath = false;

	# Original values of various globals
	var $oldArticlePath = false, $oldCopyrightIcon = false;

	# Has setupGlobals been called?
	var $setupDone = false;

	# List of raw pages used in the current article
	var $rawPages;

	# Skin to use
	var $skin = 'dumphtml';

	/**
	 * Constructor: copy every entry of $settings onto the property of the
	 * same name, overriding the defaults declared above.
	 *
	 * @param array $settings Map of property name => value
	 */
	function DumpHTML( $settings ) {
		foreach ( $settings as $var => $value ) {
			$this->$var = $value;
		}
	}

	/**
	 * Write a set of articles specified by start and end page_id.
	 * Pages in the category namespace and the main page are skipped here.
	 *
	 * NOTE(review): an earlier version of this comment said images are
	 * skipped too, but the loop below has no such check -- confirm which
	 * behaviour is intended.
	 *
	 * @param int $start First page_id to process
	 * @param int|false $end Last page_id to process (inclusive); when false,
	 *        max(page_id) is read from the page table
	 */
	function doArticles( $start, $end = false ) {
		$fname = 'DumpHTML::doArticles';

		$this->setupGlobals();

		if ( $end === false ) {
			$dbr =& wfGetDB( DB_SLAVE );
			$end = $dbr->selectField( 'page', 'max(page_id)', false, $fname );
		}

		$mainPageObj = Title::newMainPage();
		$mainPage = $mainPageObj->getPrefixedDBkey();

		for ( $id = $start; $id <= $end; $id++ ) {
			wfWaitForSlaves( 20 );
			if ( !( $id % REPORTING_INTERVAL ) ) {
				print "Processing ID: $id\r";
			}
			# Start a fresh progress line every ten reports
			if ( !( $id % ( REPORTING_INTERVAL * 10 ) ) ) {
				print "\n";
			}
			$title = Title::newFromID( $id );
			if ( $title ) {
				$ns = $title->getNamespace();
				if ( $ns != NS_CATEGORY && $title->getPrefixedDBkey() != $mainPage ) {
					$this->doArticle( $title );
				}
			}
		}
		print "\n";
	}

	/**
	 * Write the main page (via doMainPage) and then Special:Categories.
	 */
	function doSpecials() {
		$this->doMainPage();

		# doMainPage() set the globals up for depth 0; restore the default depth
		$this->setupGlobals();
		print "Special:Categories...";
		$this->doArticle( Title::makeTitle( NS_SPECIAL, 'Categories' ) );
		print "\n";
	}

	/**
	 * Write the main page as index.html
	 *
	 * @return false|null false if index.html could not be opened for writing;
	 *         no explicit return value otherwise
	 */
	function doMainPage() {
		print "Making index.html ";

		// Set up globals with no ../../.. in the link URLs
		$this->setupGlobals( 0 );

		$title = Title::newMainPage();
		$text = $this->getArticleHTML( $title );
		$file = fopen( "{$this->dest}/index.html", "w" );
		if ( !$file ) {
			print "\nCan't open index.html for writing\n";
			return false;
		}
		fwrite( $file, $text );
		fclose( $file );
		print "\n";
	}

	/**
	 * Write image description pages in two passes: first for rows of the
	 * image table that have no article ID, then for every file found under
	 * the hashed subdirectories of $this->sharedStaticPath.
	 */
	function doImageDescriptions() {
		$fname = 'DumpHTML::doImageDescriptions';

		$this->setupGlobals();

		/**
		 * Dump image description pages that don't have an associated article, but do
		 * have a local image
		 */
		$dbr =& wfGetDB( DB_SLAVE );
		$res = $dbr->select( 'image', array( 'img_name' ), false, $fname );

		$i = 0;
		print "Writing image description pages for local images\n";
		$num = $dbr->numRows( $res );
		while ( $row = $dbr->fetchObject( $res ) ) {
			wfWaitForSlaves( 10 );
			if ( !( ++$i % REPORTING_INTERVAL ) ) {
				print "Done $i of $num\r";
			}
			$title = Title::makeTitle( NS_IMAGE, $row->img_name );
			if ( $title->getArticleID() ) {
				// Already done by dumpHTML
				continue;
			}
			$this->doArticle( $title );
		}
		print "\n";

		/**
		 * Dump images which only have a real description page on commons
		 */
		print "Writing description pages for commons images\n";
		$i = 0;
		for ( $hash = 0; $hash < 256; $hash++ ) {
			# Two-level hashed directory name, e.g. "a/a7"
			$dir = sprintf( "%01x/%02x", intval( $hash / 16 ), $hash );
			$patterns = array(
				"{$this->sharedStaticPath}/$dir/*",
				"{$this->sharedStaticPath}/thumb/$dir/*"
			);

			# glob() returns false on error, which must not reach array_merge()
			$paths = array();
			foreach ( $patterns as $pattern ) {
				$matches = glob( $pattern );
				if ( is_array( $matches ) ) {
					$paths = array_merge( $paths, $matches );
				}
			}

			foreach ( $paths as $path ) {
				$file = basename( $path );
				if ( !( ++$i % REPORTING_INTERVAL ) ) {
					print "$i\r";
				}

				$title = Title::makeTitle( NS_IMAGE, $file );
				$this->doArticle( $title );
			}
		}
		print "\n";
	}

	/**
	 * Write one category page for each distinct cl_to value in the
	 * categorylinks table.
	 */
	function doCategories() {
		$fname = 'DumpHTML::doCategories';
		$this->setupGlobals();

		$dbr =& wfGetDB( DB_SLAVE );
		print "Selecting categories...";
		$sql = 'SELECT DISTINCT cl_to FROM ' . $dbr->tableName( 'categorylinks' );
		$res = $dbr->query( $sql, $fname );

		print "\nWriting " . $dbr->numRows( $res ). " category pages\n";
		$i = 0;
		while ( $row = $dbr->fetchObject( $res ) ) {
			wfWaitForSlaves( 10 );
			if ( !( ++$i % REPORTING_INTERVAL ) ) {
				print "$i\r";
			}
			$title = Title::makeTitle( NS_CATEGORY, $row->cl_to );
			$this->doArticle( $title );
		}
		print "\n";
	}

	/**
	 * Write every page whose page_is_redirect flag is 1.
	 */
	function doRedirects() {
		print "Doing redirects...\n";
		$fname = 'DumpHTML::doRedirects';
		$this->setupGlobals();
		$dbr =& wfGetDB( DB_SLAVE );

		$res = $dbr->select( 'page', array( 'page_namespace', 'page_title' ),
			array( 'page_is_redirect' => 1 ), $fname );
		$num = $dbr->numRows( $res );
		print "$num redirects to do...\n";
		$i = 0;
		while ( $row = $dbr->fetchObject( $res ) ) {
			$title = Title::makeTitle( $row->page_namespace, $row->page_title );
			if ( !( ++$i % ( REPORTING_INTERVAL * 10 ) ) ) {
				print "Done $i of $num\n";
			}
			$this->doArticle( $title );
		}
	}

	/**
	 * Write an article specified by title, copy the images its HTML refers
	 * to, and write any raw pages recorded in $this->rawPages that do not
	 * exist on disk yet.
	 *
	 * Does nothing when getArticleHTML() returns false.
	 *
	 * @param Title $title Page to write
	 */
	function doArticle( $title ) {
		# Reset the list; presumably it is filled while the HTML is generated
		# (the code that appends to it is not visible here)
		$this->rawPages = array();
		$text = $this->getArticleHTML( $title );

		if ( $text === false ) {
			return;
		}

		# Parse the XHTML to find the images
		$images = $this->findImages( $text );
		$this->copyImages( $images );

		# Write to file
		$this->writeArticle( $title, $text );

		# Do raw pages
		wfMkdirParents( "{$this->dest}/raw", 0755 );
		foreach ( $this->rawPages as $record ) {
			# Separate names so the $title parameter and the article text
			# are not overwritten by the raw page being processed
			list( $rawFile, $rawTitle, $params ) = $record;

			$path = "{$this->dest}/raw/$rawFile";
			if ( file_exists( $path ) ) {
				# Already written for an earlier article
				continue;
			}

			$article = new Article( $rawTitle );
			$request = new FauxRequest( $params );
			$rp = new RawPage( $article, $request );
			$rawText = $rp->getRawText();

			print "Writing $rawFile\n";
			$handle = fopen( $path, 'w' );
			if ( !$handle ) {
				# Report the path actually opened ($fullName was never
				# defined in this method)
				print("Can't open file $path for writing\n");
				continue;
			}
			fwrite( $handle, $rawText );
			fclose( $handle );
		}
	}

	/**
	 * Write the given text to the file identified by the given title object.
	 * The file name comes from getHashedFilename() and is relative to
	 * $this->dest; parent directories are created as needed.
	 *
	 * @param Title $title Page whose file is written (passed by reference)
	 * @param string $text Content to write
	 */
	function writeArticle( &$title, $text ) {
		$filename = $this->getHashedFilename( $title );
		$fullName = "{$this->dest}/$filename";
		$fullDir = dirname( $fullName );

		wfMkdirParents( $fullDir, 0755 );

		$file = fopen( $fullName, 'w' );
		if ( !$file ) {
			print("Can't open file $fullName for writing\n");
			return;
		}

		fwrite( $file, $text );
		fclose( $file );
	}

	/**
	 * Set up globals required for parsing.
	 *
	 * NOTE(review): only the first part of this method is visible in this
	 * chunk. The statements below are reproduced unchanged and the method
	 * body continues past the end of this section.
	 *
	 * @param int|null $currentDepth Directory depth of the page being
	 *        written; null means use $this->depth
	 */
	function setupGlobals( $currentDepth = NULL ) {
		global $wgUser, $wgTitle, $wgStylePath, $wgArticlePath, $wgMathPath;
		global $wgUploadPath, $wgLogo, $wgMaxCredits, $wgSharedUploadPath;
		global $wgHideInterlanguageLinks, $wgUploadDirectory, $wgThumbnailScriptPath;
		global $wgSharedThumbnailScriptPath, $wgEnableParserCache, $wgHooks, $wgServer;
		global $wgRightsUrl, $wgRightsText, $wgCopyrightIcon;

		# Original logo path, remembered across calls
		static $oldLogo = NULL;

		if ( !$this->setupDone ) {
			# First call: register this object for the URL hooks and remember
			# the original article path
			$wgHooks['GetLocalURL'][] =& $this;
			$wgHooks['GetFullURL'][] =& $this;
			$this->oldArticlePath = $wgServer . $wgArticlePath;
		}

		if ( is_null( $currentDepth ) ) {
			$currentDepth = $this->depth;
		}

		# NOTE(review): $wgScriptPath is not in the global declarations
		# visible above, so these assignments are to a local variable --
		# confirm whether that is intended.
		if ( $this->alternateScriptPath ) {
			if ( $currentDepth == 0 ) {
				$wgScriptPath = '.';
			} else {
				$wgScriptPath = '..' . str_repeat( '/..', $currentDepth - 1 );
			}
		} else {
			$wgScriptPath = '..' . str_repeat( '/..', $currentDepth );
		}

		$wgArticlePath = str_repeat( '../', $currentDepth ) . '$1';

		# Logo image
		# Allow for repeated setup
		if ( !is_null( $oldLogo ) ) {
			$wgLogo = $oldLogo;
		} else {
			$oldLogo = $wgLogo;
		}

		if ( strpos( $wgLogo, $wgUploadPath ) === 0 ) {
			# If it's in the upload directory, rewrite it to the new upload directory
			$wgLogo = "$wgScriptPath/{$this->imageRel}/" . substr( $wgLogo, strlen( $wgUploadPath ) + 1 );
		} elseif ( $wgLogo{0} == '/' ) {
			# This is basically heuristic
			# Rewrite an absolute logo path to one relative to the script path
			$wgLogo = $wgScriptPath . $wgLogo;
		}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?