importusemodwiki.php

来自「php 开发的内容管理系统」· PHP 代码 · 共 366 行
PHP
366 行
<?php/** * Import data from a UseModWiki into a MediaWiki wiki * 2003-02-09 Brion VIBBER <brion@pobox.com> * Based loosely on Magnus's code from 2001-2002 * * Updated limited version to get something working temporarily * 2003-10-09 * Be sure to run the link & index rebuilding scripts! * * Some more munging for charsets etc * 2003-11-28 * * Partial fix for pages starting with lowercase letters (??) * and CamelCase and /Subpage link conversion * 2004-11-17 * * Rewrite output to create Special:Export format for import * instead of raw SQL. Should be 'future-proof' against future * schema changes. * 2005-03-14 * * @todo document * @package MediaWiki * @subpackage Maintenance */if( php_sapi_name() != 'cli' ) {	echo "Please customize the settings and run me from the command line.";	die( -1 );}/** Set these correctly! */$wgImportEncoding = "CP1252"; /* We convert all to UTF-8 */$wgRootDirectory = "/kalman/Projects/wiki2002/wiki/lib-http/db/wiki";/* On a large wiki, you might run out of memory */@ini_set( 'memory_limit', '40M' );/* globals */$wgFieldSeparator = "\xb3"; # Some wikis may use different char	$FS = $wgFieldSeparator ;	$FS1 = $FS."1" ;	$FS2 = $FS."2" ;	$FS3 = $FS."3" ;# Unicode sanitization toolsrequire_once( '../includes/normal/UtfNormal.php' );$usercache = array();importPages();# ------------------------------------------------------------------------------function importPages(){	global $wgRootDirectory;	$gt = '>';	echo <<<END<?xml version="1.0" encoding="UTF-8" ?$gt<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.1/"           xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"           xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.1/                               http://www.mediawiki.org/xml/export-0.1.xsd"           version="0.1"           xml:lang="en"><!-- generated by importUseModWiki.php -->END;	$letters = array(		'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I',		'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',		'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'other' );	foreach( $letters as $letter ) {		$dir = "$wgRootDirectory/page/$letter";		if( is_dir( $dir ) )			importPageDirectory( $dir );	}	echo <<<END</mediawiki>END;}function importPageDirectory( $dir, $prefix = "" ){	echo "\n<!-- Checking page directory " . xmlCommentSafe( $dir ) . " -->\n";	$mydir = opendir( $dir );	while( $entry = readdir( $mydir ) ) {		if( preg_match( '/^(.+)\.db$/', $entry, $m ) ) {			echo importPage( $prefix . $m[1] );		} else {			if( is_dir( "$dir/$entry" ) ) {				if( $entry != '.' && $entry != '..' ) {					importPageDirectory( "$dir/$entry", "$entry/" );				}			} else {				echo "<!-- File '" . xmlCommentSafe( $entry ) . "' doesn't seem to contain an article. Skipping. -->\n";			}		}	}}# ------------------------------------------------------------------------------/* fetch_ functions	Grab a given item from the database	*/function useModFilename( $title ) {	$c = substr( $title, 0, 1 );	if(preg_match( '/[A-Z]/i', $c ) ) {		return strtoupper( $c ) . "/$title";	}	return "other/$title";}function fetchPage( $title ){	global $FS,$FS1,$FS2,$FS3, $wgRootDirectory;	$fname = $wgRootDirectory . "/page/" . useModFilename( $title ) . ".db";	if( !file_exists( $fname ) ) {		echo "Couldn't open file '$fname' for page '$title'.\n";		die( -1 );	}	$page = splitHash( $FS1, file_get_contents( $fname ) );	$section = splitHash( $FS2, $page["text_default"] );	$text = splitHash( $FS3, $section["data"] );	return array2object( array( "text" => $text["text"] , "summary" => $text["summary"] ,		"minor" => $text["minor"] , "ts" => $section["ts"] ,		"username" => $section["username"] , "host" => $section["host"] ) );}function fetchKeptPages( $title ){	global $FS,$FS1,$FS2,$FS3, $wgRootDirectory, $wgTimezoneCorrection;	$fname = $wgRootDirectory . "/keep/" . useModFilename( $title ) . ".kp";	if( !file_exists( $fname ) ) return array();	$keptlist = explode( $FS1, file_get_contents( $fname ) );	array_shift( $keptlist ); # Drop the junk at beginning of file	$revisions = array();	foreach( $keptlist as $rev ) {		$section = splitHash( $FS2, $rev );		$text = splitHash( $FS3, $section["data"] );		if ( $text["text"] && $text["minor"] != "" && ( $section["ts"]*1 > 0 ) ) {			array_push( $revisions, array2object( array ( "text" => $text["text"] , "summary" => $text["summary"] ,				"minor" => $text["minor"] , "ts" => $section["ts"] ,				"username" => $section["username"] , "host" => $section["host"] ) ) );		} else {			echo "<!-- skipped a bad old revision -->\n";		}	}	return $revisions;}function splitHash ( $sep , $str ) {	$temp = explode ( $sep , $str ) ;	$ret = array () ;	for ( $i = 0; $i+1 < count ( $temp ) ; $i++ ) {		$ret[$temp[$i]] = $temp[++$i] ;		}	return $ret ;	}/* import_ functions	Take a fetched item and produce SQL	*/function checkUserCache( $name, $host ){	global $usercache;	if( $name ) {		if( in_array( $name, $usercache ) ) {			$userid = $usercache[$name];		} else {			# If we haven't imported user accounts			$userid = 0;		}		$username = str_replace( '_', ' ', $name );	} else {		$userid = 0;		$username = $host;	}	return array( $userid, $username );}function importPage( $title ){	global $usercache;	echo "\n<!-- Importing page " . xmlCommentSafe( $title ) . " -->\n";	$page = fetchPage( $title );	$newtitle = xmlsafe( str_replace( '_', ' ', recodeText( $title ) ) );	$munged = mungeFormat( $page->text );	if( $munged != $page->text ) {		/**		 * Save a *new* revision with the conversion, and put the		 * previous last version into the history.		 */		$next = array2object( array(			'text'     => $munged,			'minor'    => 1,			'username' => 'Conversion script',			'host'     => '127.0.0.1',			'ts'       => time(),			'summary'  => 'link fix',			) );		$revisions = array( $page, $next );	} else {		/**		 * Current revision:		 */		$revisions = array( $page );	}	$xml = <<<END	<page>		<title>$newtitle</title>END;	# History	$revisions = array_merge( $revisions, fetchKeptPages( $title ) );	if(count( $revisions ) == 0 ) {		return $sql;	}	foreach( $revisions as $rev ) {		$text      = xmlsafe( recodeText( $rev->text ) );		$minor     = ($rev->minor ? '<minor/>' : '');		list( $userid, $username ) = checkUserCache( $rev->username, $rev->host );		$username  = xmlsafe( recodeText( $username ) );		$timestamp = xmlsafe( timestamp2ISO8601( $rev->ts ) );		$comment   = xmlsafe( recodeText( $rev->summary ) );		$xml .= <<<END		<revision>			<timestamp>$timestamp</timestamp>			<contributor><username>$username</username></contributor>			$minor			<comment>$comment</comment>			<text>$text</text>		</revision>END;	}	$xml .= "</page>\n\n";	return $xml;}# Whee!function recodeText( $string ) {	global $wgImportEncoding;	# For currently latin-1 wikis	$string = str_replace( "\r\n", "\n", $string );	$string = @iconv( $wgImportEncoding, "UTF-8", $string );	$string = wfMungeToUtf8( $string ); # Any old &#1234; stuff	return $string;}function wfUtf8Sequence($codepoint) {	if($codepoint <     0x80) return chr($codepoint);	if($codepoint <    0x800) return chr($codepoint >>  6 & 0x3f | 0xc0) .                                     chr($codepoint       & 0x3f | 0x80);    if($codepoint <  0x10000) return chr($codepoint >> 12 & 0x0f | 0xe0) .                                     chr($codepoint >>  6 & 0x3f | 0x80) .                                     chr($codepoint       & 0x3f | 0x80);	if($codepoint < 0x100000) return chr($codepoint >> 18 & 0x07 | 0xf0) . # Double-check this	                                 chr($codepoint >> 12 & 0x3f | 0x80) .                                     chr($codepoint >>  6 & 0x3f | 0x80) .                                     chr($codepoint       & 0x3f | 0x80);	# Doesn't yet handle outside the BMP	return "&#$codepoint;";}function wfMungeToUtf8($string) {	$string = preg_replace ( '/&#([0-9]+);/e', 'wfUtf8Sequence($1)', $string );	$string = preg_replace ( '/&#x([0-9a-f]+);/ie', 'wfUtf8Sequence(0x$1)', $string );	# Should also do named entities here	return $string;}function timestamp2ISO8601( $ts ) {	#2003-08-05T18:30:02Z	return gmdate( 'Y-m-d', $ts ) . 'T' . gmdate( 'H:i:s', $ts ) . 'Z';}function xmlsafe( $string ) {	/**	 * The page may contain old data which has not been properly normalized.	 * Invalid UTF-8 sequences or forbidden control characters will make our	 * XML output invalid, so be sure to strip them out.	 */	$string = UtfNormal::cleanUp( $string );	$string = htmlspecialchars( $string );	return $string;}function xmlCommentSafe( $text ) {	return str_replace( '--', '\\-\\-', xmlsafe( recodeText( $text ) ) );}function array2object( $arr ) {	$o = (object)0;	foreach( $arr as $x => $y ) {		$o->$x = $y;	}	return $o;}/** * Make CamelCase and /Talk links work */function mungeFormat( $text ) {	global $nowiki;	$nowiki = array();	$staged = preg_replace_callback(		'/(<nowiki>.*?<\\/nowiki>|(?:http|https|ftp):\\S+|\[\[[^]\\n]+]])/s',		'nowikiPlaceholder', $text );	# This is probably not  100% correct, I'm just	# glancing at the UseModWiki code.	$upper   = "[A-Z]";	$lower   = "[a-z_0-9]";	$any     = "[A-Za-z_0-9]";	$camel   = "(?:$upper+$lower+$upper+$any*)";	$subpage = "(?:\\/$any+)";	$substart = "(?:\\/$upper$any*)";	$munged = preg_replace( "/(?!\\[\\[)($camel$subpage*|$substart$subpage*)\\b(?!\\]\\]|>)/",		'[[$1]]', $staged );	$final = preg_replace( '/' . preg_quote( placeholder() ) . '/es',		'array_shift( $nowiki )', $munged );	return $final;}function placeholder( $x = null ) {	return '\xffplaceholder\xff';}function nowikiPlaceholder( $matches ) {	global $nowiki;	$nowiki[] = $matches[1];	return placeholder();}?>
importusemodwiki.php - 源码说明

本页面展示了「php 开发的内容管理系统」中的 importusemodwiki.php 源码文件，采用 PHP 编程语言编写，共 366 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与php相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?