📄 importusemodwiki.php
字号:
<?php/** * Import data from a UseModWiki into a MediaWiki wiki * 2003-02-09 Brion VIBBER <brion@pobox.com> * Based loosely on Magnus's code from 2001-2002 * * Updated limited version to get something working temporarily * 2003-10-09 * Be sure to run the link & index rebuilding scripts! * * Some more munging for charsets etc * 2003-11-28 * * Partial fix for pages starting with lowercase letters (??) * and CamelCase and /Subpage link conversion * 2004-11-17 * * Rewrite output to create Special:Export format for import * instead of raw SQL. Should be 'future-proof' against future * schema changes. * 2005-03-14 * * @todo document * @package MediaWiki * @subpackage Maintenance */if( php_sapi_name() != 'cli' ) { echo "Please customize the settings and run me from the command line."; die( -1 );}/** Set these correctly! */$wgImportEncoding = "CP1252"; /* We convert all to UTF-8 */$wgRootDirectory = "/kalman/Projects/wiki2002/wiki/lib-http/db/wiki";/* On a large wiki, you might run out of memory */@ini_set( 'memory_limit', '40M' );/* globals */$wgFieldSeparator = "\xb3"; # Some wikis may use different char $FS = $wgFieldSeparator ; $FS1 = $FS."1" ; $FS2 = $FS."2" ; $FS3 = $FS."3" ;# Unicode sanitization toolsrequire_once( '../includes/normal/UtfNormal.php' );$usercache = array();importPages();# ------------------------------------------------------------------------------function importPages(){ global $wgRootDirectory; $gt = '>'; echo <<<END<?xml version="1.0" encoding="UTF-8" ?$gt<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.1/ http://www.mediawiki.org/xml/export-0.1.xsd" version="0.1" xml:lang="en"><!-- generated by importUseModWiki.php -->END; $letters = array( 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'other' ); foreach( $letters as $letter ) { $dir = "$wgRootDirectory/page/$letter"; if( is_dir( $dir ) ) importPageDirectory( $dir ); } echo <<<END</mediawiki>END;}function importPageDirectory( $dir, $prefix = "" ){ echo "\n<!-- Checking page directory " . xmlCommentSafe( $dir ) . " -->\n"; $mydir = opendir( $dir ); while( $entry = readdir( $mydir ) ) { if( preg_match( '/^(.+)\.db$/', $entry, $m ) ) { echo importPage( $prefix . $m[1] ); } else { if( is_dir( "$dir/$entry" ) ) { if( $entry != '.' && $entry != '..' ) { importPageDirectory( "$dir/$entry", "$entry/" ); } } else { echo "<!-- File '" . xmlCommentSafe( $entry ) . "' doesn't seem to contain an article. Skipping. -->\n"; } } }}# ------------------------------------------------------------------------------/* fetch_ functions Grab a given item from the database */function useModFilename( $title ) { $c = substr( $title, 0, 1 ); if(preg_match( '/[A-Z]/i', $c ) ) { return strtoupper( $c ) . "/$title"; } return "other/$title";}function fetchPage( $title ){ global $FS,$FS1,$FS2,$FS3, $wgRootDirectory; $fname = $wgRootDirectory . "/page/" . useModFilename( $title ) . ".db"; if( !file_exists( $fname ) ) { echo "Couldn't open file '$fname' for page '$title'.\n"; die( -1 ); } $page = splitHash( $FS1, file_get_contents( $fname ) ); $section = splitHash( $FS2, $page["text_default"] ); $text = splitHash( $FS3, $section["data"] ); return array2object( array( "text" => $text["text"] , "summary" => $text["summary"] , "minor" => $text["minor"] , "ts" => $section["ts"] , "username" => $section["username"] , "host" => $section["host"] ) );}function fetchKeptPages( $title ){ global $FS,$FS1,$FS2,$FS3, $wgRootDirectory, $wgTimezoneCorrection; $fname = $wgRootDirectory . "/keep/" . useModFilename( $title ) . ".kp"; if( !file_exists( $fname ) ) return array(); $keptlist = explode( $FS1, file_get_contents( $fname ) ); array_shift( $keptlist ); # Drop the junk at beginning of file $revisions = array(); foreach( $keptlist as $rev ) { $section = splitHash( $FS2, $rev ); $text = splitHash( $FS3, $section["data"] ); if ( $text["text"] && $text["minor"] != "" && ( $section["ts"]*1 > 0 ) ) { array_push( $revisions, array2object( array ( "text" => $text["text"] , "summary" => $text["summary"] , "minor" => $text["minor"] , "ts" => $section["ts"] , "username" => $section["username"] , "host" => $section["host"] ) ) ); } else { echo "<!-- skipped a bad old revision -->\n"; } } return $revisions;}function splitHash ( $sep , $str ) { $temp = explode ( $sep , $str ) ; $ret = array () ; for ( $i = 0; $i+1 < count ( $temp ) ; $i++ ) { $ret[$temp[$i]] = $temp[++$i] ; } return $ret ; }/* import_ functions Take a fetched item and produce SQL */function checkUserCache( $name, $host ){ global $usercache; if( $name ) { if( in_array( $name, $usercache ) ) { $userid = $usercache[$name]; } else { # If we haven't imported user accounts $userid = 0; } $username = str_replace( '_', ' ', $name ); } else { $userid = 0; $username = $host; } return array( $userid, $username );}function importPage( $title ){ global $usercache; echo "\n<!-- Importing page " . xmlCommentSafe( $title ) . " -->\n"; $page = fetchPage( $title ); $newtitle = xmlsafe( str_replace( '_', ' ', recodeText( $title ) ) ); $munged = mungeFormat( $page->text ); if( $munged != $page->text ) { /** * Save a *new* revision with the conversion, and put the * previous last version into the history. */ $next = array2object( array( 'text' => $munged, 'minor' => 1, 'username' => 'Conversion script', 'host' => '127.0.0.1', 'ts' => time(), 'summary' => 'link fix', ) ); $revisions = array( $page, $next ); } else { /** * Current revision: */ $revisions = array( $page ); } $xml = <<<END <page> <title>$newtitle</title>END; # History $revisions = array_merge( $revisions, fetchKeptPages( $title ) ); if(count( $revisions ) == 0 ) { return $sql; } foreach( $revisions as $rev ) { $text = xmlsafe( recodeText( $rev->text ) ); $minor = ($rev->minor ? '<minor/>' : ''); list( $userid, $username ) = checkUserCache( $rev->username, $rev->host ); $username = xmlsafe( recodeText( $username ) ); $timestamp = xmlsafe( timestamp2ISO8601( $rev->ts ) ); $comment = xmlsafe( recodeText( $rev->summary ) ); $xml .= <<<END <revision> <timestamp>$timestamp</timestamp> <contributor><username>$username</username></contributor> $minor <comment>$comment</comment> <text>$text</text> </revision>END; } $xml .= "</page>\n\n"; return $xml;}# Whee!function recodeText( $string ) { global $wgImportEncoding; # For currently latin-1 wikis $string = str_replace( "\r\n", "\n", $string ); $string = @iconv( $wgImportEncoding, "UTF-8", $string ); $string = wfMungeToUtf8( $string ); # Any old Ӓ stuff return $string;}function wfUtf8Sequence($codepoint) { if($codepoint < 0x80) return chr($codepoint); if($codepoint < 0x800) return chr($codepoint >> 6 & 0x3f | 0xc0) . chr($codepoint & 0x3f | 0x80); if($codepoint < 0x10000) return chr($codepoint >> 12 & 0x0f | 0xe0) . chr($codepoint >> 6 & 0x3f | 0x80) . chr($codepoint & 0x3f | 0x80); if($codepoint < 0x100000) return chr($codepoint >> 18 & 0x07 | 0xf0) . # Double-check this chr($codepoint >> 12 & 0x3f | 0x80) . chr($codepoint >> 6 & 0x3f | 0x80) . chr($codepoint & 0x3f | 0x80); # Doesn't yet handle outside the BMP return "&#$codepoint;";}function wfMungeToUtf8($string) { $string = preg_replace ( '/&#([0-9]+);/e', 'wfUtf8Sequence($1)', $string ); $string = preg_replace ( '/&#x([0-9a-f]+);/ie', 'wfUtf8Sequence(0x$1)', $string ); # Should also do named entities here return $string;}function timestamp2ISO8601( $ts ) { #2003-08-05T18:30:02Z return gmdate( 'Y-m-d', $ts ) . 'T' . gmdate( 'H:i:s', $ts ) . 'Z';}function xmlsafe( $string ) { /** * The page may contain old data which has not been properly normalized. * Invalid UTF-8 sequences or forbidden control characters will make our * XML output invalid, so be sure to strip them out. */ $string = UtfNormal::cleanUp( $string ); $string = htmlspecialchars( $string ); return $string;}function xmlCommentSafe( $text ) { return str_replace( '--', '\\-\\-', xmlsafe( recodeText( $text ) ) );}function array2object( $arr ) { $o = (object)0; foreach( $arr as $x => $y ) { $o->$x = $y; } return $o;}/** * Make CamelCase and /Talk links work */function mungeFormat( $text ) { global $nowiki; $nowiki = array(); $staged = preg_replace_callback( '/(<nowiki>.*?<\\/nowiki>|(?:http|https|ftp):\\S+|\[\[[^]\\n]+]])/s', 'nowikiPlaceholder', $text ); # This is probably not 100% correct, I'm just # glancing at the UseModWiki code. $upper = "[A-Z]"; $lower = "[a-z_0-9]"; $any = "[A-Za-z_0-9]"; $camel = "(?:$upper+$lower+$upper+$any*)"; $subpage = "(?:\\/$any+)"; $substart = "(?:\\/$upper$any*)"; $munged = preg_replace( "/(?!\\[\\[)($camel$subpage*|$substart$subpage*)\\b(?!\\]\\]|>)/", '[[$1]]', $staged ); $final = preg_replace( '/' . preg_quote( placeholder() ) . '/es', 'array_shift( $nowiki )', $munged ); return $final;}function placeholder( $x = null ) { return '\xffplaceholder\xff';}function nowikiPlaceholder( $matches ) { global $nowiki; $nowiki[] = $matches[1]; return placeholder();}?>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -