MediaWiki Bulk Page Creator
This page is obsolete. It is being retained for archival purposes. It may document extensions or features that are obsolete and/or no longer supported. Do not rely on the information here being up-to-date. The code here is old and no longer works with current versions of MediaWiki. It is retained for interest's sake.
A MediaWiki bot written in PHP to create and prefill pages on a MediaWiki website or project. The bot reads a formatted input file and creates one page per entry. The Snoopy class library is required to make this bot work.
If you have questions, please contact me at joncu[NOSPAM]trerATgmail DOTTcom
The program
<?php
# PHP MediaWiki Bulk Page Creator
# Version: 1.0
# Author: Jonathan Cutrer
# Website: http://jcutrer.com/
#
# This program must have the Snoopy Class Library to run.
# http://sourceforge.net/projects/snoopy/
#
# Syntax: php bulkinsert.php inputfile.txt
#
# Input file format: every page is written as
#     <title>--ENDTITLE--<body>
# and pages are separated from each other by the marker --ENDPAGE--.
#
# For each page the script requests the wiki's edit form, extracts the
# wpEditToken value from the returned HTML, and then posts the page body
# together with that token to action=submit.

include "./Snoopy-1.2/Snoopy.class.php";
$snoopy = new Snoopy;

$wikiroot = "http://yourwikiurl.com";
$login_url = $wikiroot . "/index.php?title=Special:Userlogin&action=submitlogin";

# Set the username and password below:
$login_vars = array();
$login_vars['wpName'] = "YourRobotsUsername";
$login_vars['wpPassword'] = "Password";
$login_vars['wpRemember'] = "1";

# Validate the command line before touching the network.
if (!isset($argv[1])) {
    fwrite(STDERR, "Syntax: php bulkinsert.php inputfile.txt\n");
    exit(1);
}
if (!is_readable($argv[1])) {
    fwrite(STDERR, "Cannot read input file: " . $argv[1] . "\n");
    exit(1);
}

# Read the whole source file into $contents.
# file_get_contents() copes with an empty file, which
# fread($fp, filesize(...)) does not.
$contents = file_get_contents($argv[1]);
if ($contents === false) {
    fwrite(STDERR, "Failed to read input file: " . $argv[1] . "\n");
    exit(1);
}

# Login to Wiki. A failed request is reported but not fatal, matching the
# original best-effort behaviour.
if (!$snoopy->submit($login_url, $login_vars)) {
    fwrite(STDERR, "Warning: login request to " . $login_url . " failed\n");
}

# Split $contents into pages. explode() replaces split(), which was removed
# in PHP 7.0; the markers are literal strings, so no regex is needed.
$pages = explode("--ENDPAGE--", $contents);

# Loop for each item in the pages array.
# During the loop we get the edit page for its token, then submit the form
# to create the page.
foreach ($pages as $page) {
    # Limit of 2 keeps any later "--ENDTITLE--" text inside the body.
    $parts = explode("--ENDTITLE--", $page, 2);

    # Get rid of line breaks and surrounding whitespace in the title.
    $title = trim(str_replace(array("\r", "\n"), "", $parts[0]));

    # Skip empty chunks (e.g. whatever follows the final --ENDPAGE--).
    # Compared against "" explicitly so that a page titled "0" is not dropped.
    if ($title === "") {
        continue;
    }

    if (!isset($parts[1])) {
        fwrite(STDERR, "Skipping '" . $title . "': no --ENDTITLE-- marker found\n");
        continue;
    }
    $body = $parts[1];

    echo $title . "\n";

    # Make a safe title for the URL.
    $safetitle = rawurlencode(str_replace(" ", "_", $title));

    # Request the edit page for $title and keep its HTML in $editpage.
    # NOTE(review): the original posts the login variables to the edit URL;
    # kept as-is because it is unclear whether the wiki relied on that.
    $edit_url = $wikiroot . "/index.php?title=" . $safetitle . "&action=edit";
    if (!$snoopy->submit($edit_url, $login_vars)) {
        fwrite(STDERR, "Skipping '" . $title . "': could not fetch " . $edit_url . "\n");
        continue;
    }
    $editpage = $snoopy->results;

    # Pick out the edit token. Without a token there is nothing valid to
    # post, so skip the page instead of submitting an empty one.
    if (!preg_match('/.*value="(.*?)".*name="wpEditToken"/', $editpage, $matches)) {
        fwrite(STDERR, "Skipping '" . $title . "': no wpEditToken in edit page\n");
        continue;
    }
    $token = $matches[1];
    echo $token . "\n";

    # Set the post variables before submitting. Built fresh on every
    # iteration so nothing leaks over from the previous page.
    $submit_vars = array(
        'wpTextbox1'  => $body,
        'wpSummary'   => "",
        'wpSection'   => "",
        'wpEdittime'  => "",
        'wpMinoredit' => "1",
        'wpSave'      => "Save page",
        'wpEditToken' => $token,
    );

    # Submit (POST) to create the page.
    $submit_url = $wikiroot . "/index.php?title=" . $safetitle . "&action=submit";
    echo "Final Submit goes to:" . $submit_url . "\n";
    if ($snoopy->submit($submit_url, $submit_vars)) {
        echo $snoopy->results;
    } else {
        # The original echoed the previous page's results (or an undefined
        # variable) here; report the failure instead.
        fwrite(STDERR, "Submit failed for '" . $title . "'\n");
    }
}

exit;
?>
An example input file
Sample Page 1
--ENDTITLE--
__NOTOC____NOEDITSECTION__
This is the body of sample page one.
This page was inserted by [http://jcutrer.com/ Mediawiki Bulk Page Creator].
If you find this software useful please give me credit by providing a link to http://jcutrer.com
[[Sample Page 2]]
--ENDPAGE--
Sample Page 2
--ENDTITLE--
This is sample page 2 with sections
== Section 1 ==
[[Sample Page 1]]
== Section 2 ==
This page was inserted by [http://jcutrer.com/ Mediawiki Bulk Page Creator].
== Section 3 ==
If you find this software useful please give me credit by providing a link to http://jcutrer.com
--ENDPAGE--
Disclaimer & License
MediaWiki Bulk Page Creator is released under the GPL License.
Install this software at your own risk; there is no warranty or support.
Related scripts
Jonathan Cutrer has developed a proprietary web version of this tool called Mediawiki CSV Import (mwcsvimport). It is web-based and more flexible, since it takes CSV data as the import source. You can also format the data on import with page templates. The tool generates an XML import file. See: Mediawiki CSV Import. Unfortunately, the source code is neither free nor available online.
Here is a companion script to go with it. It sucks down data from a UseModWiki site in the format usable by this script. Try this: http://www.hudsonic.com/migwiki/bulkget-umw.php.txt
Blatant hack
Also, a blatant hack for uploading images in bulk:
<?php
# PHP MediaWiki Bulk media uploader
# Version: 0
# Author: Anonymous Coward, hacking Jonathan Cutrer
#
# This program must have the Snoopy Class Library to run.
# http://sourceforge.net/projects/snoopy/
#
# Syntax: php bulkmedia.php names_and_filepaths.txt
#
# names_and_filepaths.txt has one upload per line: the desired file name on
# the wiki, then a space character, then the path of the local file to
# upload. The wiki file name must not contain spaces; everything after the
# first space is taken as the path.

include "./Snoopy-1.2.3/Snoopy.class.php";
$snoopy = new Snoopy;

$wikiroot = "http://somewikiorother.org/root";

# The original string literals ended in a stray backslash + whitespace
# (line-continuation residue from a copy/paste), which corrupted both URLs.
$login_url = $wikiroot . "/index.php?title=Special:Userlogin&action=submitlogin";
$upload_url = $wikiroot . "/index.php?title=Special:Upload";

# Set the username and password below:
$login_vars = array();
$login_vars['wpName'] = "botname";
$login_vars['wpPassword'] = "botpass";
$login_vars['wpRemember'] = "1";

# Validate the command line before touching the network.
if (!isset($argv[1])) {
    fwrite(STDERR, "Syntax: php bulkmedia.php names_and_filepaths.txt\n");
    exit(1);
}
if (!is_readable($argv[1])) {
    fwrite(STDERR, "Cannot read input file: " . $argv[1] . "\n");
    exit(1);
}

# Read the whole list file into $contents.
$contents = file_get_contents($argv[1]);
if ($contents === false) {
    fwrite(STDERR, "Failed to read input file: " . $argv[1] . "\n");
    exit(1);
}

# Login to Wiki. A failed request is reported but not fatal, matching the
# original best-effort behaviour.
if (!$snoopy->submit($login_url, $login_vars)) {
    fwrite(STDERR, "Warning: login request to " . $login_url . " failed\n");
}

# Split $contents into lines. preg_split() replaces split(), which was
# removed in PHP 7.0, and accepts Unix, Windows and old Mac line endings.
$lines = preg_split('/\r\n|\r|\n/', $contents);

echo $upload_url;
echo "\n";

# Loop over every line and post one multipart upload form per entry.
foreach ($lines as $line) {
    # Limit of 2: first field is the wiki name, the remainder is the path.
    $fields = explode(" ", trim($line), 2);
    $fname = $fields[0];
    $fpath = isset($fields[1]) ? trim($fields[1]) : "";

    # Blank or incomplete lines are ignored, as in the original.
    if ($fname === "" || $fpath === "") {
        continue;
    }

    # Built fresh on every iteration so nothing leaks over from the
    # previous upload.
    $formvars = array(
        'wpDestFile' => $fname,
        'wpUpload'   => "Upload file",
    );
    $formfiles = array(
        'wpUploadFile' => $fpath,
    );

    $snoopy->set_submit_multipart();
    if ($snoopy->submit($upload_url, $formvars, $formfiles)) {
        echo "success $fname\n";
    } else {
        fwrite(STDERR, "FAILED: submit $fname ($fpath)\n");
    }

    # Echo the wiki's response so problems can be diagnosed from the output.
    echo $snoopy->results;
}

exit;
?>
Another blatant hack
Another blatant hack for uploading images in bulk, debugged to work with Windows, PHP 5.3, and MediaWiki 1.15.0:
<?php
# PHP MediaWiki bulk media (or other file type) uploader
# Version: 0
# Author: Ejcaputo hacking Anonymous Coward, hacking Jonathan Cutrer
#
# This program must have the Snoopy Class Library to run.
# http://sourceforge.net/projects/snoopy/
#
# Syntax: php bulkmedia.php names_and_filepaths.txt
#
# names_and_filepaths.txt has one upload per line: the path of the local
# file to upload, then a pipe character, then the desired file name on the
# wiki, then a pipe character, then a file comment. The last two items are
# optional; default values are used if they are not present:
#   - file name defaults to the base name of the path
#   - comment defaults to the path plus the file's modification time
#
#     path to upload|dest filename|comment

include "./Snoopy.class.php";
$snoopy = new Snoopy;

$wikiroot = "http://wiki.yours.xyz/wiki";

# The original literals ended in "\n", which put a line break inside the
# request URL; the URLs are now clean.
$login_url = $wikiroot . "/index.php?title=Special:Userlogin&action=submitlogin";
$upload_url = $wikiroot . "/index.php?title=Special:Upload";

# Set the username and password below:
$login_vars = array();
$login_vars['wpName'] = "yourBotUsername";
$login_vars['wpPassword'] = "password";
$login_vars['wpRemember'] = "1";
$login_vars['wpLoginattempt'] = "1";

# Validate the command line before touching the network.
if (!isset($argv[1])) {
    fwrite(STDERR, "Syntax: php bulkmedia.php names_and_filepaths.txt\n");
    exit(1);
}
if (!is_readable($argv[1])) {
    fwrite(STDERR, "Cannot read input file: " . $argv[1] . "\n");
    exit(1);
}

# Read the whole list file into $contents.
$contents = file_get_contents($argv[1]);
if ($contents === false) {
    fwrite(STDERR, "Failed to read input file: " . $argv[1] . "\n");
    exit(1);
}

# Login to Wiki. A failed request is reported but not fatal, matching the
# original best-effort behaviour.
if (!$snoopy->submit($login_url, $login_vars)) {
    fwrite(STDERR, "Warning: login request to " . $login_url . " failed\n");
}

# Split $contents into lines. preg_split() replaces split(), which was
# removed in PHP 7.0, and handles Windows and Unix line endings alike, so
# the separator no longer has to be edited per platform.
$lines = preg_split('/\r\n|\r|\n/', $contents);

echo $upload_url;
echo "\n";

# Loop over every line and post one multipart upload form per entry.
foreach ($lines as $line) {
    # Limit of 3 keeps any further "|" characters inside the comment.
    $lineParts = explode("|", $line, 3);
    $numParts = count($lineParts);

    $fpath = trim($lineParts[0]);

    # Destination name: second field if given and non-empty, otherwise the
    # base name of the local path.
    if ($numParts >= 2 && trim($lineParts[1]) !== "") {
        $fname = trim($lineParts[1]);
    } else {
        $fname = basename($fpath);
    }

    # Blank lines are skipped silently, as in the original.
    if ($fname === "" || $fpath === "") {
        continue;
    }

    # Check readability before calling filemtime(); the original called it
    # first, which raised a warning and produced a bogus date for missing
    # files.
    if (!is_readable($fpath)) {
        echo "FAILED: $fname ($fpath)\n";
        continue;
    }
    $mtime = filemtime($fpath);

    # Description: third field if given, otherwise path + modification time.
    if ($numParts >= 3) {
        $descr = $lineParts[2];
    } else {
        $descr = $fpath . "<br />" . date("Y-m-d H:i:s", $mtime);
    }

    $formvars = array(
        'wpSourceType'        => "file",
        'wpDestFile'          => $fname,
        'wpUploadDescription' => $descr,
        'wpIgnoreWarning'     => "true",
        'wpUpload'            => "Upload file",
    );
    $formfiles = array(
        'wpUploadFile' => $fpath,
    );

    $snoopy->set_submit_multipart();
    if ($snoopy->submit($upload_url, $formvars, $formfiles)) {
        echo "success: $fname ($fpath)\n";
        echo " filemtime: " . date("Ymd-His", $mtime) . "\n";
    } else {
        echo "FAILED: submit $fname ($fpath)\n";
    }

    # For debugging, echo $snoopy->results here, direct stdout to an HTML
    # file and then open it in a browser.
}

exit;
?>