J.saterfiel
Joined 16 September 2009
Here is the PdfBook.php I use on my mediawiki installation (1.14). It's a little more advanced than the one currently available. List of new features:
- Ability to remove links in the documents
- Ability for a printed Category (collection of articles) to have a cover page with its Category Name and date created printed on it
- Ability to have a "Download as PDF" link in the toolbox on every page, without needing to explicitly place a link on each page you want to create PDFs from.
- Ability to change the date format used on the header page (http://us3.php.net/manual/en/function.date.php)
- Ability to change the information printed in each page header and footer (see the htmldoc documentation at http://www.htmldoc.org/ for the available options; once htmldoc is installed, run htmldoc -help, as the full list of options is not displayed on their website).
<?php
/**
* PdfBook extension altered version by J.saterfiel
* - Composes a book from articles in a category and exports as a PDF book
*
* See http://www.mediawiki.org/Extension:PdfBook for installation and usage details
* See http://www.organicdesign.co.nz/Extension_talk:PdfBook for development notes and discussion
*
* Started: 2007-08-08
*
* @package MediaWiki
* @subpackage Extensions
* @author Aran Dunkley [http://www.organicdesign.co.nz/nad User:Nad]
* @author J.saterfiel
* @copyright © 2007 Aran Dunkley
* @licence GNU General Public Licence 2.0 or later
*/
# Refuse to run when this file is requested directly instead of being loaded by MediaWiki.
if (!defined('MEDIAWIKI')) die('Not an entry point.');
# Version string reported in the extension credits below.
define('PDFBOOK_VERSION', '1.0.3, 2008-12-09');
# wfSetupPdfBook() creates the PdfBook instance and applies the $wgPDFBook* overrides.
$wgExtensionFunctions[] = 'wfSetupPdfBook';
$wgHooks['LanguageGetMagic'][] = 'wfPdfBookLanguageGetMagic';
# Credits entry registered under the 'parserhook' group.
$wgExtensionCredits['parserhook'][] = array(
'path' => __FILE__,
'name' => 'PdfBook',
'author' => '[http://www.organicdesign.co.nz/nad User:Nad]',
'description' => 'Composes a book from articles in a category and exports as a PDF book',
'url' => 'http://www.mediawiki.org/wiki/Extension:PdfBook',
'version' => PDFBOOK_VERSION
);
/**
 * Handles the "pdfbook" action: renders one article, the members of a
 * category, or the articles linked from a bullet list, and streams the result
 * to the client either as HTML or as a PDF produced by the htmldoc binary.
 */
class PdfBook {

	/** When true, <a> elements in the rendered articles are rewritten to <span> */
	public $ignoreLinks = false;

	/**
	 * HTML used as the htmldoc title file. The placeholders ARTICLE_TITLE,
	 * WIKI_URL, CURRENT_DATE and CURRENT_YEAR are substituted at export time.
	 */
	public $coverPage = "<html><body><p> </p><p> </p><p> </p><p> </p><p> </p><p> </p><p> </p><p> </p><p> </p><p> </p>".
		"<center><h1><b>ARTICLE_TITLE</b></h1></center><center><h3>Downloaded from WIKI_URL</h3></center><center><h3>Accurate as of: CURRENT_DATE</h3></center></body></html>";

	/** date() format used for the CURRENT_DATE placeholder */
	public $dateFormat = 'n/j/Y h:i A';

	/** Value passed to htmldoc --header (see the htmldoc documentation for the format letters) */
	public $headerFormat = 'th.';

	/** Value passed to htmldoc --footer (see the htmldoc documentation for the format letters) */
	public $footerFormat = '.D/';

	/**
	 * Register this object as an UnknownAction handler and declare the 'pdf' log type.
	 *
	 * Declared as __construct() rather than a class-named method, because
	 * class-named methods are no longer treated as constructors by PHP 8.
	 */
	function __construct() {
		global $wgHooks, $wgLogTypes, $wgLogNames, $wgLogHeaders, $wgLogActions;
		$wgHooks['UnknownAction'][] = $this;

		# Add a new pdf log type
		$wgLogTypes[] = 'pdf';
		$wgLogNames ['pdf'] = 'pdflogpage';
		$wgLogHeaders['pdf'] = 'pdflogpagetext';
		$wgLogActions['pdf/book'] = 'pdflogentry';
	}

	/**
	 * Perform the export operation.
	 *
	 * @param string $action  the requested action; only 'pdfbook' is handled
	 * @param Article $article the article the action was requested on
	 * @return bool true when the action is not 'pdfbook', false once the
	 *              export has been handled and output sent
	 */
	function onUnknownAction($action, $article) {
		global $wgOut, $wgUser, $wgTitle, $wgParser, $wgRequest;
		global $wgServer, $wgArticlePath, $wgScriptPath, $wgUploadPath, $wgUploadDirectory, $wgScript;

		if ($action != 'pdfbook') return true;

		$title = $article->getTitle();
		$opt   = ParserOptions::newFromUser($wgUser);

		# Log the export
		$msg = $wgUser->getUserPage()->getPrefixedText().' exported as a PDF book';
		$log = new LogPage('pdf', false);
		$log->addEntry('book', $wgTitle, $msg);

		# Initialise PDF variables
		$format  = $wgRequest->getText('format');
		$notitle = false;
		$layout  = $format == 'single' ? '--webpage' : '--firstpage toc';
		$charset = $this->setProperty('Charset',     'iso-8859-1');
		$left    = $this->setProperty('LeftMargin',  '1cm');
		$right   = $this->setProperty('RightMargin', '1cm');
		$top     = $this->setProperty('TopMargin',   '1cm');
		$bottom  = $this->setProperty('BottomMargin','1cm');
		$font    = $this->setProperty('Font',        'Arial');
		$size    = $this->setProperty('FontSize',    '8');
		$linkcol = $this->setProperty('LinkColour',  '217A28');
		$levels  = $this->setProperty('TocLevels',   '2');
		$exclude = $this->setProperty('Exclude',     array());
		$width   = $this->setProperty('Width',       '');
		# These values can come straight from the query string, so every one
		# of them is shell-escaped before it reaches the htmldoc command line.
		$width   = $width ? '--browserwidth '.escapeshellarg($width) : '';
		# split() no longer exists in PHP 7+, preg_split() is the replacement
		if (!is_array($exclude)) $exclude = preg_split('/\s*,\s*/', $exclude);

		# Select articles from members if a category or links in content if not
		if ($format == 'single') $articles = array($title);
		else {
			$articles = array();
			if ($title->getNamespace() == NS_CATEGORY) {
				$db     = wfGetDB(DB_SLAVE);
				$cat    = $db->addQuotes($title->getDBkey());
				$result = $db->select(
					'categorylinks',
					'cl_from',
					"cl_to = $cat",
					'PdfBook',
					array('ORDER BY' => 'cl_sortkey')
				);
				if ($result instanceof ResultWrapper) $result = $result->result;
				while ($row = $db->fetchRow($result)) $articles[] = Title::newFromID($row[0]);
			}
			else {
				$text = $article->fetchContent();
				$text = $wgParser->preprocess($text, $title, $opt);
				if (preg_match_all('/^\\*\\s*\\[{2}\\s*([^\\|\\]]+)\\s*.*?\\]{2}/m', $text, $links))
					foreach ($links[1] as $link) $articles[] = Title::newFromText($link);
			}
		}

		# Format the article(s) as a single HTML document with absolute URL's
		$book = $title->getText();
		$html = '';
		$wgArticlePath = $wgServer.$wgArticlePath;
		$wgScriptPath  = $wgServer.$wgScriptPath;
		$wgUploadPath  = $wgServer.$wgUploadPath;
		$wgScript      = $wgServer.$wgScript;
		foreach ($articles as $pageTitle) {
			# Title::newFromID()/newFromText() yield no object for deleted pages or invalid link text
			if (!is_object($pageTitle)) continue;
			$ttext = $pageTitle->getPrefixedText();
			if (in_array($ttext, $exclude)) continue;

			$page = new Article($pageTitle);
			$text = $page->fetchContent();
			$text = preg_replace('/<!--([^@]+?)-->/s', '@@'.'@@$1@@'.'@@', $text); # preserve HTML comments
			if ($format != 'single') $text .= '__NOTOC__';
			$opt->setEditSection(false);    # remove section-edit links
			$wgOut->setHTMLTitle($ttext);   # use this so DISPLAYTITLE magic works
			$out   = $wgParser->parse($text, $pageTitle, $opt, true, true);
			$ttext = $wgOut->getHTMLTitle();
			$text  = $out->getText();
			$text  = preg_replace('|(<img[^>]+?src=")(/.+?>)|', "$1$wgServer$2", $text);      # make image urls absolute
			$text  = preg_replace('|<div\s*class=[\'"]?noprint["\']?>.+?</div>|s', '', $text); # non-printable areas
			$text  = preg_replace('|@{4}([^@]+?)@{4}|s', '<!--$1-->', $text);                  # HTML comments hack
			if ($this->ignoreLinks) {
				$text = str_ireplace('<a', '<span', $text);
				$text = str_ireplace('</a>', '</span>', $text);
			}
			$ttext = basename($ttext);
			$h1    = $notitle ? '' : "<center><h1>$ttext</h1></center>";
			$html .= utf8_decode("$h1$text\n");
		}

		# The title ends up inside a quoted header value, so neutralise quotes,
		# backslashes, slashes and control characters.
		$filename = preg_replace('/[\x00-\x1f"\\\\\/]/', '_', $book);

		# If format=html in query-string, return html content directly
		if ($format == 'html') {
			$wgOut->disable();
			header("Content-Type: text/html");
			header("Content-Disposition: attachment; filename=\"$filename.html\"");
			print $html;
			return false;
		}

		# Build the cover page from the template
		$coverPageTmp = str_replace(
			array('CURRENT_DATE', 'CURRENT_YEAR', 'ARTICLE_TITLE', 'WIKI_URL'),
			array(
				date($this->dateFormat),
				date('Y'),
				htmlspecialchars($book),
				htmlspecialchars($_SERVER['SERVER_NAME'])
			),
			$this->coverPage
		);

		# Write the HTML and the cover page to tmp files
		$file            = "$wgUploadDirectory/".uniqid('pdf-book');
		$fileCoverLetter = "$wgUploadDirectory/".uniqid('pdf-book').'.htm';
		$written = file_put_contents($file, $html) !== false
			&& file_put_contents($fileCoverLetter, $coverPageTmp) !== false;

		$wgOut->disable();
		if (!$written) {
			# Without its input files htmldoc cannot produce anything useful
			@unlink($file);
			@unlink($fileCoverLetter);
			header('HTTP/1.1 500 Internal Server Error');
			header('Content-Type: text/plain');
			print 'PdfBook: unable to write temporary files';
			return false;
		}

		# Send the file to the client via htmldoc converter
		$toc = $format == 'single' ? '' : ' --toclevels '.escapeshellarg($levels);
		header("Content-Type: application/pdf");
		header("Content-Disposition: attachment; filename=\"$filename.pdf\"");
		$cmd  = 'htmldoc -t pdf --charset '.escapeshellarg($charset);
		$cmd .= ' --left '.escapeshellarg($left).' --right '.escapeshellarg($right);
		$cmd .= ' --top '.escapeshellarg($top).' --bottom '.escapeshellarg($bottom);
		$cmd .= ' --header '.escapeshellarg($this->headerFormat).' --footer '.escapeshellarg($this->footerFormat);
		$cmd .= ' --headfootsize 8 --quiet --jpeg --color';
		$cmd .= ' --bodyfont '.escapeshellarg($font).' --fontsize '.escapeshellarg($size);
		$cmd .= ' --linkstyle plain --linkcolor '.escapeshellarg($linkcol);
		$cmd .= ' --titlefile '.escapeshellarg($fileCoverLetter);
		$cmd .= "$toc --format pdf14 --numbered $layout $width";
		$cmd .= ' '.escapeshellarg($file);
		putenv("HTMLDOC_NOCGI=1");
		passthru($cmd);

		# Remove both temporary files
		@unlink($file);
		@unlink($fileCoverLetter);
		return false;
	}

	/**
	 * Return a property for htmldoc using request, global or passed default.
	 *
	 * Lookup order: request parameter "pdf$name", then global
	 * "$wgPdfBook$name", then $default.
	 *
	 * @param string $name    property name without prefix, e.g. 'Font'
	 * @param mixed  $default value returned when neither source supplies one
	 * @return mixed
	 */
	function setProperty($name, $default) {
		global $wgRequest;
		$requested = $wgRequest->getText("pdf$name");
		# Compare against '' so that a legitimate "0" from the request is honoured
		if ($requested !== '' && $requested !== null) return $requested;
		if (isset($GLOBALS["wgPdfBook$name"])) return $GLOBALS["wgPdfBook$name"];
		return $default;
	}

	/**
	 * Needed in some versions to prevent Special:Version from breaking
	 */
	function __toString() { return 'PdfBook'; }
}
/**
 * Extension setup callback (registered in $wgExtensionFunctions).
 *
 * Instantiates the global $wgPdfBook object and copies every $wgPDFBook*
 * setting that has been given a non-null value onto the matching property.
 */
function wfSetupPdfBook() {
	global $wgPdfBook;

	$wgPdfBook = new PdfBook();

	# configuration global => PdfBook property it overrides
	$overrides = array(
		'wgPDFBookIgnoreLinks'         => 'ignoreLinks',
		'wgPDFBookCoverPage'           => 'coverPage',
		'wgPDFBookCoverPageDateFormat' => 'dateFormat',
		'wgPDFBookHeaderFormat'        => 'headerFormat',
		'wgPDFBookFooterFormat'        => 'footerFormat',
	);

	foreach ($overrides as $setting => $property) {
		if (isset($GLOBALS[$setting])) {
			$wgPdfBook->$property = $GLOBALS[$setting];
		}
	}
}
/**
 * Needed in MediaWiki >1.8.0 for magic word hooks to work properly.
 *
 * Registers the word held in $wgPdfBookMagic. Nothing in this file assigns
 * that global, so when it is unset or empty the hook does nothing instead of
 * adding an entry under an empty key.
 *
 * @param array $magicWords magic word table, modified in place
 * @param mixed $langCode   language code stored as the first element of the entry
 * @return bool always true so that other hook handlers keep running
 */
function wfPdfBookLanguageGetMagic(&$magicWords, $langCode = 0) {
	global $wgPdfBookMagic;
	if (is_string($wgPdfBookMagic) && $wgPdfBookMagic !== '') {
		$magicWords[$wgPdfBookMagic] = array($langCode, $wgPdfBookMagic);
	}
	return true;
}
// Add-on: "Download as PDF" link in the toolbox menu.
// wfSpecialPdfNav stores the link under $nav_urls['pdfprint'];
// wfSpecialPdfToolbox prints that entry as a list item.
$wgHooks['SkinTemplateBuildNavUrlsNav_urlsAfterPermalink'][] = 'wfSpecialPdfNav';
$wgHooks['SkinTemplateToolboxEnd'][] = 'wfSpecialPdfToolbox';
/**
 * Add the "Download as PDF" entry to the skin's navigation URLs.
 *
 * The link targets action=pdfbook&format=single on the current page. It is
 * built from the global $wgTitle so that it also works on wikis using
 * index.php?title=... URLs; the original code read the non-existent
 * $nav_urls['href'] key and always produced a bare relative query string.
 * When no title object is available, that relative form is kept as a fallback.
 *
 * @param object $skintemplate skin object passed by the hook (unused)
 * @param array  $nav_urls     navigation URL table, modified in place
 * @param mixed  $oldid        unused
 * @param mixed  $revid        unused
 * @return bool always true so that other hook handlers keep running
 */
function wfSpecialPdfNav( &$skintemplate, &$nav_urls, &$oldid, &$revid ) {
	global $wgTitle;

	$query = 'action=pdfbook&format=single';
	if ( is_object( $wgTitle ) ) {
		$href = $wgTitle->getLocalURL( $query );
	} else {
		$base = isset( $nav_urls['href'] ) ? $nav_urls['href'] : '';
		$href = $base . '?' . $query;
	}

	$nav_urls['pdfprint'] = array(
		'text' => 'Download as PDF',
		'href' => $href
	);
	return true;
}
/**
 * Print the "Download as PDF" toolbox item.
 *
 * Does nothing when the template has no 'pdfprint' navigation entry. With an
 * empty href the label is printed as plain text, otherwise as a link. The
 * plain-text branch previously called htmlspecialchars() without echo and
 * therefore printed an empty list item.
 *
 * @param object $monobook skin template object exposing ->data['nav_urls']
 * @return bool always true so that other hook handlers keep running
 */
function wfSpecialPdfToolbox( &$monobook ) {
	if ( !isset( $monobook->data['nav_urls']['pdfprint'] ) ) {
		return true;
	}

	$entry = $monobook->data['nav_urls']['pdfprint'];
	$label = htmlspecialchars( $entry['text'] );

	if ( $entry['href'] == '' ) {
		echo '<li id="t-ispdf">' . $label . '</li>';
	} else {
		echo '<li id="t-pdf"><a href="' . htmlspecialchars( $entry['href'] ) . '">' . $label . '</a></li>';
	}
	return true;
}
?>
Example configuration that would be placed in your LocalSettings.php file:
<?php
# PdfBook configuration
# require_once prevents a "cannot redeclare" fatal if the file is included twice
require_once "$IP/extensions/PdfBook/PdfBook.php";

# Turn links in the exported document into plain text
$wgPDFBookIgnoreLinks = true;

// Cover page template. Placeholders:
//   ARTICLE_TITLE - the category or single article title
//   CURRENT_DATE  - the current date, formatted with $wgPDFBookCoverPageDateFormat
//   CURRENT_YEAR  - the current year, 4 digits
//   WIKI_URL      - the server name of the request
// Default value is below
$wgPDFBookCoverPage = "<html><body><p> </p><p> </p><p> </p><p> </p><p> </p><p> </p><p> </p><p> </p><p> </p>"
."<p> </p><center><h1><b>ARTICLE_TITLE</b></h1></center><center><h3>Downloaded from WIKI_URL</h3></center><center><h3>Accurate as of: CURRENT_DATE</h3>"
."</center></body></html>";

// PHP date() format for CURRENT_DATE. Default value is 'n/j/Y h:i A'
$wgPDFBookCoverPageDateFormat = 'n/j/Y h:i A';

// htmldoc --header format. Default value is 'th.'
$wgPDFBookHeaderFormat = 'th.';

// htmldoc --footer format. Default value is '.D/'
$wgPDFBookFooterFormat = '.D/';
?>