Manual:findAnomalies.php
Note: The findAnomalies.php script is no longer included in recent releases. See source code below if you want to use it.
findAnomalies.php is a maintenance script that searches for missing revisions and archive rows. The plmra mode is useful for situations in which, for a bunch of pages, you get the error 'The revision #0 of the page named "[page name]" does not exist. This is usually caused by following an outdated history link to a page that has been deleted. Details can be found in the deletion log.' It lets you find all the pages that have that error.
Adding a --fix option for plmr would be a simple matter of finding the most recent revision for a page and setting page.page_latest to that. More simply, one could just use attachLatest.php.
Usage
Option | Required | Description
---|---|---
mode | Required | Mode of operation: plmra, plmr, rmra, or rmr.
ranges | Optional | For rmr or rmra mode, list adjacent rev_ids using ranges (e.g. 500-600) rather than in the comma-delimited 500, 501, 502 format.
Sample output
rmra mode
$ php findAnomalies.php --mode=rmra --ranges
Reading the revision table...
Reading the archive table...
Reading the page table...
Here are the revision IDs of the revisions missing from both the revision and archive tables:
557 1 row; 1 anomalies found so far
19195 1 row; 2 anomalies found so far
20515 1 row; 3 anomalies found so far
20517 1 row; 4 anomalies found so far
20519 1 row; 5 anomalies found so far
20530 1 row; 6 anomalies found so far
23454 1 row; 7 anomalies found so far
23473 1 row; 8 anomalies found so far
23475 1 row; 9 anomalies found so far
27001-27030 30 rows; 39 anomalies found so far
27061-27226 166 rows; 205 anomalies found so far
27228-27252 25 rows; 230 anomalies found so far
27254-27562 309 rows; 539 anomalies found so far
28828 1 row; 540 anomalies found so far
29303 1 row; 541 anomalies found so far
29305 1 row; 542 anomalies found so far
31735-31736 2 rows; 544 anomalies found so far
32414 1 row; 545 anomalies found so far
32545 1 row; 546 anomalies found so far
32554 1 row; 547 anomalies found so far
32702 1 row; 548 anomalies found so far
32721 1 row; 549 anomalies found so far
32840 1 row; 550 anomalies found so far
32951 1 row; 551 anomalies found so far
32976 1 row; 552 anomalies found so far
33007 1 row; 553 anomalies found so far
33044 1 row; 554 anomalies found so far
33079 1 row; 555 anomalies found so far
33170 1 row; 556 anomalies found so far
33200 1 row; 557 anomalies found so far
33263 1 row; 558 anomalies found so far
33332 1 row; 559 anomalies found so far
33381 1 row; 560 anomalies found so far
33774 1 row; 561 anomalies found so far
561 anomalies found
plmra mode
$ php findAnomalies.php --mode=plmra
Reading the revision table...
Reading the archive table...
Reading the page table...
Here are the pages that have no revision nor archived revision for their page_latest:
Cambodia
Cambridge,_Massachusetts
Campaign_finance_law
Campus_libertarian
Canada
[... etc., etc. ...]
Drug_test
Cannibalism
Genital_integrity
French_économistes
Frédéric_Bastiat
178 anomalies found
Code
<?php
/**
* findAnomalies.php
* By Leucosticte
* Version 1.0.1
*
* Searches for:
* Mode plmra: pages whose page.page_latest points to a revision ID
* that's missing from revision.rev_id AND archive.ar_rev_id.
* Mode plmr: pages whose page.page_latest points to a revision ID
* that's missing from revision.rev_id (archive.ar_rev_id is not checked).
* Mode rmra: revisions missing from both revision.rev_id and
* archive.ar_rev_id.
* Mode rmr: revisions missing from revision.rev_id.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Maintenance
*/
require_once __DIR__ . '/Maintenance.php';
/**
 * Maintenance script to find database anomalies.
 *
 * Loads every revision.rev_id (and, for the plmra/rmra modes, every
 * archive.ar_rev_id) into memory, then reports, depending on --mode:
 *  - plmra: pages whose page_latest is in neither the revision nor the archive table;
 *  - plmr:  pages whose page_latest is not in the revision table;
 *  - rmra:  IDs from 1 to the highest page_latest that are in neither table;
 *  - rmr:   IDs from 1 to the highest page_latest that are not in the revision table.
 *
 * @ingroup Maintenance
 */
class FindAnomalies extends Maintenance {
	/**
	 * Register the script description and its command-line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( "Find database anomalies" );
		$this->addOption( "mode", "Mode of operation (plmra, plmr, rmra, or rmr)", true,
			true );
		$this->addOption( "ranges", "For rmr or rmra mode, list adjacent rev_ids using ranges "
			. "(e.g. 500-600) rather than in the comma-delimited 500, 501, 502 format",
			false );
	}

	/**
	 * Entry point: validate the options, read the tables, and print the
	 * anomalies for the selected mode followed by a total.
	 *
	 * Exits with status 1 after printing a usage line if --mode is not one of
	 * the four supported values.
	 */
	public function execute() {
		// defined() takes the constant's NAME as a string. Passing the bare
		// constant either throws (constant undefined) or tests its value.
		if ( !defined( 'DB_SLAVE' ) ) {
			define( 'DB_SLAVE', DB_REPLICA );
		}

		$acceptableModes = array( 'plmra', 'plmr', 'rmra', 'rmr' );
		$mode = $this->getOption( 'mode' );
		$ranges = $this->getOption( 'ranges' );
		if ( !in_array( $mode, $acceptableModes, true ) ) {
			$this->output( "Usage: php findAnomalies.php --mode=<plmra, plmr, rmra, or rmr> "
				. "[--ranges]\n" );
			exit( 1 );
		}

		$dbr = wfGetDB( DB_SLAVE );

		$this->output( "Reading the revision table...\n" );
		$revision = $this->readIdSet( $dbr, 'revision', 'rev_id' );

		// Only the "...a" modes consult the archive table; otherwise the set
		// stays empty and is never looked at.
		$archive = array();
		if ( $mode === 'plmra' || $mode === 'rmra' ) {
			$this->output( "Reading the archive table...\n" );
			$archive = $this->readIdSet( $dbr, 'archive', 'ar_rev_id' );
		}

		$this->output( "Reading the page table...\n" );
		$pageResult = $dbr->select( 'page', array( 'page_namespace', 'page_title',
			'page_latest' ), array( '1=1' ) );
		// Keep pages as a plain list. Keying by page_latest would collapse all
		// pages that share a value (e.g. several pages with page_latest = 0)
		// into a single entry, hiding all but one of them from the report.
		$pages = array();
		$highestRevision = 0;
		foreach ( $pageResult as $row ) {
			$latest = (int)$row->page_latest;
			$pages[] = array(
				'page_namespace' => $row->page_namespace,
				'page_title' => $row->page_title,
				'page_latest' => $latest,
			);
			if ( $latest > $highestRevision ) {
				$highestRevision = $latest;
			}
		}

		$modeMessages = array(
			'plmra' => "Here are the pages that have no revision nor archived revision for their "
				. "page_latest:\n",
			'plmr' => "Here are the pages that have no revision for their page_latest:\n",
			'rmra' => "Here are the revision IDs of the revisions missing from both the revision and "
				. "archive tables:\n",
			'rmr' => "Here are the revision IDs of the revisions missing from the revision table:\n"
		);
		$this->output( "\n" . $modeMessages[$mode] );

		if ( $mode === 'plmra' || $mode === 'plmr' ) {
			$foundAny = $this->reportPagesWithMissingLatest( $mode, $pages, $revision, $archive );
		} else {
			$foundAny = $this->reportMissingRevisions( $mode, $ranges, $highestRevision,
				$revision, $archive );
		}

		if ( !$foundAny ) {
			$this->output( "No anomalies found!\n" );
		} else {
			$this->output( "\n$foundAny anomalies found\n" );
		}
	}

	/**
	 * Read one ID column of a table into a lookup set.
	 *
	 * @param object $dbr Database handle as returned by wfGetDB()
	 * @param string $table Table to read
	 * @param string $field Column holding the IDs
	 * @return array Map of ID => true for every non-NULL value of $field
	 */
	private function readIdSet( $dbr, $table, $field ) {
		$ids = array();
		foreach ( $dbr->select( $table, array( $field ), array( '1=1' ) ) as $row ) {
			// A NULL ID cannot match any page_latest or revision ID.
			if ( $row->$field !== null ) {
				$ids[$row->$field] = true;
			}
		}
		return $ids;
	}

	/**
	 * Print every page whose page_latest has no matching row (modes plmra/plmr).
	 *
	 * @param string $mode 'plmra' (check revision and archive) or 'plmr' (revision only)
	 * @param array $pages List of arrays with page_namespace, page_title, page_latest
	 * @param array $revision Set of existing rev_ids (ID => true)
	 * @param array $archive Set of existing ar_rev_ids (ID => true); unused for plmr
	 * @return int Number of pages reported
	 */
	private function reportPagesWithMissingLatest( $mode, array $pages, array $revision,
		array $archive
	) {
		$namespaces = MWNamespace::getCanonicalNamespaces();
		$foundAny = 0;
		foreach ( $pages as $thisPage ) {
			$latest = $thisPage['page_latest'];
			if ( isset( $revision[$latest] ) ) {
				continue;
			}
			if ( $mode === 'plmra' && isset( $archive[$latest] ) ) {
				continue;
			}
			$foundAny++;
			$ns = $thisPage['page_namespace'];
			// The main namespace has an empty name and namespaces outside the
			// canonical list have no entry at all; both print without a prefix.
			$prefix = !empty( $namespaces[$ns] ) ? $namespaces[$ns] . ":" : "";
			$this->output( $prefix . $thisPage['page_title'] . "\n" );
		}
		return $foundAny;
	}

	/**
	 * Print every ID in 1..$highestRevision that has no matching row (modes rmra/rmr).
	 *
	 * With $ranges, runs of adjacent missing IDs are printed one run per line;
	 * otherwise the IDs are printed as a single comma-delimited line.
	 *
	 * @param string $mode 'rmra' (check revision and archive) or 'rmr' (revision only)
	 * @param mixed $ranges Truthy to print ranges instead of a comma-delimited list
	 * @param int $highestRevision Highest ID to test (inclusive)
	 * @param array $revision Set of existing rev_ids (ID => true)
	 * @param array $archive Set of existing ar_rev_ids (ID => true); unused for rmr
	 * @return int Number of missing IDs found
	 */
	private function reportMissingRevisions( $mode, $ranges, $highestRevision,
		array $revision, array $archive
	) {
		$foundAny = 0;
		// 0 means "no run of missing IDs is currently open".
		$rangeBegins = 0;
		$lastFound = 0;
		$printedAny = false;
		// Inclusive upper bound: the highest page_latest may itself be missing.
		for ( $id = 1; $id <= $highestRevision; $id++ ) {
			$missing = !isset( $revision[$id] )
				&& ( $mode === 'rmr' || !isset( $archive[$id] ) );
			if ( $missing ) {
				$foundAny++;
				if ( $ranges ) {
					if ( !$rangeBegins ) {
						$rangeBegins = $id;
					}
					$lastFound = $id;
				} else {
					// Separator goes before every item except the first, so no
					// ID is dropped and the list has no trailing comma.
					$this->output( ( $printedAny ? ", " : "" ) . $id );
					$printedAny = true;
				}
			} elseif ( $rangeBegins ) {
				// An existing ID closes the open run.
				$this->outputRange( $rangeBegins, $lastFound, $foundAny );
				$rangeBegins = 0;
			}
		}
		// Flush a run that extends up to $highestRevision.
		if ( $rangeBegins ) {
			$this->outputRange( $rangeBegins, $lastFound, $foundAny );
		}
		if ( !$ranges ) {
			$this->output( "\n" );
		}
		return $foundAny;
	}

	/**
	 * Print one line describing a run of adjacent missing IDs.
	 *
	 * @param int $rangeBegins First ID of the run
	 * @param int $lastFound Last ID of the run (equal to $rangeBegins for a single ID)
	 * @param int $foundAny Running total of anomalies, shown at the end of the line
	 */
	private function outputRange( $rangeBegins, $lastFound, $foundAny ) {
		if ( $rangeBegins == $lastFound ) {
			$this->output( $lastFound . "\t\t1 row; $foundAny anomalies "
				. "found so far\n" );
			return;
		}
		$rows = $lastFound - $rangeBegins + 1;
		// Pad row counts of fewer than five digits so the totals line up.
		$padding = str_repeat( " ", max( 0, 5 - strlen( (string)$rows ) ) );
		$this->output( $rangeBegins . "-" . $lastFound . "\t" . $rows . " rows; "
			. $padding . "$foundAny anomalies found so far\n" );
	}
}
// Tell the maintenance runner which class to instantiate.
$maintClass = 'FindAnomalies';
// Load the runner named by RUN_MAINTENANCE_IF_MAIN when that constant exists;
// otherwise fall back to DO_MAINTENANCE (make this work on versions before 1.17).
require_once ( defined( 'RUN_MAINTENANCE_IF_MAIN' ) ? RUN_MAINTENANCE_IF_MAIN : DO_MAINTENANCE );