Toolserver:~gregbard/philosobot/phillists/redlinks/rm blue.pl
This page was moved from the Toolserver wiki.
Toolserver has been replaced by Toolforge. As such, the instructions here may no longer work, but may still be of historical interest.
Please help by updating examples, links, template links, etc. If a page is still relevant, move it to a normal title and leave a redirect.
#!/usr/bin/perl
use strict; # 'strict' insists that all variables be declared
use diagnostics; # 'diagnostics' expands the cryptic warnings
use Encode;
undef $/; # undefines the separator. Can read one whole file in one scalar.
use lib $ENV{HOME} . '../modules'; # path to perl modules
require 'bin/perlwikipedia_utils.pl';
require 'bin/identify_redlinks.pl';
require 'utils/strip_accents_and_stuff.pl';
use open 'utf8';
# Main driver.
#
# 1. Reads Links.txt, keeping every line of the form "[[Title]] - N"
#    (title plus a numeric frequency).
# 2. Sorts the titles by their accent-stripped form and groups them into
#    one block of wikitext per leading letter (everything that is not
#    A-Z goes to "0-9").
# 3. For each letter: uploads the block to the scratch page $bot_page,
#    asks identify_redlinks() to classify the links, and uploads the
#    resulting sectioned redlink list to the per-letter page.
# 4. Uploads a "most wanted" list of the redlinks whose frequency is > 1,
#    ordered by decreasing frequency.
#
# NOTE(review): wikipedia_login, wikipedia_submit, identify_redlinks and
# strip_accents_and_stuff come from the files pulled in with require;
# their exact contracts are not visible here.
MAIN:{
    # @reds/@blues are deliberately declared once, outside the per-letter
    # loop, exactly as before: identify_redlinks() receives references to
    # them and is presumably responsible for (re)filling them -- verify
    # against identify_redlinks.pl before narrowing their scope.
    my (%links, %pages, %all_links_freq, %redlinks_freq, @reds, @blues);

    my $Editor   = wikipedia_login();
    my $sleep    = 2;
    my $attempts = 100; # make $attempts large, it is surprising how often commits fail
    my $bot_page = 'User:Philosobot/Page2';

    # read into a hash
    # Fail loudly if the input is missing: continuing with no data would
    # end up submitting an empty "most wanted" page.
    my $links_file = "Links.txt";
    open(my $fh, "<", $links_file) or die "Cannot open $links_file: $!";
    my $text = do { local $/; <$fh> };  # slurp the whole file
    close($fh);
    $text = "" unless defined $text;

    my $count = 0;
    foreach my $line (split("\n", $text)) {
        next unless ($line =~ /^\[\[\s*(.*?)\s*\]\]\s+-*\s+(\d+)/);
        my ($link, $freq) = ($1, $2);
        $all_links_freq{$link} = $freq;

        # strip accents (from the link itself, so that the sort key below
        # really reflects the title)
        my $stripped_link = strip_accents_and_stuff($link);

        # sort alphabetically, without overwriting items differing by accents
        $count++;
        $links{$link} = "$stripped_link $count";

        # The line below is useful for debugging. Don't remove.
        #last if ($count > 1000);
    }
    print "Done reading the hash\n";

    # split into subpages, by first letter
    foreach my $link (sort { $links{$a} cmp $links{$b} } keys %links) {
        next unless ($links{$link} =~ /^(.)/);
        my $letter = uc($1);
        $letter = "0-9" if ($letter =~ /[^A-Z]/i); # collapse non-alphabetic in one list
        $pages{$letter} = "" unless (exists $pages{$letter});
        $pages{$letter} .= "\[\[$link\]\]\n";
    }

    #identify redlinks
    foreach my $letter (sort { $a cmp $b } keys %pages) {
        print "$letter\n";

        # submit to server several times, and wait a while,
        # otherwise the page on the server is not always updated
        for (my $repeat = 0; $repeat <= 2; $repeat++) {
            wikipedia_submit($Editor, $bot_page . '.wiki', "Add links, both blue and red",
                             $pages{$letter}, $attempts, $sleep);
            print "Sleep 20\n";
            sleep 20;
        }

        identify_redlinks($bot_page, \@reds, \@blues);

        my $list_text = "__NOTOC__\n{{User:Philosobot/Redlinks/TOC}}\n";
        $list_text .= create_sectioned_list(\@reds);
        my $file = "User:Philosobot/List_of_philosophical_redlinks_($letter).wiki";
        wikipedia_submit($Editor, $file, "Update the list of redlinks", $list_text, $attempts, $sleep);

        # store the redlinks by frequency
        foreach my $link (@reds) {
            if (exists $all_links_freq{$link}) {
                $redlinks_freq{$link} = $all_links_freq{$link};
            }
        }
        #last;
    }

    # submit the most wanted links
    my $most_wanted = 'User:Philosobot/Most wanted redlinks.wiki';
    my $wanted_text = "";
    foreach my $link (sort { $redlinks_freq{$b} <=> $redlinks_freq{$a} } keys %redlinks_freq) {
        # the list is sorted by decreasing frequency, so everything from
        # here on is referenced at most once
        last if ($redlinks_freq{$link} <= 1);
        $wanted_text .= "\* \[\[$link\]\] -- $redlinks_freq{$link}\n";
    }
    wikipedia_submit($Editor, $most_wanted, "Update this list", $wanted_text, $attempts, $sleep);
}
# Build the wikitext body of a redlink list page.
#
# Argument: a reference to an array of link titles, in the order they
#           should appear.
# Returns:  a string with one "[[title]] -- " line per title. A new
#           "==XX==" section heading is emitted before the first title and
#           then again after every 52 titles; XX is the first two
#           characters of the accent-stripped form of the title that
#           opens the section. An empty list yields an empty string.
#
# NOTE(review): strip_accents_and_stuff is defined in another file and is
# assumed to return the transformed string.
sub create_sectioned_list {
    my $reds = shift;

    my $max_per_section = 52;
    my $in_section      = $max_per_section; # forces a heading before the first entry
    my $text            = "";

    foreach my $red (@$reds) {
        if ($in_section >= $max_per_section) {
            # The heading is derived from the title that opens the section.
            my $heading = strip_accents_and_stuff($red);
            #print "==$heading==\n";
            $heading = substr($heading, 0, 2);
            $text .= "==$heading==\n";
            $in_section = 0;
        }
        # print "$red\n";
        $text .= "\[\[$red\]\] -- \n";
        $in_section++;
    }
    return $text;
}