From Discworld MUD Wiki
#!/usr/bin/perl
# This script is run without arguments.
# To run it, you need to:
# - Have perl of version 5.10 or later.
# - Have the 'wget' tool.
# - Paste the output of your 'skills stats' command from the MUD into the
# a file with the INPUT_FILENAME below.
# - Manually fill out $password below with the actual password for the
# account.
use strict;
use 5.010_000;
use URI::Escape;
use constant INPUT_FILENAME => "skills.dat";
use constant RESULT_FILENAME => "results.xml";
use constant COOKIE_FILENAME => "cookie.txt";
use constant C_CON => "C";
use constant C_DEX => "D";
use constant C_INT => "I";
use constant C_STR => "S";
use constant C_WIS => "W";
use constant C_NONE => "-";
use constant API_URL => "http://discworld.imaginary-realities.com/w/api.php";
use constant USERNAME => "Skillsbot";
use constant APIHIGHLIMITS_QUERYCAP => 500;
use constant EDIT_SUMMARY => "Automatically created article";
my $password = <!--Fill this out yourself!-->;
my @columns;
my %skills;
my %children;
my %abbreviations;
my %chunk_names;
my %last_names;
my $max_columns = 0;
my $num_skills = 0;
my $m_con = C_CON;
my $m_dex = C_DEX;
my $m_int = C_INT;
my $m_str = C_STR;
my $m_wis = C_WIS;
my $login_token;
my $login_userid;
my $login_sessionid;
my $login_cookieprefix;
sub parse_file;
sub create_skills;
sub make_unique_shortnames;
sub get_shortname;
sub dump_results;
sub http_login;
sub http_logout;
sub remove_existing;
sub create_skill_articles;
sub create_abbreviations;
# Main execution
&parse_file(INPUT_FILENAME);
&create_skills();
&make_unique_shortnames();
&http_login($password);
sleep(1);
&remove_existing(\%skills);
sleep(1);
&remove_existing(\%abbreviations);
sleep(1);
&dump_results();
sleep(1);
&create_skill_articles();
sleep(1);
&create_abbreviations();
sleep(1);
&http_logout();
sub parse_file
{
my ($filename) = @_;
my $column;
my $skill;
my $nest_string;
my $name;
my $stats;
my $nest_level;
# Open the input file. This should contain a dump of the output of your
# 'skills stats' command (strip out any 'more prompts').
open (INFILE, $filename) or die "Could not open ".INPUT_FILENAME."\n";
while ($_ = <INFILE>)
{
chomp;
$column = 0;
while ($_ ne "")
{
# Parse the skill. This gets the '| ' bits in front (tells us the
# nesting level), the skill name and its stat dependencies. It
# deliberately only gets one skill at a time - this lets us keep track of
# the column count.
$skill = $_;
if ($skill =~ /(?<nest>([|] )*)(?<name>[a-z-]+)\.*[ ]+(?<stats>[CDSIW-]+)[ ]*/)
{
$_ = $';
$nest_string = $+{nest};
$name = $+{name};
$stats = $+{stats};
# Convert the nest string into a nesting level
$nest_level = 0;
while($nest_string ne "")
{
$nest_level++;
$nest_string =~ s/[|] //;
}
push(@{$columns[$column]}, [$name, $nest_level, $stats]);
$column++;
$num_skills++;
if ($column > $max_columns)
{
$max_columns = $column;
}
}
else
{
last;
}
}
}
close(INFILE);
}
sub create_skills
{
my $i;
my $j;
my $k;
my $name;
my $full_name;
my $nest_level;
my $abbr_name;
my $stats;
my @tree;
my $con;
my $dex;
my $int;
my $str;
my $wis;
my $parent;
my $chunk_name;
# Iterate over each entry. Because of the iteration ordering below, we do
# all the entries in the first column, then all the entries in the second
# column, and so on. This gives us the 'correct' skill ordering -
# specifically, such that all children follow their parents.
for ($i = 0; $i < $max_columns; $i++)
{
for ($j = 0; $j <= $#{$columns[$i]}; $j++)
{
$name = $columns[$i][$j][0];
$nest_level = $columns[$i][$j][1];
$stats = $columns[$i][$j][2];
# First, we want to convert this skill into its full name - to do so,
# we save off the name at each tree level when it is encountered, and
# make use of the property that the last encountered skill at a given
# tree level must be the direct ancestor of the current skill.
#
# We also want the 2.2.2.2 form of each name, which we get using the
# same technique.
#
# We're also interested in storing the parent of each node - we'll use
# it later when we're determining the abbreviations for elements.
$tree[$nest_level] = $name;
$full_name = $tree[0];
$abbr_name = substr($tree[0], 0, 2);
if ($nest_level > 0)
{
$parent = $tree[0];
for ($k = 1; $k <= $nest_level; $k++)
{
$full_name = join('.', $full_name, $tree[$k]);
$abbr_name = join('.', $abbr_name, substr($tree[$k], 0, 2));
if ($k < $nest_level)
{
$parent = $full_name;
}
}
}
else
{
$parent = "";
}
# Put this skill in the children hash entry belonging to its parent.
push(@{$children{$parent}}, $full_name);
# Put this skill in the abbreviations hash entry belonging to the
# 2.2.2.2 name, and to its last name.
push(@{$abbreviations{$abbr_name}}, $full_name);
push(@{$abbreviations{$name}}, $full_name);
# Save off this skill's last name for convenience.
$last_names{$full_name} = $name;
# Get the 'chunk name' for this element - this is the first two
# characters of its final text portion. This is used later when
# we're sorting out the unique shortnames.
$chunk_name = substr($name, 0, 2);
$chunk_names{$full_name} = $chunk_name;
# Now convert the stats, by simply counting up the occurrences. Note
# that there's a special case for certain tree roots that have no
# associated skills (such as people.trading).
if ($stats eq C_NONE)
{
$con = C_NONE;
$dex = C_NONE;
$int = C_NONE;
$str = C_NONE;
$wis = C_NONE;
}
else
{
$con = (($stats =~ s/$m_con//g) or 0);
$dex = (($stats =~ s/$m_dex//g) or 0);
$int = (($stats =~ s/$m_int//g) or 0);
$str = (($stats =~ s/$m_str//g) or 0);
$wis = (($stats =~ s/$m_wis//g) or 0);
}
print $full_name." (".$parent.") [".$con." ".$dex." ".$int." ".$str." ".$wis."]\n";
@skills{$full_name} = [$con, $dex, $int, $str, $wis];
}
}
}
sub make_unique_shortnames
{
my $parent;
my $i;
my $j;
my $sibling_a;
my $sibling_b;
my $len;
# Iterate over all the parents.
foreach $parent (keys %children)
{
# Each entry in the children array corresponds to a direct child of this
# parent. In order for shortnames to be unique, the chunk name for each
# element must be unique amongst its direct siblings. This is what we
# enforce here.
for ($i = 0; $i <= $#{@{children{$parent}}}; $i++)
{
$sibling_a = $children{$parent}[$i];
for ($j = $i + 1; $j <= $#{@{children{$parent}}}; $j++)
{
$sibling_b = $children{$parent}[$j];
$len = length($chunk_names{$sibling_a});
while ($chunk_names{$sibling_a} eq $chunk_names{$sibling_b})
{
$len++;
$chunk_names{$sibling_a} = substr($last_names{$sibling_a}, 0, $len);
$chunk_names{$sibling_b} = substr($last_names{$sibling_b}, 0, $len);
}
}
}
}
}
sub get_shortname
{
my ($full_name) = @_;
my $shortname;
my @name_segments;
my $cur_ancestor;
my $i;
# Split the full name up.
@name_segments = split(/\./, $full_name);
# Create the shortname as the concatenation of the chunk names of each of
# this skill's ancestors.
$cur_ancestor = $name_segments[0];
$shortname = $chunk_names{$cur_ancestor};
for ($i = 1; $i <= $#name_segments; $i++)
{
$cur_ancestor = join('.', $cur_ancestor, $name_segments[$i]);
$shortname = join('.', $shortname, $chunk_names{$cur_ancestor});
}
return $shortname;
}
sub dump_results
{
my $skill;
my $abbreviation;
my $j;
# Dump each skill, its unique shortname and its stat dependencies
print "SKILLS:\n";
foreach $skill (sort keys %skills)
{
print $skill." (".&get_shortname($skill)."): ".$skills{$skill}[0]." ".$skills{$skill}[1]." ".$skills{$skill}[2]." ".$skills{$skill}[3]." ".$skills{$skill}[4]."\n";
}
# Now dump all the abbreviations, highlighting those which are ambiguous
print "\nABBREVIATIONS:\n";
foreach $abbreviation (sort keys %abbreviations)
{
if ($#{@{abbreviations{$abbreviation}}} > 0)
{
print "* ";
}
print $abbreviation." ->";
for ($j = 0; $j <= $#{@{abbreviations{$abbreviation}}}; $j++)
{
print " ".$abbreviations{$abbreviation}[$j];
}
print "\n";
}
}
sub http_login
{
my ($password) = @_;
my $post_data;
my $command;
# Build the POST data
$post_data = "lgname=".USERNAME."&lgpassword=".uri_escape($password);
# Execute login
print "Logging in... ";
$command = "wget -q --save-cookies=".COOKIE_FILENAME." --output-document=".RESULT_FILENAME." --post-data=\"".$post_data."\" \"".API_URL."?action=login&format=xml\"";
`$command`;
open(RESULTS, RESULT_FILENAME) or die "Could not find ".RESULT_FILENAME."\n";
$_ = <RESULTS>;
close(RESULTS);
# Check the login was successful
if (!(/login result=\"Success\"/))
{
print "FAILED\n";
print $_;
die;
}
print "OK\n";
# Save off the tokens
/lguserid=\"(?<userid>[^\"]+)\".*lgtoken=\"(?<token>[^\"]+)\" cookieprefix=\"(?<cp>[^\"]+)\" sessionid=\"(?<sessionid>[^\"]+)\"/;
$login_userid = $+{userid};
$login_token = $+{token};
$login_cookieprefix = $+{cp};
$login_sessionid = $+{sessionid};
print "Userid: ".$login_userid."\n";
print "Token: ".$login_token."\n";
print "Cookie prefix: ".$login_cookieprefix."\n";
print "Session ID: ".$login_sessionid."\n";
}
sub http_logout
{
my ($password) = @_;
my $post_data;
my $command;
# Execute logout
print "Logging out... ";
$command = "wget -q --load-cookies=".COOKIE_FILENAME." --output-document=".RESULT_FILENAME." \"".API_URL."?action=logout&format=xml\"";
`$command`;
# There aren't any useful results to speak of, and there's little we can do
# if the logout fails, so just end here.
print "OK\n";
}
sub remove_existing
{
my ($href) = @_;
my $key;
my @queries;
my $query_num = 0;
my $keys_in_query = 0;
my $query;
my $command;
# Build up lists of all the titles we want to query.
foreach $key (sort keys %$href)
{
# Add each element into a query. Note that there's a (high) limit on the
# number we can query with each http request, hence we may need to send
# multiple batches
if ($keys_in_query == 0)
{
$queries[$query_num] = $key;
}
else
{
$queries[$query_num] = join('|', $queries[$query_num], $key);
}
$keys_in_query++;
if ($keys_in_query >= APIHIGHLIMITS_QUERYCAP)
{
$query_num++;
$keys_in_query = 0;
}
}
# Now send in the queries.
foreach $query (@queries)
{
print "Querying: ".$query."\n\n";
$command = "wget -q --load-cookies=".COOKIE_FILENAME." --output-document=".RESULT_FILENAME." \"".API_URL."?action=query&format=xml&titles=".$query."\"";
`$command`;
open(RESULTS, RESULT_FILENAME) or die "Could not find ".RESULT_FILENAME."\n";
$_ = <RESULTS>;
close(RESULTS);
# Parse the results. We're looking for pages that _aren't_ marked as
# missing. Since those are the pages that already exist, erase them from
# our local list of pages to deal with.
while (/title=\"(?<key>[^\"]+)\" \/>/)
{
$key = lc($+{key});
$_ = $';
delete $$href{$key};
}
}
}
sub create_skill_articles
{
my $skill;
my $command;
my $edit_token;
my $article_text;
my $post_data;
my $is_branch = 0;
my $childlevel = 0;
my @elements;
my $child;
my $child_string;
my $child_num;
# Get an edit token
$command = "wget -q --keep-session-cookies --load-cookies=".COOKIE_FILENAME." --save-cookies=".COOKIE_FILENAME." --output-document=".RESULT_FILENAME." \"".API_URL."?action=query&format=xml&prop=info&intoken=edit&titles=Main%20Page\"";
`$command`;
open(RESULTS, RESULT_FILENAME) or die "Could not find ".RESULT_FILENAME."\n";
$_ = <RESULTS>;
close(RESULTS);
# Parse the results to get the edit token.
if (/edittoken=\"(?<token>[^\"]+)\"/)
{
$edit_token = $+{token};
}
else
{
print "Could not find edit token:\n";
print $_;
die;
}
foreach $skill (sort keys %skills)
{
# Determine whether this skill is a leaf or a branch.
if (exists $children{$skill})
{
# Branch
$is_branch = 1;
@elements = split(/\./, $skill);
$childlevel = $#elements + 2;
}
else
{
# Leaf
$is_branch = 0;
}
# Build the article data
if ($is_branch == 1)
{
$article_text = "{{Infonav skill\n".
" |shortform=".&get_shortname($skill)."\n".
" |con=".$skills{$skill}[0]."\n".
" |dex=".$skills{$skill}[1]."\n".
" |int=".$skills{$skill}[2]."\n".
" |str=".$skills{$skill}[3]."\n".
" |wis=".$skills{$skill}[4]."\n".
" |childlevel=".$childlevel."\n";
$child_num = 1;
for ($child_num = 0; $child_num <= $#{@{children{$skill}}}; $child_num++)
{
$child = $children{$skill}[$child_num];
@elements = split(/\./, $child);
$child_string = " |child".($child_num + 1)."=".$elements[$#elements]."\n";
$article_text = join('', $article_text, $child_string);
}
$article_text = join('',
$article_text,
"}}\n".
"\n".
"{{skillsbot-stub}}\n".
"\n".
"[[Category:Skills]]\n");
}
else
{
$article_text = "{{Infonav skill\n".
" |shortform=".&get_shortname($skill)."\n".
" |con=".$skills{$skill}[0]."\n".
" |dex=".$skills{$skill}[1]."\n".
" |int=".$skills{$skill}[2]."\n".
" |str=".$skills{$skill}[3]."\n".
" |wis=".$skills{$skill}[4]."\n".
"}}\n".
"\n".
"{{skillsbot-stub}}\n".
"\n".
"[[Category:Skills]]\n";
}
# Build the POST data.
$post_data = "action=edit".
"&format=xml".
"&title=".uri_escape(ucfirst($skill)).
"&summary=".uri_escape(EDIT_SUMMARY).
"&bot".
"&createonly".
"&text=".uri_escape($article_text).
"&token=".uri_escape($edit_token);
# Execute the command, check it completed OK.
print "Creating: ".$skill." ... ";
$command = "wget -q --keep-session-cookies --load-cookies=".COOKIE_FILENAME." --output-document=".RESULT_FILENAME." --post-data=\"".$post_data."\" \"".API_URL."\"";
`$command`;
open(RESULTS, RESULT_FILENAME) or die "Could not find ".RESULT_FILENAME."\n";
$_ = <RESULTS>;
close(RESULTS);
# Check the login was successful
if (!(/result=\"Success\"/))
{
print "FAILED\n";
print $_;
die;
}
print "OK\n";
}
}
sub create_abbreviations
{
my $abbr;
my $command;
my $edit_token;
my $article_text;
my $post_data;
my $is_disambig = 0;
my $child;
my $child_string;
my $child_num;
# Get an edit token
$command = "wget -q --keep-session-cookies --load-cookies=".COOKIE_FILENAME." --save-cookies=".COOKIE_FILENAME." --output-document=".RESULT_FILENAME." \"".API_URL."?action=query&format=xml&prop=info&intoken=edit&titles=Main%20Page\"";
`$command`;
open(RESULTS, RESULT_FILENAME) or die "Could not find ".RESULT_FILENAME."\n";
$_ = <RESULTS>;
close(RESULTS);
# Parse the results to get the edit token.
if (/edittoken=\"(?<token>[^\"]+)\"/)
{
$edit_token = $+{token};
}
else
{
print "Could not find edit token:\n";
print $_;
die;
}
foreach $abbr (sort keys %abbreviations)
{
# Determine whether this it to be a redirect or disambiguation.
if ($#{@{abbreviations{$abbr}}} > 0)
{
# Disambiguation
$is_disambig = 1;
}
else
{
# Redirect
$is_disambig = 0;
}
# Build the article data
if ($is_disambig == 1)
{
$article_text = "{{disambig}}\n".
"\n".
"'''".$abbr."''' may refer to:\n";
for ($child_num = 0; $child_num <= $#{@{abbreviations{$abbr}}}; $child_num++)
{
$child = $abbreviations{$abbr}[$child_num];
$child_string = "* [[".$child."]]\n";
$article_text = join('', $article_text, $child_string);
}
}
else
{
$child = $abbreviations{$abbr}[0];
$article_text = "#REDIRECT [[".$child."]]\n";
}
# Build the POST data.
$post_data = "action=edit".
"&format=xml".
"&title=".uri_escape(ucfirst($abbr)).
"&summary=".uri_escape(EDIT_SUMMARY).
"&bot".
"&createonly".
"&text=".uri_escape($article_text).
"&token=".uri_escape($edit_token);
# Execute the command, check it completed OK.
print "Creating: ".$abbr." ... ";
$command = "wget -q --keep-session-cookies --load-cookies=".COOKIE_FILENAME." --output-document=".RESULT_FILENAME." --post-data=\"".$post_data."\" \"".API_URL."\"";
`$command`;
open(RESULTS, RESULT_FILENAME) or die "Could not find ".RESULT_FILENAME."\n";
$_ = <RESULTS>;
close(RESULTS);
# Check the login was successful
if (!(/result=\"Success\"/))
{
print "FAILED\n";
print $_;
die;
}
print "OK\n";
}
}