User:Alterego/ExtensionMatrix/Source code

Extension Matrix Source

This is the source code for User:Alterego/ExtensionMatrix

# Credentials handed to site.login() below.
# NOTE(review): 'test'/'test' are presumably placeholders - confirm before running.
username = password = 'test'

from re import sub
from sys import path
from dateutil.parser import parse
import datetime
path.append('/usr/local/mwclient')
path.append('/usr/local/mwclient/simplejson')
import client as mwclient

# Open a session against mediawiki.org and log the bot in
site = mwclient.Site('www.mediawiki.org', path='/w/')
site.login(username, password)

# Members of this category are the pages the bot walks over
all_extensions = site.categories["All extensions"]

# Raw template wikitext per extension name ('' until a template is found)
extensions = {}
# Parsed template parameters per extension name
extensions_dicts = {}

# Groupings of extension names, each mapping a key to a list of names
extensions_by_type = {}
extensions_by_status = {}
extensions_by_mw_version = {}
extensions_by_creation_date = {}

# Lists of (year, month, day, extension) tuples, filled and sorted later
recently_edited = []
recently_discussed = []
recently_updated = []
recently_created = []

# converts an extension dict back into template format
def BuildTemplate(extension_dict):
    """Render a parameter dict as an {{ExtensionMatrix}} template call.

    The result starts with a '{{ExtensionMatrix' line, has one
    '|key=value' line per entry of `extension_dict`, and ends with a
    '}}' line (including the trailing newline). Entries whose value
    contains an HTML comment marker ('<!-' or '-->') are left out.
    """
    pieces = ['{{ExtensionMatrix\n']
    for name in extension_dict.keys():
        text = extension_dict[name]
        # Values carrying comment fragments are not emitted at all
        if not ('<!-' in text or '-->' in text):
            pieces.append('|' + name + '=' + text + '\n')
    pieces.append('}}\n')
    return ''.join(pieces)

##########################################
# Download the template for each extension
##########################################

# For every member of the category, fetch the wikitext of the matching
# Extension: page and store the text of its {{Extension ...}} template in
# `extensions`. Pages without a usable template keep the value ''.
for this_extension in all_extensions:
    # Strip only the namespace prefix; the title itself may contain ':'
    name_parts = this_extension.name.split(':', 1)
    if len(name_parts) < 2:
        # No namespace prefix at all, nothing to look up
        continue
    extension_name = name_parts[1]

    # Keep empty vals around to create a list of poorly formatted extensions
    extensions[extension_name] = ''

    try:
        # Extract the wikitext. Normally wouldn't be this simple but
        # the extensions are well formatted, each ending with
        # \n}}. Could recursively look for sub templates to be more
        # sure we're at the end.
        wikitext = site.Pages['Extension:' + extension_name].edit()

        template_start = wikitext.find('{{Extension')
        if template_start == -1:
            template_start = wikitext.find('{{extension')
        if template_start == -1:
            # No template on the page
            continue

        template_end = wikitext.find('\n}}', template_start)
        if template_end == -1:
            # Template is never closed with \n}}; treat it as unusable
            continue

        # +3 keeps the closing '\n}}' in the stored text
        extensions[extension_name] = wikitext[template_start:template_end + 3]

    except Exception:
        # If someone did something stupid, not worth breaking the bot.
        # Exception (rather than a bare except) still lets Ctrl-C and
        # SystemExit stop the run.
        continue

# With just a little work we can turn the template into a dictionary
# and then do some cleanup processing of its parameters. This bot
# is definitely relying on the fact that the template ends with \n}}
#
# Each usable template becomes a {parameter: value} dict stored in
# extensions_dicts. Every parameter whose name contains 'hook', 'tag' or
# 'type' is folded into a single 'hooks' / 'tags' / 'types' entry whose
# values are sorted and joined with '<br/>'.

for extension in extensions.keys():

    # Don't allow subpage extensions
    if '/' in extension:
        continue

    extension_dict = {}
    hooks, tags, types = [], [], []

    # Some people like to have funky spacing: remove every run of spaces on
    # either side of a pipe, however long the run is.
    template = sub(r' *\| *', '|', extensions[extension])

    # This hacks off {{Extension and }}, and has the convenient side effect
    # of nuking |templatemode= when it shows up on the first line
    template = template.split('\n')[1:-1]

    for line in template:
        # Can't allow newlines - saw way too many crazy template values. In
        # order for this to be sane a line must start with a pipe and must
        # look like key=value; anything else is ignored.
        if not line.startswith('|') or '=' not in line:
            continue

        key, value = line.split('=', 1)
        key = key.replace('|', '').strip()
        value = value.strip()

        if not value:
            continue

        if key == 'name':
            # Sometimes the name field doesn't contain the actual name of the
            # extension. Exact match only, so parameters that merely contain
            # 'name' (e.g. username) keep their own value.
            value = extension
        if 'hook' in key:
            hooks.append(value)
            continue
        if 'tag' in key:
            tags.append(value)
            continue
        if 'type' in key:
            types.append(value)
            continue
        # Have a look at LocalisationUpdate for nested templateness that is
        # just not ok: skip values with an opening or closing brace pair
        # but not both.
        if ('{{' in value) != ('}}' in value):
            continue
        # These always turn out to be copy/paste jobs from the prototype template
        if '<!-' in value or '-->' in value:
            continue
        # <ref> markers are replaced by a space
        value = value.replace('<ref>', ' ').replace('</ref>', ' ')
        extension_dict[key] = value

    if hooks:
        extension_dict['hooks'] = '<br/>'.join(sorted(hooks))
    if tags:
        extension_dict['tags'] = '<br/>'.join(sorted(tags))
    if types:
        extension_dict['types'] = '<br/>'.join(sorted(types))

    # Sometimes the name isn't specified at all
    if 'name' not in extension_dict:
        extension_dict['name'] = extension

    # Don't allow empty templates, or templates with just one parameter
    if len(extension_dict) < 2:
        continue

    extensions_dicts[extension] = extension_dict

# Names of all extensions in the matrix, in sorted order, for later use
sorted_matrix = sorted(extensions_dicts.keys())

# convert all parseable dates into a common wikitable-sortable format
# ('D Mon YYYY'); an 'update' value that cannot be parsed is removed.
months = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
for extension in extensions_dicts.keys():
    params = extensions_dicts[extension]
    if 'update' not in params:
        continue
    try:
        this_date = parse(params['update'])
    except Exception:
        # Not a date dateutil understands. Exception (rather than a bare
        # except) still lets Ctrl-C and SystemExit stop the run.
        del params['update']
    else:
        params['update'] = str(this_date.day) + ' ' + \
                           months[this_date.month - 1] + ' ' + \
                           str(this_date.year)

##########################################
# figure out what versions of mediawiki this extension works on
# this just looks for a string match of the version. i personally
# don't trust the +,>=,etc.. sign people like to use, for example, 1.12+.
# that generally means that they tested it on 1.12, but not the
# versions that came afterwards.
##########################################

def _mentions_version(text, version):
    """Return True if `version` appears in `text` as a number of its own.

    An occurrence counts when it is followed by a space, newline, '(',
    '.', '+' or the end of the text, and is not the tail of a longer
    dotted number (so '1.0' is not found inside '1.11.0+').
    """
    start = text.find(version)
    while start != -1:
        end = start + len(version)
        following = text[end:end + 1]
        # Up to two characters in front of the match
        leading = text[max(start - 2, 0):start]
        inside_longer_number = leading[-1:].isdigit() or \
            (leading[-1:] == '.' and leading[:-1].isdigit())
        # Template values are stripped, so a version at the very end of the
        # text has no trailing character; accept that case as well.
        properly_ended = following == '' or following in ' \n(.+'
        if properly_ended and not inside_longer_number:
            return True
        start = text.find(version, start + 1)
    return False


for extension in extensions_dicts.keys():
    if 'mediawiki' not in extensions_dicts[extension]:
        continue

    supported_versions = []
    version_text = extensions_dicts[extension]['mediawiki']
    for major_version in range(1, 3):
        for version in range(0, 20):
            this_version = str(major_version) + '.' + str(version)
            # TODO: still hacky. A number that belongs to something else,
            # e.g. "mediawiki 1.16 <= accesscontrol 1.3", is still picked up
            # as a MediaWiki version.
            if _mentions_version(version_text, this_version):
                supported_versions.append(this_version)
                extensions_by_mw_version.setdefault(this_version, []).append(extension)

    # Replace the free-form text by the list of versions that were recognised
    extensions_dicts[extension]['mediawiki'] = ', '.join(supported_versions)

##########################################
# Get the last day that each extension and its talk page were edited
# and the creation date of the extension
##########################################

def _format_day_month_year(stamp):
    """Format an object with tm_mday/tm_mon/tm_year as 'D Mon YYYY'."""
    return str(stamp.tm_mday) + ' ' + \
           str(months[stamp.tm_mon - 1]) + ' ' + \
           str(stamp.tm_year)


for extension in extensions_dicts.keys():
    entry = extensions_dicts[extension]

    extension_page = site.Pages["Extension:" + extension]
    if extension_page.exists: # should never fail!
        # 'lastupdated' is taken from the page's `touched` value
        entry['lastupdated'] = _format_day_month_year(extension_page.touched)

        # The whole revision list is fetched and its last item is used as
        # the first edit (presumably the list is newest-first - confirm
        # against mwclient).
        oldest_revision = list(extension_page.revisions())[-1]
        entry['created'] = _format_day_month_year(oldest_revision['timestamp'])

    talk_page = site.Pages["Extension_talk:" + extension]
    if talk_page.exists:
        entry['lastupdatedtalk'] = _format_day_month_year(talk_page.touched)


##########################################
# Create lists the most recently edited, discussed, updated and created extensions
# A bit redundant with above code, but its more clear to break it out
# Key to sorting by date is a tuple with (year,month,day). easy peasy.
##########################################

# Which dict entry feeds which list
date_fields = (
    ('lastupdated', recently_edited),
    ('lastupdatedtalk', recently_discussed),
    ('update', recently_updated),
    ('created', recently_created),
)

for extension in extensions_dicts.keys():
    entry = extensions_dicts[extension]
    for field, ranking in date_fields:
        if field in entry:
            when = parse(entry[field])
            ranking.append((when.year, when.month, when.day, extension))

# Newest first: ascending sort, then flipped
for field, ranking in date_fields:
    ranking.sort()
    ranking.reverse()

##########################################
# extensions by type
##########################################

# Group extension names by each of their (lower-cased) types. Extensions
# without a 'types' entry are collected under 'notype'.
for extension in extensions_dicts.keys():
    this_extension = extensions_dicts[extension]
    if 'types' in this_extension:
        seen_types = []
        for this_type in this_extension['types'].split('<br/>'):
            # Cut off a trailing HTML comment and the whitespace in front of
            # it, so 'foo <!-- note -->' and 'foo' end up under the same key
            this_type = this_type.lower().split('<!--')[0].strip()
            if this_type in seen_types:
                # Same type given twice: list the extension only once
                continue
            seen_types.append(this_type)
            extensions_by_type.setdefault(this_type, []).append(extension)
    else:
        extensions_by_type.setdefault('notype', []).append(extension)

# Only keep types used by at least five extensions. Iterate over a copy of
# the keys because entries are deleted along the way.
for this_type in list(extensions_by_type.keys()):
    if len(extensions_by_type[this_type]) < 5:
        del extensions_by_type[this_type]

##########################################
# extensions by status
##########################################
# Maps a lower-cased status to the list of extensions that declare it
extensions_by_status = {}
for extension in extensions_dicts.keys():
    this_extension = extensions_dicts[extension]
    if 'status' not in this_extension:
        continue
    this_status = this_extension['status'].lower()
    # Sanity check: only single word statuses (no space) are accepted
    if ' ' in this_status:
        continue
    extensions_by_status.setdefault(this_status, []).append(extension)

##########################################
# Create main extension matrix output page
##########################################
prefix = 'Extension Matrix'

# NOTE(review): now() is the local clock of the machine running the bot,
# while the 'MST' label is a fixed string.
timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
updated = ''.join(['Last updated: ', timestamp, ' MST. '])

num_listed = ''.join([
    'Listing ', str(len(extensions_dicts)),
    ' out of ', str(len(extensions)),
    ' members of [[:Category:Extensions]]<br/>',
])

# Top of the main page: status line plus a link to the full matrix
extension_matrix = ''.join([
    updated, num_listed, '\n',
    '== Entire Extension Matrix ==\n',
    '* [[', prefix, '/AllExtensions|View all extensions]] (very large!)\n',
])

# Create the entire extension matrix: one template call per extension,
# in sorted order, between the header and footer templates
matrix_rows = [BuildTemplate(extensions_dicts[extension]) for extension in sorted_matrix]
entire_matrix = '{{ExtensionMatrixHeader}}' + ''.join(matrix_rows) + '{{ExtensionMatrixFooter}}'
page = site.Pages[prefix + "/AllExtensions"]
page.save(entire_matrix)


# One subpage for each version of mediawiki that has extensions which mention it
extension_matrix += '== By explicitly supported MediaWiki version ==\n* '

for major_version in range(1, 3):
    for minor_version in range(0, 20):
        version = str(major_version) + '.' + str(minor_version)

        # Versions nobody mentions get neither a link nor a page
        if version not in extensions_by_mw_version:
            continue

        members = extensions_by_mw_version[version]
        num_extensions = str(len(members))
        extension_matrix += '[[' + prefix + '/' + version + '|' + version + ']] (' + num_extensions + '), '

        # Create an extension matrix for each version
        rows = [BuildTemplate(extensions_dicts[extension]) for extension in members]
        this_version_matrix = '{{ExtensionMatrixHeader}}' + ''.join(rows) + '{{ExtensionMatrixFooter}}'
        page = site.Pages[prefix + '/' + version]
        page.save(this_version_matrix)

extension_matrix += '\n'

# One subpage for each type of status
extension_matrix += '== By status of extension ==\n*'
status_keys = sorted(extensions_by_status.keys())
for this_status in status_keys:
    members = extensions_by_status[this_status]
    num_extensions = str(len(members))
    # Link on the main page, with the number of extensions in parentheses
    extension_matrix += '[[' + prefix + '/' + this_status + '|' + this_status + ']] (' + num_extensions + '), '
    # The subpage itself: one template call per extension with this status
    rows = [BuildTemplate(extensions_dicts[extension]) for extension in members]
    this_status_matrix = '{{ExtensionMatrixHeader}}' + ''.join(rows) + '{{ExtensionMatrixFooter}}'
    page = site.Pages[prefix + '/' + this_status]
    page.save(this_status_matrix)
extension_matrix += '\n'

# One subpage for each extension type
extension_matrix += '== By type of extension ==\n* '
type_keys = sorted(extensions_by_type.keys())
for this_type in type_keys:
    # A blank type name would give a link and a subpage title ending in '/'
    # with nothing after it. Skip exactly that entry instead of dropping
    # whatever key happens to sort first.
    if not this_type.strip():
        continue
    num_extensions = str(len(extensions_by_type[this_type]))
    extension_matrix += '[[' + prefix + '/' + this_type + '|' + this_type + ']] (' + num_extensions + '), '
    # The subpage itself: one template call per extension of this type
    this_type_matrix = '{{ExtensionMatrixHeader}}'
    for extension in extensions_by_type[this_type]:
        this_type_matrix += BuildTemplate(extensions_dicts[extension])
    this_type_matrix += '{{ExtensionMatrixFooter}}'
    page = site.Pages[prefix + '/' + this_type]
    page.save(this_type_matrix)
extension_matrix += '\n'

# Four "most recent" sections on the main page. Each row gives: section
# heading, the newest-first list of (year, month, day, extension) tuples,
# the dict entry holding the date to display, and the namespace to link to.
recent_sections = (
    ('== 500 most recently created extensions ==\n* ',
     recently_created, 'created', 'Extension'),
    ('== 500 most recently edited extension pages ==\n* ',
     recently_edited, 'lastupdated', 'Extension'),
    ('== 500 most recently edited extension talk pages ==\n* ',
     recently_discussed, 'lastupdatedtalk', 'Extension_talk'),
    ('== 500 most recently updated extensions ==\n* ',
     recently_updated, 'update', 'Extension'),
)

for heading, ranking, date_field, namespace in recent_sections:
    extension_matrix += heading
    # Slicing (instead of indexing 0..499) lists what is available when a
    # ranking has fewer than 500 entries rather than raising IndexError.
    for ranked in ranking[:500]:
        extension_name = ranked[3]
        extension_date = extensions_dicts[extension_name][date_field]
        extension_matrix += '[[' + namespace + ':' + extension_name + '|' + extension_name + ']] (' + extension_date + '), '
    extension_matrix += '\n'

# '\n{{ExtensionMatrixFooter}}\n'

# Publish the main page
page = site.Pages[prefix]
page.save(extension_matrix)

Extension Matrix Hooks Source