# File: Dermimages.py
# By MastCell
# Released for any and all reuse and modification
# Use at your own risk.
# ========================
# This script does the following:
# 1. Load all articles linked to the Dermatology Task Force,
# using Category:Dermatology task force articles.
# 2. Check each page for images, removing those which are part of
# common templates
# 3. Output the results in a sortable wikitable which can be
# cut-and-pasted onto Wikipedia.
#
# The goal is to assess how many dermatology-related articles currently
# lack images, and to assess the overall prevalence of images across
# derm-related articles.
#
# The script can be run from the command line. It will create a file
# called "dermimages_output.txt" in the current working directory (the
# path is relative, so this is the script's own directory only when the
# script is run from there). Note that if such a file already exists, it
# will be overwritten. The output format is meant to be cut-and-pasted
# into Wikipedia as a wikitable.
# =========================
# MWclient module for Wikimedia API calls
import mwclient
# Image names that must not be counted as article "content" images:
# graphics pulled in by common templates, featured-article stars,
# stub and maintenance icons, and the like.
# Modify as needed.
global_exclude_list = {
    "Normal Epidermis and Dermis with Intradermal Nevus 10x.JPG",
    "LinkFA-star.png",
    "Featured article star.svg",
    "Symbol support vote.svg",
    "Rod of Asclepius2.svg",
    "Mergefrom.svg",
    "Gray944.png",
    "Question book-new.svg",
    "Ambox contradict.svg",
    "Mitotic spindle color micrograph.gif",
    "Ambox content.png",
    "Text document with red question mark.svg",
    "Edit-clear.svg",
    "UK-Medical-Bio-Stub.svg",
    "Flag of Germany.svg",
    "Commons-logo.svg",
    "Wiki letter w.svg",
    "Chromosome.svg",
    "DNA stub.png",
    "Merge-arrow.svg",
}
# Running tallies, keyed by a human-readable label: the total number of
# pages seen, plus how many pages had zero, one, two, three, or four-or-more
# images. Every counter starts at zero.
global_imagenums = dict.fromkeys(
    (
        "Total pages",
        "Pages with zero images",
        "Pages with one image",
        "Pages with two images",
        "Pages with three images",
        "Pages with four or more images",
    ),
    0,
)
# Connect to English Wikipedia and look up the task-force category.
# (Its members will generally be article *talk* pages, since that is where
# the Derm task force template is typically placed.)
wpHandle = mwclient.Site("en.wikipedia.org")
dermTalkPages = wpHandle.Pages["Category:Dermatology task force articles"]
# Main program loop: load and process each page
def main_program():
    """Write the full report: table header, one row per page, footer, totals.

    For every member of ``dermTalkPages`` the page named by the member's
    ``page_title`` is looked up (so that the article, rather than its talk
    page, is examined), the "Total pages" tally is incremented, and the page
    together with its images is passed to ``processPage``.
    """
    setUpTable()
    for talkPage in dermTalkPages:
        # Category members are usually talk pages; switch to the article.
        article = wpHandle.Pages[talkPage.page_title]
        global_imagenums["Total pages"] += 1
        processPage(article, article.images())
    closeTable()
    outputDictionary()
# Page processing function
def _encodeForWiki(text):
    """Return *text* restricted to ISO-8859-15, as the native ``str`` type.

    Characters that ISO-8859-15 cannot represent are replaced by XML numeric
    character references (``&#NNNN;``), so that writing the result does not
    fail with a UnicodeEncodeError.
    """
    encoded = text.encode("iso-8859-15", "xmlcharrefreplace")
    if isinstance(encoded, str):
        # Python 2: the encoded byte string is already the native str type
        # that a file opened with mode 'w' accepts.
        return encoded
    # Python 3: a text-mode file rejects bytes, so convert back to str; the
    # character references inserted above are plain ASCII and survive.
    return encoded.decode("iso-8859-15")


def processPage(page, imageList):
    """Write one wikitable row for *page* and update the per-count tallies.

    The row has three cells: a link to the page, the page's images (one
    link per image, separated by ``<br>``), and the number of images.
    Images whose ``page_title`` is in ``global_exclude_list`` are neither
    listed nor counted.

    Parameters:
        page: object with a ``name`` attribute holding the page title.
        imageList: iterable of objects with ``page_title`` and ``name``
            attributes.
    """
    outputFile.write("|-\n")
    outputFile.write("| [[" + _encodeForWiki(page.name) + "]] ||")
    imageCount = 0
    for image in imageList:
        if image.page_title in global_exclude_list:
            continue
        if imageCount > 0:
            # Separate this image from the previous one inside the cell.
            outputFile.write("<br>\n")
        imageCount += 1
        # The leading colon makes the wikitext a link to the image rather
        # than an inline display of it.
        outputFile.write("[[:" + _encodeForWiki(image.name) + "]]")
    outputFile.write("\n || " + str(imageCount) + "\n")
    incrementPageCounter(imageCount)
# Function to update the dictionary of pages
def incrementPageCounter(numImages):
    """Add one page to the ``global_imagenums`` bucket for *numImages*.

    Counts of 0, 1, 2 and 3 each have their own bucket; every other value
    is recorded under "Pages with four or more images".

    Parameters:
        numImages: number of content images found on the page.
    """
    # Look the count up by value. The previous ``numImages is 0`` style
    # compared object identity, which only matches equal integers as a
    # CPython implementation detail.
    bucketByCount = {
        0: "Pages with zero images",
        1: "Pages with one image",
        2: "Pages with two images",
        3: "Pages with three images",
    }
    bucket = bucketByCount.get(numImages, "Pages with four or more images")
    global_imagenums[bucket] += 1
# Output the table header boilerplate
def setUpTable():
    """Write the opening line of the sortable wikitable and its header row."""
    headerPieces = (
        '{| class="wikitable sortable" border="1"',
        "\n",
        '! Page !! class="unsortable" | Images !! Number of images',
        "\n",
    )
    for piece in headerPieces:
        outputFile.write(piece)
# Output the table footer boilerplate
def closeTable():
    """Write the wikitable closing marker (no trailing newline)."""
    tableEnd = "|}"
    outputFile.write(tableEnd)
# Output the dictionary counts
def outputDictionary():
    """Write a "Totals by number of images" section after the table.

    Emits a wikitext heading followed by one ``label: count`` line for each
    entry of ``global_imagenums``.
    """
    outputFile.write("\n== Totals by number of images ==\n")
    # items() exists on both Python 2 and Python 3, whereas iteritems() was
    # removed in Python 3; the pairs come out in the same order either way.
    for label, count in global_imagenums.items():
        outputFile.write(label + ": " + str(count) + "\n")
########################################
# Script entry point
# ------------------
# Opens the output file for writing (replacing any existing copy) and runs
# the main loop while it is open. The report-writing functions write through
# the module-level name outputFile bound here.
########################################
with open("dermimages_output.txt", "w") as outputFile:
    main_program()