#!/usr/bin/python
"""Re-sort the graticule lists of the wiki's Active Graticules page.

The script downloads the current wikitext of the page through the wiki's
api.php, copies every line that precedes the first category heading
unchanged, and re-emits each category heading followed by its graticule
links ordered by descending latitude, then ascending longitude.  The
result is written to stdout encoded as UTF-8.

Runs unchanged on Python 2.7 and Python 3.
"""

import re
import sys
from functools import cmp_to_key
from xml.dom import minidom

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 moved urlopen(); alias the module so the call site is the same.
    import urllib.request as urllib2

# API query returning the latest revision of Active_Graticules as XML.
PAGE_URL = ("http://wiki.xkcd.com/wgh/api.php?action=query"
            "&titles=Active_Graticules&prop=revisions&rvprop=content"
            "&format=xml")

# category regex matches the === [[:Category:name|name]] === lines
category = re.compile(r"===.*\[\[:Category:(.*)\|(.*)\]\].*===")

# graticule regex matches the [[graticuleName|lat, long (cityName)]] lines
graticule = re.compile(r"\[\[(.*)\|([- 0-9]*),([- 0-9]*)\((.*)\)\]\]")

# Output buffer shared by output_category() and process_lines(); one entry
# per printed line.  It is only ever mutated in place, never rebound.
output = []


# set up some different sort routines
def sort_ns_we(first, second):
    """cmp-style comparator: descending latitude, then ascending longitude.

    Both arguments are (name, lat, long, city) tuples whose lat/long are
    integers as produced by encode_num().  Returns a negative, zero or
    positive integer, as list.sort(cmp) / functools.cmp_to_key expect.
    """
    _name1, lat1, long1, _city1 = first
    _name2, lat2, long2, _city2 = second
    if lat2 - lat1 == 0:
        return long1 - long2
    return lat2 - lat1


def sort_we_ns(first, second):
    """cmp-style comparator: ascending longitude, then descending latitude.

    Takes the same (name, lat, long, city) tuples as sort_ns_we().  Not used
    by the script itself; kept as an alternative ordering.
    """
    _name1, lat1, long1, _city1 = first
    _name2, lat2, long2, _city2 = second
    if long2 - long1 == 0:
        return lat2 - lat1
    return long1 - long2


# deal with -0/+0 by shifting +0 and positives one to the right during process
def encode_num(num):
    """Turn a coordinate string into an integer that keeps -0 and 0 apart.

    "-0" and negative values map to themselves (0, -1, -2, ...); "0" and
    positive values are shifted up by one (1, 2, 3, ...).  Surrounding
    whitespace is accepted because int() ignores it.

    Raises ValueError when num is not parseable by int().
    """
    value = int(num)
    if value == 0 and "-" not in num:
        return 1
    if value <= 0:
        return value
    return value + 1


# shift all the positives back one and return "-0" for the zero
def decode_num(num):
    """Inverse of encode_num(): return the coordinate as display text."""
    if num == 0:
        return "-0"
    if num < 0:
        return str(num)
    return str(num - 1)


# puts a category and all its graticules in sorted order into the output buffer
def output_category(name, graticules):
    """Append a category heading and its sorted graticule links to output.

    name is the full heading line as read from the page.  graticules is a
    list of (graticuleName, lat, long, cityName) tuples with encoded
    lat/long; it is sorted in place with sort_ns_we.
    """
    output.append("\n" + name + "\n")
    # cmp_to_key keeps the comparator semantics while avoiding the
    # Python 2-only positional cmp argument of list.sort().
    graticules.sort(key=cmp_to_key(sort_ns_we))
    for graticule_name, lat, lng, city_name in graticules:
        output.append("[[" + graticule_name + "|" + decode_num(lat) + ", "
                      + decode_num(lng) + " (" + city_name + ")]]\n")


def process_lines(lines):
    """Feed the page's lines through the category/graticule state machine.

    Lines before the first category heading are appended to output as-is.
    From the first heading on, only heading lines and graticule lines are
    used: graticules are collected per category and flushed, sorted, via
    output_category() when the next heading (or the end of input) is
    reached.  Any other line inside a category is not copied to output.
    """
    cur_category = ""
    cur_graticules = []

    for line in lines:
        is_heading = category.match(line) is not None

        if cur_category == "":
            if is_heading:
                # first category heading: start collecting
                cur_category = line
            else:
                # text before any category that we want to keep
                output.append(line)
            continue

        if is_heading and line != cur_category:
            # next category reached: emit the finished one and reset
            output_category(cur_category, cur_graticules)
            cur_category = line
            cur_graticules = []

        found = graticule.match(line)
        if found:
            cur_graticules.append((found.group(1),
                                   encode_num(found.group(2)),
                                   encode_num(found.group(3)),
                                   found.group(4)))

    # end of data: the last category has not been emitted yet
    if cur_category != "":
        output_category(cur_category, cur_graticules)


def fetch_page_lines(url=PAGE_URL):
    """Download the page revision from the API and return its lines.

    The wikitext is taken from the first <rev> element of the XML response
    rather than from a fixed chain of firstChild/lastChild hops.

    Raises ValueError when the response contains no <rev> element.
    """
    response = urllib2.urlopen(url)
    try:
        dom = minidom.parse(response)
    finally:
        response.close()

    revisions = dom.getElementsByTagName("rev")
    if not revisions:
        raise ValueError("no <rev> element in API response from " + url)

    content = "".join(node.data for node in revisions[0].childNodes
                      if node.nodeType == node.TEXT_NODE)
    return content.split("\n")


def main():
    """Fetch the page, rebuild it in sorted order and print it as UTF-8."""
    process_lines(fetch_page_lines())

    # Write bytes so the UTF-8 encoding is explicit on both Python 2 and 3.
    stream = getattr(sys.stdout, "buffer", sys.stdout)
    for line in output:
        stream.write(line.encode("utf-8") + b"\n")


if __name__ == "__main__":
    main()
(( Sample output removed - no longer matches All Graticules page --Thomcat 14:19, 3 July 2008 (UTC) ))