imported>Tapin |
imported>Tapin |
Line 1: |
Line 1: |
− | #!/usr/bin/ruby
| + | {{delete}} |
− | | |
− | require 'uri'
| |
− | require 'net/http'
| |
− | require 'rexml/document'
| |
− | | |
# Builds HTTP URLs for the wiki's api.php endpoint.
#
# +host+ and +path+ locate the endpoint. +queryParams+ is an optional Hash of
# query-string parameters; it is nil until a caller assigns it.
class XkcdUrl
  attr_accessor :host
  attr_accessor :path
  attr_accessor :queryParams

  def initialize(host = 'wiki.xkcd.com', path = '/wgh/api.php')
    @host = host
    @path = path
  end

  # Returns a URI::HTTP for the endpoint.
  #
  # When +queryParams+ is set and non-empty, its pairs are appended as a
  # form-encoded query string, so values containing spaces, '&', '=' and
  # similar characters are escaped instead of corrupting the URL or making
  # URI::HTTP.build raise.
  def buildUrl
    parts = { :host => @host, :path => @path }
    unless @queryParams.nil? || @queryParams.empty?
      parts[:query] = URI.encode_www_form(@queryParams)
    end
    URI::HTTP.build(parts)
  end
end
| |
− | | |
# a) Class for category retrieval
#
# Holds the date of a meetup. URL construction is not implemented yet.
class MeetupOn
  #TODO: Need parent w/host,path etc
  attr_accessor :date

  #TODO: Change this to default to today
  def initialize(date = "2008-06-10")
    self.date = date
  end

  # Placeholder with an empty body, so it returns nil.
  def getUrl; end
end
| |
− | | |
− | # b) Class for category member retrieval
| |
− | # c) Class for images linked from (b)
| |
− | | |
# Fetches the names of wiki categories whose name starts with "Meetup_on".
#
# Issues a single allcategories API query (aclimit 500) and returns the text
# of every api/query/allcategories/c element as an Array of Strings.
# Returns [] when the HTTP status code is not "200".
def getMeetupCategories
  wiki = XkcdUrl.new
  wiki.queryParams = {
    "action"   => "query",
    "list"     => "allcategories",
    "acprefix" => "Meetup_on",
    "prop"     => "info",
    "format"   => "xml",
    "aclimit"  => "500"
  }

  #TODO: This will only retrieve the first 500. We need to be able to page through as
  #      necessary, using the acfrom parameter
  #      (taken from prev result xpath api/query-continue/allcategories, I think)
  response = Net::HTTP.get_response(wiki.buildUrl)
  return [] unless response.code == "200"

  doc = REXML::Document.new(response.body)
  # Element#text gives the unescaped value. Text#to_s would leave XML entities
  # such as "&amp;" inside the returned names, which are later reused in queries.
  doc.elements.to_a("api/query/allcategories/c").map { |elt| elt.text.to_s }
end
| |
− | | |
# Lists the titles of the pages that belong to +category+.
#
# Spaces in the category name are turned into underscores and the result is
# prefixed with "Category:" for the cmtitle parameter. Only namespace 0 is
# requested, with a limit of 500 members. Returns the title attribute of each
# api/query/categorymembers/cm element, or [] when the HTTP status code is
# not "200".
def getPagesFromCategory(category)
  wiki = XkcdUrl.new
  wiki.queryParams = {
    "action"      => "query",
    "list"        => "categorymembers",
    "cmtitle"     => "Category:" + category.to_s.tr(' ', '_'),
    "cmlimit"     => "500",
    "format"      => "xml",
    "cmnamespace" => 0 # Only get real pages -- 6 is 'Image:'
  }

  response = Net::HTTP.get_response(wiki.buildUrl)
  return [] unless response.code == "200"

  doc = REXML::Document.new(response.body)
  members = doc.elements.to_a("api/query/categorymembers/cm")
  members.map { |member| member.attributes['title'].to_s }
end
| |
− | | |
# Lists the titles of the images used on the page named +meetup+.
#
# Spaces in the page name are turned into underscores for the titles
# parameter. Returns the title attribute of each
# api/query/pages/page/images/im element, or [] when the HTTP status code is
# not "200".
def getImagesFromMeetup(meetup)
  wiki = XkcdUrl.new
  wiki.queryParams = {
    "action" => "query",
    "titles" => meetup.to_s.tr(' ', '_'),
    "prop"   => "images",
    "format" => "xml"
  }

  response = Net::HTTP.get_response(wiki.buildUrl)
  return [] unless response.code == "200"

  doc = REXML::Document.new(response.body)
  image_nodes = doc.elements.to_a("api/query/pages/page/images/im")
  image_nodes.map { |node| node.attributes['title'].to_s }
end
| |
− | | |
# Lists the titles of the categories that the page named +image+ belongs to.
#
# Spaces in the title are turned into underscores for the titles parameter.
# Returns the title attribute of each api/query/pages/page/categories/cl
# element, or [] when the HTTP status code is not "200".
def getCategoriesFromImage(image)
  wiki = XkcdUrl.new
  wiki.queryParams = {
    "action" => "query",
    "titles" => image.to_s.tr(' ', '_'),
    "prop"   => "categories",
    "format" => "xml"
  }

  response = Net::HTTP.get_response(wiki.buildUrl)
  return [] unless response.code == "200"

  doc = REXML::Document.new(response.body)
  category_links = doc.elements.to_a("api/query/pages/page/categories/cl")
  category_links.map { |link| link.attributes['title'].to_s }
end
| |
− | | |
# Walks every meetup category, each page in it, and each image on those
# pages, printing the hierarchy as it goes. An image is reported when its own
# category list contains the meetup category being walked.
#
# Sleeps one second after every API call.
def main
  categories = getMeetupCategories
  sleep 1

  categories.each do |category|
    puts "Category:#{category}"
    meetup_pages = getPagesFromCategory(category)
    sleep 1

    meetup_pages.each do |meetup_page|
      puts " #{meetup_page}"
      page_images = getImagesFromMeetup(meetup_page)
      sleep 1

      page_images.each do |img|
        puts " #{img}"
        img_categories = getCategoriesFromImage(img)
        sleep 1

        next unless img_categories.include?("Category:#{category}")

        puts "Found an image that _is_ categorized: #{img} in #{category}"
      end
    end
  end
end
| |
− | | |
# Start the crawl only when this file is executed directly, so that loading
# it from another file does not trigger network requests.
main if __FILE__ == $PROGRAM_NAME
| |