Return page titles for a specified category
From Geohashing
Revision as of 14:41, 17 February 2015 by imported>Sourcerer (→Implementation)
Using MediaWiki api.php
- Hello World!
- Return page titles for a specified category
- Return template contents from a wiki page
- Wiki built-in templates
Get every page title for a specified category
This script fetches 100 page titles per request and pauses 5 seconds between requests to avoid putting too much load on the wiki.
<?php
// =======================================================================
// === These constant values must be set before running the script =======
// =======================================================================
define("API_URL", 'http://wiki.xkcd.com/wgh/api.php'); // Search any wiki page source for "api.php" to find this path on other MediaWiki sites
define("THE_CATEGORY", "Category:Consecutive_geohash_achievement");

// =======================================================================
// Fetch one batch of page titles for a category from the MediaWiki API.
//
// Parameters:
//   $cmtitle    - full category title, including the "Category:" prefix
//   $cmlimit    - maximum number of titles to request in this batch
//   $cmcontinue - continuation token from the previous batch, or "" to
//                 start from the beginning
//
// Returns an array containing the cmcontinue value followed by page titles:
//   Index [0] contains the cmcontinue token needed to get the next batch,
//             or "" when there are no more batches
//   Index [1] up to [cmlimit] contains the page titles, could be zero items
//
// Throws RuntimeException when the request fails, the response is not
// valid JSON, or the API reports an error. Failing loudly prevents a
// truncated list from being mistaken for the complete category.
// =======================================================================
function getTitlesInCategory($cmtitle, $cmlimit = 10, $cmcontinue = "")
{
    $params = array(
        'action'  => 'query',
        'format'  => 'json',
        'list'    => 'categorymembers',
        'cmtitle' => $cmtitle,
        'cmlimit' => $cmlimit,
    );
    if ((string) $cmcontinue !== "") {
        $params['cmcontinue'] = $cmcontinue;
    }

    // http_build_query() URL-encodes every value. Continuation tokens and
    // titles may contain characters such as "|", "&", spaces or non-ASCII
    // text that would otherwise corrupt the query string.
    $url = API_URL . "?" . http_build_query($params, '', '&');
    echo $url . "\n"; // Progress output, printed first so a failing URL is visible

    $json = file_get_contents($url);
    if ($json === false) {
        throw new RuntimeException("Request failed: $url");
    }

    $decodedjson = json_decode($json, true);
    if (!is_array($decodedjson)) {
        throw new RuntimeException("Response is not valid JSON: $url");
    }
    if (isset($decodedjson['error'])) {
        $info = isset($decodedjson['error']['info']) ? $decodedjson['error']['info'] : 'unknown error';
        throw new RuntimeException("API error: $info");
    }

    $titles = array();

    // Current MediaWiki versions return the token under "continue";
    // older versions return it under "query-continue". Accept both.
    if (isset($decodedjson['continue']['cmcontinue'])) {
        $titles[] = (string) $decodedjson['continue']['cmcontinue'];
    } elseif (isset($decodedjson['query-continue']['categorymembers']['cmcontinue'])) {
        $titles[] = (string) $decodedjson['query-continue']['categorymembers']['cmcontinue'];
    } else {
        $titles[] = ""; // No further batches
    }

    if (isset($decodedjson['query']['categorymembers']) && is_array($decodedjson['query']['categorymembers'])) {
        foreach ($decodedjson['query']['categorymembers'] as $value) {
            if (isset($value['title'])) {
                $titles[] = $value['title'];
            }
        }
    }

    return $titles;
}
// =======================================================================

// =======================================================================
// Return an array containing ALL page titles for the category.
//
// Fetches batches of 100 titles and sleeps 5 seconds between requests to
// limit the load on the wiki. Any RuntimeException thrown by
// getTitlesInCategory() propagates to the caller.
// =======================================================================
function getAllTitlesInCategory($cmtitle)
{
    $allTitles = array();
    $cmcontinue = "";

    do {
        $titles = getTitlesInCategory($cmtitle, 100, $cmcontinue);
        // print_r($titles);

        // Element [0] is the continuation token; the rest are page titles.
        $cmcontinue = array_shift($titles);
        $allTitles = array_merge($allTitles, $titles);

        if ($cmcontinue !== "") {
            sleep(5); // Pause only when another request will follow
        }
    } while ($cmcontinue !== "");

    return $allTitles;
}
// =======================================================================

// =======================================================================
// main program
// =======================================================================
print_r(getAllTitlesInCategory(THE_CATEGORY));
?>