<?php
/**
 * Fetch the list of language names from the URL below and write it to
 * langnames.php (next to this script) as a PHP array named $langnames,
 * keyed by the text of the first cell of each table row.
 *
 * The script refuses to overwrite an existing output file.
 * This script requires the PHP DOM module.
 */

$url = "http://meta.wikimedia.org/wiki/Template:List_of_language_names_ordered_by_code";
$out = dirname(__FILE__)."/langnames.php";

//---------

// never overwrite an existing file; remove it by hand to regenerate
if (file_exists($out)) {
  die("File already exists: $out\n");
}

// fetch page from URL and parse it
// (warnings are switched off only for the duration of the load)
error_reporting(E_ALL & ~E_WARNING);
$doc = new DOMDocument();
$loaded = $doc->loadHTMLFile($url);
error_reporting(E_ALL);
if (!$loaded) die("Could not load page: $url\n");

// find the data we're interested in: the first table of the page
$tables = $doc->getElementsByTagName('table');
// a DOMNodeList is an object, so empty() never reports it as empty;
// the number of matches has to be read from its length property
if ($tables->length == 0) die("No table found");
$table = $tables->item(0);

// collect the rows of the table; they are either direct children of the
// table or wrapped in a thead/tbody/tfoot section
$sections = array('thead', 'tbody', 'tfoot');
$rows = array();
foreach ($table->childNodes as $child) {
  if ($child->nodeName == 'tr') {
    $rows[] = $child;
  } elseif (in_array($child->nodeName, $sections)) {
    foreach ($child->childNodes as $grandchild) {
      if ($grandchild->nodeName == 'tr') $rows[] = $grandchild;
    }
  }
}

$langs = array();
foreach ($rows as $tr) {
  $cells = $tr->childNodes;

  // first element child of the row; text nodes (e.g. whitespace between
  // tags) are skipped because they carry no tag name
  $first = null;
  foreach ($cells as $cell) {
    if ($cell->nodeType == XML_ELEMENT_NODE) {
      $first = $cell;
      break;
    }
  }
  if ($first !== null
      && $first->nodeName == 'th'
      && trim($first->textContent) == 'Old projects') {
    // skip everything under 'old projects'
    break;
  }

  // the first td holds the code, the remaining tds are collected in order
  $code = null;
  $row = array();
  foreach ($cells as $td) {
    if ($td->nodeName != 'td') continue;
    if ($code === null) $code = trim($td->textContent);
    else $row[] = trim($td->textContent);
  }

  // $row[2] is the fourth td of the row (presumably the name column wanted
  // here -- confirm against the page layout); rows with fewer cells are skipped
  if ($code !== null && isset($row[2])) $langs[$code] = $row[2];
}

// an empty result would still create the file, and the existence check at
// the top would then block the next attempt
if (count($langs) == 0) die("No language rows found in: $url\n");

// write the data to a PHP file
ob_start();
echo '<'."?php\n";
echo "# This is a generated file.\n";
echo "# Generated from URL: $url\n";
echo "# Generated date: ".date('c')."\n";
echo "# Encoding: UTF-8\n";
echo "\n";
echo '$langnames = '; var_export($langs); echo ";\n";
$php = ob_get_clean();

// file_put_contents() returns false on failure, a byte count otherwise
$ok = file_put_contents($out, $php);
if ($ok === false) die("Could not write file: $out\n");
echo "File written successfully.\n";

// vim:ex:ts=2:sw=2:enc=utf-8: