#!/usr/bin/php
<?php

use splitbrain\phpcli\CLI;
use splitbrain\phpcli\Options;
use dokuwiki\Utf8\Sort;

if(!defined('DOKU_INC')) define('DOKU_INC', realpath(__DIR__ . '/../') . '/');
define('NOSESSION', 1);
require_once(DOKU_INC . 'inc/init.php');

/**
 * Find wanted pages
 *
 * Walks the page files below a namespace directory, parses each page and
 * prints every internal link whose target page does not exist, together
 * with the page the link was found on.
 */
class WantedPagesCLI extends CLI {

    /** dirFilter() verdict: ignore this directory entry */
    const DIR_CONTINUE = 1;
    /** dirFilter() verdict: entry is a sub directory that has to be descended into */
    const DIR_NS = 2;
    /** dirFilter() verdict: entry is a page file */
    const DIR_PAGE = 3;

    /** @var mixed value of the 'skip' option; when truthy only the first column is printed */
    private $skip = false;

    /** @var mixed value of the 'sort' option; 'origin' groups by linking page, anything else by wanted page */
    private $sort = 'wanted';

    /** @var array collected links: group key => list of ids belonging to that key */
    private $result = array();

    /**
     * Register options and arguments on the given $options object
     *
     * @param Options $options
     * @return void
     */
    protected function setup(Options $options) {
        $options->setHelp(
            'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
            ' (the pages that are linking to these missing pages).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );

        $options->registerOption(
            'sort',
            'Sort by wanted or origin page',
            's',
            '(wanted|origin)'
        );

        $options->registerOption(
            'skip',
            'Do not show the second dimension',
            'k'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run
     *
     * @param Options $options
     * @return void
     */
    protected function main(Options $options) {
        $args = $options->getArgs();

        // 'xxx' is only a placeholder page name: the directory part of the
        // resulting file name is what the search starts from
        if($args) {
            $startdir = dirname(wikiFN($args[0] . ':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->skip = $options->getOpt('skip');
        $this->sort = $options->getOpt('sort');

        $this->info("searching $startdir");

        foreach($this->getPages($startdir) as $page) {
            $this->internalLinks($page);
        }

        Sort::ksort($this->result);
        foreach($this->result as $main => $subs) {
            if($this->skip) {
                print "$main\n";
                continue;
            }

            // a page may link to the same target more than once
            $subs = array_unique($subs);
            Sort::sort($subs);
            foreach($subs as $sub) {
                printf("%-40s %s\n", $main, $sub);
            }
        }
    }

    /**
     * Determine directions of the search loop
     *
     * @param string $entry name of the directory entry
     * @param string $basepath directory the entry was read from
     * @return int one of the DIR_* constants
     */
    protected function dirFilter($entry, $basepath) {
        if($entry === '.' || $entry === '..') {
            return self::DIR_CONTINUE;
        }

        if(is_dir($basepath . '/' . $entry)) {
            // directories starting with an underscore are not descended into
            if(strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }

        if(preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }

        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * @param string $dir directory to scan
     * @return array list of pages, each an array with the keys 'id' and 'file'
     * @throws DokuCLI_Exception when $dir is not a directory or can not be opened
     */
    protected function getPages($dir) {
        // number of leading characters to strip from a file path before it is
        // handed to pathID(); computed once and reused by the recursive calls
        static $trunclen = null;
        if($trunclen === null) {
            global $conf;
            $trunclen = strlen($conf['datadir'] . ':');
        }

        // NOTE(review): DokuCLI_Exception is not declared or imported in this
        // file - confirm it is still provided by the code loaded via inc/init.php
        if(!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        // opendir() returns false when the directory exists but can not be
        // opened; that value must never reach readdir()
        $dh = opendir($dir);
        if($dh === false) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $pages = array();
        while(false !== ($entry = readdir($dh))) {
            $path = $dir . '/' . $entry;
            $status = $this->dirFilter($entry, $dir);

            if($status === self::DIR_CONTINUE) {
                continue;
            }

            if($status === self::DIR_NS) {
                $pages = array_merge($pages, $this->getPages($path));
            } else {
                $pages[] = array(
                    'id' => pathID(substr($path, $trunclen)),
                    'file' => $path,
                );
            }
        }
        closedir($dh);

        return $pages;
    }

    /**
     * Parse instructions and add the non-existing links to the result array
     *
     * @param array $page array with page id and file path
     */
    protected function internalLinks($page) {
        global $conf;

        $instructions = p_get_instructions(file_get_contents($page['file']));
        $pid = $page['id'];
        $cns = getNS($pid);
        $exists = false;

        foreach($instructions as $ins) {
            $isLink = $ins[0] === 'internallink'
                || ($conf['camelcase'] && $ins[0] === 'camelcaselink');
            if(!$isLink) {
                continue;
            }

            $mid = $ins[1][0];
            // $mid and $exists are read back after this call
            resolve_pageid($cns, $mid, $exists);
            if($exists) {
                continue;
            }

            list($mid) = explode('#', $mid); //record pages without hashes

            if($this->sort === 'origin') {
                $this->result[$pid][] = $mid;
            } else {
                $this->result[$mid][] = $pid;
            }
        }
    }
}

// Main
$cli = new WantedPagesCLI();
$cli->run();