#!/usr/bin/php
<?php
if(!defined('DOKU_INC')) define('DOKU_INC', realpath(dirname(__FILE__) . '/../') . '/');
define('NOSESSION', 1);
require_once(DOKU_INC . 'inc/init.php');

/**
 * Find wanted pages
 *
 * Command line tool that walks the page files below a namespace directory,
 * parses every page and reports internal links whose target page does not
 * exist. Output is grouped either by the wanted (missing) page or by the
 * origin (linking) page, depending on the --sort option.
 */
class WantedPagesCLI extends DokuCLI {

    /** dir_filter() verdict: ignore this directory entry */
    const DIR_CONTINUE = 1;
    /** dir_filter() verdict: entry is a sub directory that should be descended into */
    const DIR_NS = 2;
    /** dir_filter() verdict: entry is a page file (*.txt) */
    const DIR_PAGE = 3;

    /** @var bool when set, only the first column (the grouping key) is printed */
    private $skip = false;
    /** @var string grouping key of the result: 'wanted' (default) or 'origin' */
    private $sort = 'wanted';

    /** @var array grouping key => list of ids belonging to it (may contain duplicates) */
    private $result = array();

    /**
     * Register options and arguments on the given $options object
     *
     * @param DokuCLI_Options $options
     * @return void
     */
    protected function setup(DokuCLI_Options $options) {
        $options->setHelp(
            'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
            '(the pages that are linking to these missing pages).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );

        $options->registerOption(
            'sort',
            'Sort by wanted or origin page',
            's',
            '(wanted|origin)'
        );

        $options->registerOption(
            'skip',
            'Do not show the second dimension',
            'k'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run. Collects all
     * pages below the start directory, records their dangling links and
     * prints the result sorted by grouping key.
     *
     * @param DokuCLI_Options $options
     * @return void
     */
    protected function main(DokuCLI_Options $options) {

        // 'xxx' is only a placeholder page id: dirname() of its file name
        // yields the directory of the requested (or the root) namespace
        if($options->args) {
            $startdir = dirname(wikiFN($options->args[0] . ':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->skip = $options->getOpt('skip');

        // keep the declared default ('wanted') unless a value was really given
        $sort = $options->getOpt('sort');
        if($sort) {
            $this->sort = $sort;
        }

        $this->info("searching $startdir");

        foreach($this->get_pages($startdir) as $page) {
            $this->internal_links($page);
        }

        ksort($this->result);
        foreach($this->result as $main => $subs) {
            if($this->skip) {
                print "$main\n";
                continue;
            }

            // the same link may occur several times on one page
            $subs = array_unique($subs);
            sort($subs);
            foreach($subs as $sub) {
                printf("%-40s %s\n", $main, $sub);
            }
        }
    }

    /**
     * Determine directions of the search loop
     *
     * @param string $entry    name of the directory entry
     * @param string $basepath directory the entry was found in
     * @return int one of the DIR_* constants
     */
    protected function dir_filter($entry, $basepath) {
        if($entry === '.' || $entry === '..') {
            return self::DIR_CONTINUE;
        }
        if(is_dir($basepath . '/' . $entry)) {
            // directories starting with an underscore are not descended into
            if(strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if(preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * @param string $dir directory to scan
     * @return array list of arrays with the keys 'id' and 'file'
     * @throws DokuCLI_Exception when $dir is not a directory or can not be opened
     */
    protected function get_pages($dir) {
        // length of the data directory path plus one separator character,
        // computed once and reused by all (recursive) calls
        static $trunclen = null;
        if(!$trunclen) {
            global $conf;
            $trunclen = strlen($conf['datadir'] . ':');
        }

        if(!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $dh = opendir($dir);
        if($dh === false) {
            // without this guard readdir() would be called on an invalid handle
            throw new DokuCLI_Exception("Unable to open directory $dir");
        }

        $pages = array();
        while(false !== ($entry = readdir($dh))) {
            $status = $this->dir_filter($entry, $dir);
            if($status == self::DIR_CONTINUE) {
                continue;
            }

            $path = $dir . '/' . $entry;
            if($status == self::DIR_NS) {
                $pages = array_merge($pages, $this->get_pages($path));
            } else {
                $pages[] = array(
                    // path relative to the data directory, turned into a page id
                    'id'   => pathID(substr($path, $trunclen)),
                    'file' => $path,
                );
            }
        }
        closedir($dh);
        return $pages;
    }

    /**
     * Parse instructions and add the non-existing links to the result array
     *
     * Depending on the sort mode the result is keyed by the linking page
     * ('origin') or by the missing page (anything else).
     *
     * @param array $page array with page id and file path
     */
    public function internal_links($page) {
        global $conf;

        $text = file_get_contents($page['file']);
        if($text === false) {
            // unreadable file: PHP already raised a warning, nothing to parse
            return;
        }

        $instructions = p_get_instructions($text);
        $cns          = getNS($page['id']);
        $pid          = $page['id'];
        $exists       = false;

        foreach($instructions as $ins) {
            $islink = $ins[0] == 'internallink' || ($conf['camelcase'] && $ins[0] == 'camelcaselink');
            if(!$islink) {
                continue;
            }

            $mid = $ins[1][0];
            // presumably resolves $mid to a full page id and sets $exists - both are read afterwards
            resolve_pageid($cns, $mid, $exists);
            if($exists) {
                continue;
            }

            list($mid) = explode('#', $mid); //record pages without hashes

            if($this->sort === 'origin') {
                $this->result[$pid][] = $mid;
            } else {
                $this->result[$mid][] = $pid;
            }
        }
    }
}

// Main
$cli = new WantedPagesCLI();
$cli->run();