#!/usr/bin/env php
<?php

use dokuwiki\Utf8\Sort;
use dokuwiki\File\PageResolver;
use splitbrain\phpcli\CLI;
use splitbrain\phpcli\Options;

if (!defined('DOKU_INC')) define('DOKU_INC', realpath(__DIR__ . '/../') . '/');
define('NOSESSION', 1);
require_once(DOKU_INC . 'inc/init.php');

/**
 * Find wanted pages
 *
 * Walks the page files below a namespace directory, parses every page into
 * instructions and prints the internal link targets that do not exist,
 * together with the pages that link to them.
 */
class WantedPagesCLI extends CLI
{
    /** dirFilter() verdict: ignore this directory entry */
    protected const DIR_CONTINUE = 1;
    /** dirFilter() verdict: entry is a sub directory (namespace) to descend into */
    protected const DIR_NS = 2;
    /** dirFilter() verdict: entry is a page file (*.txt) */
    protected const DIR_PAGE = 3;

    /** @var bool|mixed truthy when only the first column should be printed */
    private $skip = false;
    /** @var string 'origin' groups by linking page, anything else groups by wanted page */
    private $sort = 'wanted';

    /** @var array<string, string[]> collected links, keyed by wanted or origin page depending on $sort */
    private $result = [];

    /**
     * Register options and arguments on the given $options object
     *
     * @param Options $options
     * @return void
     */
    protected function setup(Options $options)
    {
        $options->setHelp(
            'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
            '(the pages that are linking to these missing pages).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );

        $options->registerOption(
            'sort',
            'Sort by wanted or origin page',
            's',
            '(wanted|origin)'
        );

        $options->registerOption(
            'skip',
            'Do not show the second dimension',
            'k'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run
     *
     * @param Options $options
     * @return void
     */
    protected function main(Options $options)
    {
        $args = $options->getArgs();
        // wikiFN() is asked for a dummy page ('xxx') so that dirname() yields
        // the directory of the requested namespace (or of the root namespace)
        if ($args) {
            $startdir = dirname(wikiFN($args[0] . ':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->skip = $options->getOpt('skip');
        // fall back to the documented default instead of overwriting it with false
        $this->sort = $options->getOpt('sort', 'wanted');

        $this->info("searching $startdir");

        foreach ($this->getPages($startdir) as $page) {
            $this->internalLinks($page);
        }

        Sort::ksort($this->result);
        foreach ($this->result as $main => $subs) {
            if ($this->skip) {
                echo "$main\n";
                continue;
            }

            // the same link may occur several times on one page
            $subs = array_unique($subs);
            Sort::sort($subs);
            foreach ($subs as $sub) {
                printf("%-40s %s\n", $main, $sub);
            }
        }
    }

    /**
     * Determine directions of the search loop
     *
     * @param string $entry name of the directory entry
     * @param string $basepath directory the entry was read from
     * @return int one of the DIR_* constants
     */
    protected function dirFilter($entry, $basepath)
    {
        if ($entry == '.' || $entry == '..') {
            return self::DIR_CONTINUE;
        }

        if (is_dir($basepath . '/' . $entry)) {
            // directories starting with an underscore are not descended into
            if (strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }

        if (preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }

        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * @param string $dir directory to scan
     * @return array list of ['id' => page id, 'file' => full file path] entries
     * @throws DokuCLI_Exception when $dir is not a directory or cannot be opened
     */
    protected function getPages($dir)
    {
        // length of the data directory plus one separator character; cutting
        // this prefix off a full path leaves the path relative to the data dir
        static $trunclen = null;
        if (!$trunclen) {
            global $conf;
            $trunclen = strlen($conf['datadir'] . ':');
        }

        // NOTE(review): DokuCLI_Exception is neither imported nor defined in
        // this file - confirm the class is still provided by inc/init.php
        if (!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $dh = opendir($dir);
        if ($dh === false) {
            // e.g. missing permissions; readdir() must not be called on false
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $pages = [];
        try {
            while (false !== ($entry = readdir($dh))) {
                $path = $dir . '/' . $entry;
                $status = $this->dirFilter($entry, $dir);

                if ($status == self::DIR_CONTINUE) {
                    continue;
                }

                if ($status == self::DIR_NS) {
                    $pages = array_merge($pages, $this->getPages($path));
                } else {
                    $pages[] = [
                        'id' => pathID(substr($path, $trunclen)),
                        'file' => $path,
                    ];
                }
            }
        } finally {
            // release the handle even when a nested directory fails to open
            closedir($dh);
        }

        return $pages;
    }

    /**
     * Parse instructions and add the non-existing links to the result array
     *
     * Depending on the sort mode the result is keyed by the linking page
     * ('origin') or by the missing page (default).
     *
     * @param array $page array with page id and file path
     */
    protected function internalLinks($page)
    {
        global $conf;

        $text = file_get_contents($page['file']);
        if ($text === false) {
            // file vanished or is unreadable: skip it instead of parsing false
            $this->warning("Unable to read file {$page['file']}");
            return;
        }

        $instructions = p_get_instructions($text);
        $resolver = new PageResolver($page['id']);
        $pid = $page['id'];

        foreach ($instructions as $ins) {
            $isLink = $ins[0] == 'internallink' || ($conf['camelcase'] && $ins[0] == 'camelcaselink');
            if (!$isLink) {
                continue;
            }

            // Strip a section anchor before the existence check, so the check
            // is made on the same page ID that is recorded below.
            [$mid] = explode('#', $resolver->resolveId($ins[1][0]));
            if (page_exists($mid)) {
                continue;
            }

            if ($this->sort == 'origin') {
                $this->result[$pid][] = $mid;
            } else {
                $this->result[$mid][] = $pid;
            }
        }
    }
}

// Main
$cli = new WantedPagesCLI();
$cli->run();