#!/usr/bin/env php
<?php

use dokuwiki\Utf8\Sort;
use dokuwiki\File\PageResolver;
use splitbrain\phpcli\CLI;
use splitbrain\phpcli\Options;

if (!defined('DOKU_INC')) define('DOKU_INC', realpath(__DIR__ . '/../') . '/');
define('NOSESSION', 1);
require_once(DOKU_INC . 'inc/init.php');

/**
 * Find wanted pages
 *
 * Walks the page files below a namespace directory, parses each page and
 * reports every internal link whose target page does not exist.
 */
class WantedPagesCLI extends CLI
{
    /** dirFilter() verdict: ignore this directory entry */
    protected const DIR_CONTINUE = 1;
    /** dirFilter() verdict: entry is a sub directory that should be descended into */
    protected const DIR_NS = 2;
    /** dirFilter() verdict: entry is a page file (*.txt) */
    protected const DIR_PAGE = 3;

    /** @var bool when true only the first dimension (the result keys) is printed */
    private $skip = false;
    /** @var string 'origin' groups by linking page, anything else groups by wanted page */
    private $sort = 'wanted';

    /** @var array[] result map: key is the primary page, value the list of related pages */
    private $result = [];

    /**
     * Register options and arguments on the given $options object
     *
     * @param Options $options
     * @return void
     */
    protected function setup(Options $options)
    {
        $options->setHelp(
            'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
            '(the pages that are linking to these missing pages).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );

        $options->registerOption(
            'sort',
            'Sort by wanted or origin page',
            's',
            '(wanted|origin)'
        );

        $options->registerOption(
            'skip',
            'Do not show the second dimension',
            'k'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run. Collects all
     * pages below the start directory, records their missing link targets and
     * prints the sorted result to STDOUT.
     *
     * @param Options $options
     * @return void
     */
    protected function main(Options $options)
    {
        // wikiFN() is asked for a dummy page ('xxx') so that dirname() yields
        // the directory backing the requested (or the root) namespace
        $args = $options->getArgs();
        if ($args) {
            $startdir = dirname(wikiFN($args[0] . ':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->skip = (bool) $options->getOpt('skip');
        // keep the 'wanted' default when --sort was not given on the command line
        $this->sort = $options->getOpt('sort') ?: 'wanted';

        $this->info("searching $startdir");

        foreach ($this->getPages($startdir) as $page) {
            $this->internalLinks($page);
        }

        Sort::ksort($this->result);
        foreach ($this->result as $main => $subs) {
            if ($this->skip) {
                echo "$main\n";
                continue;
            }

            // one output line per unique (primary, secondary) pair
            $subs = array_unique($subs);
            Sort::sort($subs);
            foreach ($subs as $sub) {
                printf("%-40s %s\n", $main, $sub);
            }
        }
    }

    /**
     * Determine directions of the search loop
     *
     * @param string $entry directory entry name as returned by readdir()
     * @param string $basepath directory that contains $entry
     * @return int one of the DIR_* constants
     */
    protected function dirFilter($entry, $basepath)
    {
        if ($entry === '.' || $entry === '..') {
            return self::DIR_CONTINUE;
        }

        if (is_dir($basepath . '/' . $entry)) {
            // directories starting with an underscore are not descended into
            if (strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }

        if (preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }

        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * @param string $dir directory to scan
     * @return array list of ['id' => page id, 'file' => full path] entries
     * @throws DokuCLI_Exception when $dir is not a directory or cannot be opened
     */
    protected function getPages($dir)
    {
        // number of leading bytes to cut from a file path: the data directory
        // plus one separator character (the ':' only stands in for that one char)
        static $trunclen = null;
        if (!$trunclen) {
            global $conf;
            $trunclen = strlen($conf['datadir'] . ':');
        }

        if (!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $dh = opendir($dir);
        if ($dh === false) {
            // e.g. missing permissions; readdir() must not be called on false
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $pages = [];
        try {
            while (false !== ($entry = readdir($dh))) {
                $status = $this->dirFilter($entry, $dir);
                if ($status === self::DIR_CONTINUE) {
                    continue;
                }

                $path = $dir . '/' . $entry;
                if ($status === self::DIR_NS) {
                    $pages = array_merge($pages, $this->getPages($path));
                } else {
                    // pathID() presumably turns the data-dir relative path into a page ID
                    $pages[] = ['id' => pathID(substr($path, $trunclen)), 'file' => $path];
                }
            }
        } finally {
            // release the handle even when a nested directory could not be read
            closedir($dh);
        }

        return $pages;
    }

    /**
     * Parse instructions and add the non-existing links to the result array
     *
     * Depending on the sort mode the result is keyed by the linking page
     * ('origin') or by the missing page (everything else).
     *
     * @param array $page array with page id and file path
     */
    protected function internalLinks($page)
    {
        global $conf;
        $instructions = p_get_instructions(file_get_contents($page['file']));
        $resolver = new PageResolver($page['id']);
        $pid = $page['id'];

        foreach ($instructions as $ins) {
            $isLink = $ins[0] === 'internallink' || ($conf['camelcase'] && $ins[0] === 'camelcaselink');
            if (!$isLink) {
                continue;
            }

            $mid = $resolver->resolveId($ins[1][0]);
            // The resolved ID may still carry a '#section' fragment. Drop it before
            // the existence check so the page itself is tested, not page plus anchor.
            [$mid] = explode('#', $mid, 2);
            if (page_exists($mid)) {
                continue;
            }

            if ($this->sort === 'origin') {
                $this->result[$pid][] = $mid;
            } else {
                $this->result[$mid][] = $pid;
            }
        }
    }
}

// Main
$cli = new WantedPagesCLI();
$cli->run();