#!/usr/bin/env php
<?php

use dokuwiki\Utf8\Sort;
use dokuwiki\File\PageResolver;
use splitbrain\phpcli\CLI;
use splitbrain\phpcli\Options;

if (!defined('DOKU_INC')) define('DOKU_INC', realpath(__DIR__ . '/../') . '/');
define('NOSESSION', 1);
require_once(DOKU_INC . 'inc/init.php');

/**
 * Find wanted pages
 *
 * Walks all page files below a start directory, parses each one and collects
 * every internal link whose target page does not exist. The collected links
 * are printed either grouped by the missing (wanted) page or by the page
 * that contains the link (origin).
 */
class WantedPagesCLI extends CLI
{
    /** Directory entry is to be ignored */
    protected const DIR_CONTINUE = 1;
    /** Directory entry is a namespace (sub directory) that should be descended into */
    protected const DIR_NS = 2;
    /** Directory entry is a page file */
    protected const DIR_PAGE = 3;

    /** @var bool when set, only the first column (the sort key) is printed */
    private $skip = false;
    /** @var string|bool which page is used as grouping key; 'origin' groups by linking page */
    private $sort = 'wanted';

    /** @var array map of grouping key => list of related page ids */
    private $result = [];

    /**
     * Register options and arguments on the given $options object
     *
     * @param Options $options
     * @return void
     */
    protected function setup(Options $options)
    {
        $options->setHelp(
            'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
            '(the pages that are linking to these missing pages).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );

        $options->registerOption(
            'sort',
            'Sort by wanted or origin page',
            's',
            '(wanted|origin)'
        );

        $options->registerOption(
            'skip',
            'Do not show the second dimension',
            'k'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run
     *
     * @param Options $options
     * @return void
     */
    protected function main(Options $options)
    {
        // Resolve a placeholder page name ('xxx') and take its directory to
        // obtain the directory of the requested (or the root) namespace.
        $args = $options->getArgs();
        if ($args) {
            $startdir = dirname(wikiFN($args[0] . ':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->skip = $options->getOpt('skip');
        $this->sort = $options->getOpt('sort');

        $this->info("searching $startdir");

        foreach ($this->getPages($startdir) as $page) {
            $this->internalLinks($page);
        }

        Sort::ksort($this->result);
        foreach ($this->result as $main => $subs) {
            if ($this->skip) {
                // only the grouping key is wanted
                print "$main\n";
                continue;
            }

            // the same link may occur several times on a page
            $subs = array_unique($subs);
            Sort::sort($subs);
            foreach ($subs as $sub) {
                printf("%-40s %s\n", $main, $sub);
            }
        }
    }

    /**
     * Determine directions of the search loop
     *
     * Classifies a single directory entry: '.' and '..', directories starting
     * with an underscore and files not ending in '.txt' are skipped, other
     * directories are namespaces, '.txt' files are pages.
     *
     * @param string $entry name of the directory entry
     * @param string $basepath directory the entry was found in
     * @return int one of the DIR_* constants
     */
    protected function dirFilter($entry, $basepath)
    {
        if ($entry === '.' || $entry === '..') {
            return self::DIR_CONTINUE;
        }
        if (is_dir($basepath . '/' . $entry)) {
            if (strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if (preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * @param string $dir directory to scan
     * @return array list of ['id' => page id, 'file' => full path] entries
     * @throws DokuCLI_Exception when the directory does not exist or cannot be opened
     */
    protected function getPages($dir)
    {
        // number of leading characters to cut from a full path before it is
        // handed to pathID(); computed once from the configured data directory
        static $trunclen = null;
        if (!$trunclen) {
            global $conf;
            $trunclen = strlen($conf['datadir'] . ':');
        }

        if (!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        // opendir() returns false on failure (e.g. missing permissions);
        // passing that on to readdir() would fail badly, so bail out here
        $dh = opendir($dir);
        if ($dh === false) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $pages = [];
        try {
            while (false !== ($entry = readdir($dh))) {
                $status = $this->dirFilter($entry, $dir);
                if ($status === self::DIR_CONTINUE) {
                    continue;
                }

                $path = $dir . '/' . $entry;
                if ($status === self::DIR_NS) {
                    $pages = array_merge($pages, $this->getPages($path));
                } else {
                    $pages[] = ['id' => pathID(substr($path, $trunclen)), 'file' => $path];
                }
            }
        } finally {
            // release the handle even when a nested directory could not be read
            closedir($dh);
        }

        return $pages;
    }

    /**
     * Parse instructions and add the non-existing links to the result array
     *
     * Depending on the sort mode the result is keyed by the linking page
     * ('origin') or by the missing page (anything else). Pages whose file
     * cannot be read are skipped with a warning.
     *
     * @param array $page array with page id and file path
     */
    protected function internalLinks($page)
    {
        global $conf;

        $text = file_get_contents($page['file']);
        if ($text === false) {
            $this->warning("Unable to read page file {$page['file']}, skipping");
            return;
        }

        $instructions = p_get_instructions($text);
        $resolver = new PageResolver($page['id']);
        $pid = $page['id'];

        foreach ($instructions as $ins) {
            $isLink = $ins[0] === 'internallink'
                || ($conf['camelcase'] && $ins[0] === 'camelcaselink');
            if (!$isLink) {
                continue;
            }

            $mid = $resolver->resolveId($ins[1][0]);
            if (page_exists($mid)) {
                continue;
            }

            [$mid] = explode('#', $mid); //record pages without hashes

            if ($this->sort == 'origin') {
                $this->result[$pid][] = $mid;
            } else {
                $this->result[$mid][] = $pid;
            }
        }
    }
}

// Main
$cli = new WantedPagesCLI();
$cli->run();