#!/usr/bin/env php
<?php

use dokuwiki\Utf8\Sort;
use dokuwiki\File\PageResolver;
use splitbrain\phpcli\CLI;
use splitbrain\phpcli\Options;

if (!defined('DOKU_INC')) define('DOKU_INC', realpath(__DIR__ . '/../') . '/');
define('NOSESSION', 1);
require_once(DOKU_INC . 'inc/init.php');

/**
 * Find wanted pages
 *
 * Walks the page files below a namespace directory, inspects the links of every
 * page and prints those link targets for which page_exists() reports no page,
 * together with the pages linking to them.
 */
class WantedPagesCLI extends CLI
{
    /** dirFilter() verdict: ignore this directory entry */
    protected const DIR_CONTINUE = 1;
    /** dirFilter() verdict: entry is a sub directory that getPages() descends into */
    protected const DIR_NS = 2;
    /** dirFilter() verdict: entry is a *.txt file that getPages() records as a page */
    protected const DIR_PAGE = 3;

    /** @var bool when true only the first column (the sort key) is printed */
    private $skip = false;
    /** @var string 'origin' groups the output by linking page, anything else by wanted page */
    private $sort = 'wanted';

    /** @var array map of first column value => list of second column values */
    private $result = [];

    /**
     * Register options and arguments on the given $options object
     *
     * @param Options $options
     * @return void
     */
    protected function setup(Options $options)
    {
        $options->setHelp(
            'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
            '(the pages that are linking to these missing pages).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );

        $options->registerOption(
            'sort',
            'Sort by wanted or origin page',
            's',
            '(wanted|origin)'
        );

        $options->registerOption(
            'skip',
            'Do not show the second dimension',
            'k'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run. Collects all pages
     * below the start directory, gathers their missing link targets and prints the
     * sorted result to standard output.
     *
     * @param Options $options
     * @return void
     */
    protected function main(Options $options)
    {
        // The directory is derived from the file path of a dummy page 'xxx' inside the
        // requested namespace (or the root namespace when no argument was given).
        // NOTE(review): relies on wikiFN() mapping a page id to its file path - confirm.
        $args = $options->getArgs();
        if ($args) {
            $startdir = dirname(wikiFN($args[0] . ':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->skip = $options->getOpt('skip');

        // keep the declared default when --sort was not passed instead of
        // overwriting it with the "option not set" value
        $sort = $options->getOpt('sort');
        if ($sort) {
            $this->sort = $sort;
        }

        $this->info("searching $startdir");

        foreach ($this->getPages($startdir) as $page) {
            $this->internalLinks($page);
        }

        Sort::ksort($this->result);
        foreach ($this->result as $main => $subs) {
            if ($this->skip) {
                // second dimension suppressed: one line per key
                print "$main\n";
                continue;
            }

            // a page may link to the same target several times
            $subs = array_unique($subs);
            Sort::sort($subs);
            foreach ($subs as $sub) {
                printf("%-40s %s\n", $main, $sub);
            }
        }
    }

    /**
     * Determine directions of the search loop
     *
     * Classifies a single directory entry: '.' and '..', directories whose name
     * starts with an underscore and files not ending in '.txt' are skipped, other
     * directories are namespaces and '.txt' files are pages.
     *
     * @param string $entry name of the directory entry
     * @param string $basepath directory containing the entry
     * @return int one of the DIR_* constants
     */
    protected function dirFilter($entry, $basepath)
    {
        if ($entry === '.' || $entry === '..') {
            return self::DIR_CONTINUE;
        }

        if (is_dir($basepath . '/' . $entry)) {
            // directories with a leading underscore are not searched
            return strpos($entry, '_') === 0 ? self::DIR_CONTINUE : self::DIR_NS;
        }

        if (preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }

        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * @param string $dir directory to scan
     * @return array list of arrays with the keys 'id' and 'file' (full path of the page file)
     * @throws DokuCLI_Exception when $dir is not a directory or cannot be opened
     */
    protected function getPages($dir)
    {
        // number of leading characters to cut from a file path before it is handed
        // to pathID(): the configured data directory plus one separator character
        static $trunclen = null;
        if (!$trunclen) {
            global $conf;
            $trunclen = strlen($conf['datadir'] . ':');
        }

        if (!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        // is_dir() succeeding does not guarantee the directory can be opened
        // (e.g. missing read permission); readdir() must never see a failed handle
        $dh = opendir($dir);
        if ($dh === false) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $pages = [];
        try {
            while (false !== ($entry = readdir($dh))) {
                $status = $this->dirFilter($entry, $dir);
                if ($status === self::DIR_CONTINUE) {
                    continue;
                }

                $path = $dir . '/' . $entry;
                if ($status === self::DIR_NS) {
                    $pages = array_merge($pages, $this->getPages($path));
                } else {
                    $pages[] = [
                        'id' => pathID(substr($path, $trunclen)),
                        'file' => $path,
                    ];
                }
            }
        } finally {
            // release the handle even when a nested directory throws
            closedir($dh);
        }

        return $pages;
    }

    /**
     * Parse instructions and add the non-existing links to the result array
     *
     * Only 'internallink' instructions are considered, plus 'camelcaselink'
     * instructions when $conf['camelcase'] is enabled.
     *
     * @param array $page array with page id and file path
     * @return void
     */
    protected function internalLinks($page)
    {
        global $conf;

        $pid = $page['id'];
        $resolver = new PageResolver($pid);
        $instructions = p_get_instructions(file_get_contents($page['file']));

        foreach ($instructions as $ins) {
            $isLink = $ins[0] === 'internallink'
                || ($conf['camelcase'] && $ins[0] === 'camelcaselink');
            if (!$isLink) {
                continue;
            }

            $mid = $resolver->resolveId($ins[1][0]);
            if (page_exists($mid)) {
                continue;
            }

            [$mid] = explode('#', $mid); //record pages without hashes

            if ($this->sort === 'origin') {
                $this->result[$pid][] = $mid;
            } else {
                $this->result[$mid][] = $pid;
            }
        }
    }
}

// Main
$cli = new WantedPagesCLI();
$cli->run();