#!/usr/bin/php
<?php
if(!defined('DOKU_INC')) define('DOKU_INC', realpath(dirname(__FILE__).'/../').'/');
define('NOSESSION', 1);
require_once(DOKU_INC.'inc/init.php');

/**
 * Find wanted pages
 *
 * Walks the page files below a start directory, parses every page and prints
 * the targets of internal links that do not resolve to an existing page.
 */
class WantedPagesCLI extends DokuCLI {

    /** dir_filter() result: ignore this directory entry */
    const DIR_CONTINUE = 1;
    /** dir_filter() result: the entry is a namespace directory to descend into */
    const DIR_NS = 2;
    /** dir_filter() result: the entry is a page file */
    const DIR_PAGE = 3;

    /** @var bool when true, results are reported as "page => wanted page" */
    private $show_pages = false;

    /**
     * Register options and arguments on the given $options object
     *
     * @param DokuCLI_Options $options
     * @return void
     */
    protected function setup(DokuCLI_Options $options) {
        $options->setHelp(
            'Outputs a list of wanted pages (pages which have internal links but do not yet exist).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );
        $options->registerCommand(
            'show-pages',
            'Show wiki pages on which broken links (i.e. wanted pages) are found, listed as: wiki_page=>broken_link'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run. Collects the
     * wanted links of every page below the start directory, removes
     * duplicates, sorts them and prints one entry per line.
     *
     * @param DokuCLI_Options $options
     * @return void
     */
    protected function main(DokuCLI_Options $options) {
        // the directory is derived from the file path of a dummy page ('xxx')
        // located in the requested namespace (or in the root namespace)
        if($options->args) {
            $startdir = dirname(wikiFN($options->args[0].':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $cmd = $options->getCmd();
        if($cmd === 'show-pages') {
            $this->show_pages = true;
        }

        $this->info("searching $startdir");

        $wanted_pages = array();

        foreach($this->get_pages($startdir) as $page) {
            // append directly; calling array_merge() here would copy the whole
            // result list again for every single page
            foreach($this->internal_links($page) as $link) {
                $wanted_pages[] = $link;
            }
        }
        $wanted_pages = array_unique($wanted_pages);
        sort($wanted_pages);

        foreach($wanted_pages as $page) {
            print $page."\n";
        }
    }

    /**
     * Determine directions of the search loop
     *
     * Classifies a directory entry: '.' and '..', directories whose name
     * starts with an underscore and files not ending in '.txt' are skipped,
     * other directories are namespaces and '.txt' files are pages.
     *
     * @param string $entry    name of the directory entry
     * @param string $basepath directory containing the entry
     * @return int one of the DIR_* constants
     */
    protected function dir_filter($entry, $basepath) {
        if($entry === '.' || $entry === '..') {
            return self::DIR_CONTINUE;
        }
        if(is_dir($basepath.'/'.$entry)) {
            if(strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if(preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * @param string $dir directory to scan
     * @return array list of arrays with the keys 'id' and 'file'
     * @throws DokuCLI_Exception when $dir is not a directory or cannot be opened
     */
    protected function get_pages($dir) {
        // length of the data directory prefix (plus one separator character)
        // that is cut off a file path before it is converted to a page id;
        // computed once and reused by all recursive calls
        static $trunclen = null;
        if(!$trunclen) {
            global $conf;
            $trunclen = strlen($conf['datadir'].':');
        }

        if(!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $dh = opendir($dir);
        if($dh === false) {
            // an existing but unreadable directory: without this check
            // readdir() is called on false, which is a TypeError on PHP 8 and
            // can loop endlessly on older versions
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $pages = array();
        while(false !== ($entry = readdir($dh))) {
            $status = $this->dir_filter($entry, $dir);
            if($status === self::DIR_CONTINUE) {
                continue;
            }

            $path = $dir.'/'.$entry;
            if($status === self::DIR_NS) {
                // append the pages of the sub namespace in place instead of
                // re-copying the accumulated list with array_merge()
                foreach($this->get_pages($path) as $subpage) {
                    $pages[] = $subpage;
                }
            } else {
                $pages[] = array(
                    'id'   => pathID(substr($path, $trunclen)),
                    'file' => $path,
                );
            }
        }
        closedir($dh);
        return $pages;
    }

    /**
     * Parse instructions and returns the non-existing links
     *
     * Only 'internallink' instructions are looked at, plus 'camelcaselink'
     * instructions when $conf['camelcase'] is enabled. When the show-pages
     * command is active each entry is formatted as "page id => wanted id",
     * otherwise it is just the wanted id.
     *
     * @param array $page array with page id and file path
     * @return array
     */
    public function internal_links($page) {
        global $conf;
        $instructions = p_get_instructions(file_get_contents($page['file']));
        $links = array();
        $cns = getNS($page['id']);
        $exists = false;
        $pid = $page['id'];
        foreach($instructions as $ins) {
            $is_link = $ins[0] === 'internallink'
                || ($conf['camelcase'] && $ins[0] === 'camelcaselink');
            if(!$is_link) {
                continue;
            }

            $mid = $ins[1][0];
            // resolve_pageid() is expected to update $mid and $exists in place
            resolve_pageid($cns, $mid, $exists);
            if($exists) {
                continue;
            }

            list($mid) = explode('#', $mid); //record pages without hashes
            if($this->show_pages) {
                $links[] = "$pid => $mid";
            } else {
                $links[] = $mid;
            }
        }
        return $links;
    }
}

// Main
$cli = new WantedPagesCLI();
$cli->run();