#!/usr/bin/php
<?php
if(!defined('DOKU_INC')) define('DOKU_INC', realpath(dirname(__FILE__).'/../').'/');
define('NOSESSION', 1);
require_once(DOKU_INC.'inc/init.php');

/**
 * Find wanted pages
 *
 * Walks the page files below a namespace directory, parses every page and
 * prints the targets of internal links that do not resolve to an existing
 * page. With "show-pages" each entry is printed as "linking_page => wanted_page".
 */
class WantedPagesCLI extends DokuCLI {

    /** Directory entry is to be skipped */
    const DIR_CONTINUE = 1;
    /** Directory entry is a sub directory (namespace) to descend into */
    const DIR_NS = 2;
    /** Directory entry is a page file (*.txt) */
    const DIR_PAGE = 3;

    /** @var bool when true, output "page => wanted page" pairs instead of bare page ids */
    private $show_pages = false;

    /**
     * Register options and arguments on the given $options object
     *
     * @param DokuCLI_Options $options
     * @return void
     */
    protected function setup(DokuCLI_Options $options) {
        $options->setHelp(
            'Outputs a list of wanted pages (pages which have internal links but do not yet exist).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );
        $options->registerCommand(
            'show-pages',
            'Show wiki pages on which broken links (i.e. wanted pages) are found, listed as: wiki_page=>broken_link'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run. Prints the
     * sorted, de-duplicated list of wanted pages, one per line.
     *
     * @param DokuCLI_Options $options
     * @return void
     */
    protected function main(DokuCLI_Options $options) {
        global $argv;

        // wikiFN() on a dummy page id yields a file path; its dirname is the
        // directory of the requested namespace (or of the root namespace)
        if($options->args) {
            $startdir = dirname(wikiFN($options->args[0].':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        // 'show-pages' is accepted as the first or the second command line
        // argument. Only look at the slots that actually exist, so that no
        // undefined-offset notice is raised when fewer arguments were given.
        $candidates = is_array($argv) ? array_slice($argv, 1, 2) : array();
        if(in_array('show-pages', $candidates, true)) {
            $this->show_pages = true;
        }

        $this->info("searching $startdir");

        $wanted_pages = array();

        foreach($this->get_pages($startdir) as $page) {
            $wanted_pages = array_merge($wanted_pages, $this->internal_links($page));
        }
        $wanted_pages = array_unique($wanted_pages);
        sort($wanted_pages);

        foreach($wanted_pages as $page) {
            print $page."\n";
        }
    }

    /**
     * Determine directions of the search loop
     *
     * @param string $entry    name of the directory entry
     * @param string $basepath directory the entry was read from
     * @return int one of the DIR_* constants
     */
    protected function dir_filter($entry, $basepath) {
        if($entry == '.' || $entry == '..') {
            return self::DIR_CONTINUE;
        }
        if(is_dir($basepath.'/'.$entry)) {
            // directories whose name starts with an underscore are not searched
            if(strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if(preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * @param string $dir directory to scan
     * @return array list of arrays with the keys 'id' (page id) and 'file' (path of the page file)
     * @throws DokuCLI_Exception when $dir is not a directory or cannot be opened
     */
    protected function get_pages($dir) {
        // number of leading characters to cut from a file path before it is
        // handed to pathID(); computed once and reused by the recursive calls
        static $trunclen = null;
        if(!$trunclen) {
            global $conf;
            $trunclen = strlen($conf['datadir'].':');
        }

        if(!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $dh = opendir($dir);
        if($dh === false) {
            // e.g. missing permissions: never pass false on to readdir()/closedir()
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $pages = array();
        while(false !== ($entry = readdir($dh))) {
            $status = $this->dir_filter($entry, $dir);
            if($status == self::DIR_CONTINUE) {
                continue;
            } else if($status == self::DIR_NS) {
                $pages = array_merge($pages, $this->get_pages($dir.'/'.$entry));
            } else {
                $pages[] = array(
                    'id'   => pathID(substr($dir.'/'.$entry, $trunclen)),
                    'file' => $dir.'/'.$entry,
                );
            }
        }
        closedir($dh);
        return $pages;
    }

    /**
     * Parse instructions and returns the non-existing links
     *
     * Each entry is the wanted page id, or "page id => wanted page id" when
     * the show-pages mode is active.
     *
     * @param array $page array with page id and file path
     * @return array
     */
    public function internal_links($page) {
        global $conf;

        $text = file_get_contents($page['file']);
        if($text === false) {
            // The file could not be read (PHP has already emitted a warning);
            // there is nothing to parse, so this page contributes no links.
            return array();
        }

        $instructions = p_get_instructions($text);
        $links = array();
        $cns = getNS($page['id']);
        $exists = false;
        $pid = $page['id'];
        foreach($instructions as $ins) {
            if($ins[0] == 'internallink' || ($conf['camelcase'] && $ins[0] == 'camelcaselink')) {
                $mid = $ins[1][0];
                resolve_pageid($cns, $mid, $exists);
                if(!$exists) {
                    list($mid) = explode('#', $mid); //record pages without hashs
                    if($this->show_pages) {
                        $links[] = "$pid => $mid";
                    } else {
                        $links[] = $mid;
                    }
                }
            }
        }
        return $links;
    }
}

// Main
$cli = new WantedPagesCLI();
$cli->run();