#!/usr/bin/php
<?php
if(!defined('DOKU_INC')) define('DOKU_INC', realpath(dirname(__FILE__).'/../').'/');
define('NOSESSION', 1);
require_once(DOKU_INC.'inc/init.php');

/**
 * Find wanted pages
 *
 * Walks the page files below a namespace directory, parses every page and
 * prints one "source page => missing page" line for each internal link whose
 * target does not exist.
 */
class WantedPagesCLI extends DokuCLI {

    /** dir_filter() result: ignore this directory entry */
    const DIR_CONTINUE = 1;
    /** dir_filter() result: the entry is a directory that should be descended into */
    const DIR_NS = 2;
    /** dir_filter() result: the entry is a page file (*.txt) */
    const DIR_PAGE = 3;

    /**
     * Register options and arguments on the given $options object
     *
     * Sets the help text and registers one optional 'namespace' argument.
     *
     * @param DokuCLI_Options $options
     * @return void
     */
    protected function setup(DokuCLI_Options $options) {
        $options->setHelp(
            'Outputs a list of wanted pages (pages which have internal links but do not yet exist).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run. Collects the
     * missing link targets of all pages below the start directory and prints
     * them sorted and without duplicates, one per line.
     *
     * @param DokuCLI_Options $options
     * @return void
     */
    protected function main(DokuCLI_Options $options) {
        // 'xxx' is only a placeholder page name: wikiFN() yields a file path
        // and dirname() reduces it to the directory the search starts in
        if($options->args) {
            $startdir = dirname(wikiFN($options->args[0].':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->info("searching $startdir");

        // append instead of array_merge() so the growing list is not copied
        // again for every page
        $wanted_pages = array();
        foreach($this->get_pages($startdir) as $page) {
            foreach($this->internal_links($page) as $link) {
                $wanted_pages[] = $link;
            }
        }
        $wanted_pages = array_unique($wanted_pages);
        sort($wanted_pages);

        foreach($wanted_pages as $page) {
            print $page."\n";
        }
    }

    /**
     * Determine directions of the search loop
     *
     * @param string $entry    name of the directory entry
     * @param string $basepath directory containing the entry
     * @return int one of the DIR_* constants
     */
    protected function dir_filter($entry, $basepath) {
        if($entry === '.' || $entry === '..') {
            return self::DIR_CONTINUE;
        }
        if(is_dir($basepath.'/'.$entry)) {
            // directories whose name starts with an underscore are not searched
            if(strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if(preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * @param string $dir directory to search
     * @return array list of arrays with the keys 'id' (page id) and 'file' (file path)
     * @throws DokuCLI_Exception when $dir is not a directory or cannot be opened
     */
    protected function get_pages($dir) {
        static $trunclen = null;
        if(!$trunclen) {
            global $conf;
            // the appended ':' only makes the length one character longer than
            // the data directory - presumably to also strip the path separator
            // that follows it
            $trunclen = strlen($conf['datadir'].':');
        }

        if(!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        // is_dir() does not guarantee the directory can be opened (e.g. missing
        // permissions); readdir() must never be called with a false handle
        $dh = opendir($dir);
        if($dh === false) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        // Read all entries first and release the handle before recursing, so
        // that no handle stays open per nesting level or leaks when a nested
        // call throws.
        $entries = array();
        while(false !== ($entry = readdir($dh))) {
            $entries[] = $entry;
        }
        closedir($dh);

        $pages = array();
        foreach($entries as $entry) {
            $status = $this->dir_filter($entry, $dir);
            if($status === self::DIR_CONTINUE) {
                continue;
            }

            $path = $dir.'/'.$entry;
            if($status === self::DIR_NS) {
                foreach($this->get_pages($path) as $page) {
                    $pages[] = $page;
                }
            } else {
                $pages[] = array(
                    'id'   => pathID(substr($path, $trunclen)),
                    'file' => $path,
                );
            }
        }
        return $pages;
    }

    /**
     * Parse instructions and returns the non-existing links
     *
     * @param array $page array with page id and file path
     * @return array list of strings in the form "page id => missing page id"
     */
    public function internal_links($page) {
        global $conf;

        $text = file_get_contents($page['file']);
        if($text === false) {
            // the file could not be read, so there is nothing to parse
            return array();
        }

        $instructions = p_get_instructions($text);
        $links        = array();
        $cns          = getNS($page['id']);
        $exists       = false;
        $pid          = $page['id'];
        foreach($instructions as $ins) {
            if($ins[0] === 'internallink' || ($conf['camelcase'] && $ins[0] === 'camelcaselink')) {
                $mid = $ins[1][0];
                // $exists is checked right after the call - resolve_pageid()
                // is expected to update $mid and $exists in place
                resolve_pageid($cns, $mid, $exists);
                if(!$exists) {
                    list($mid) = explode('#', $mid); //record pages without hashes
                    $links[] = "$pid => $mid";
                }
            }
        }
        return $links;
    }
}

// Main
$cli = new WantedPagesCLI();
$cli->run();