#!/usr/bin/php
<?php
if(!defined('DOKU_INC')) define('DOKU_INC', realpath(dirname(__FILE__).'/../').'/');
define('NOSESSION', 1);
require_once(DOKU_INC.'inc/init.php');

/**
 * Find wanted pages
 *
 * Walks the page files below a namespace directory, parses each page and
 * prints every internal link whose target page does not exist.
 */
class WantedPagesCLI extends DokuCLI {

    // result codes of dir_filter(), telling get_pages() what to do with a directory entry
    const DIR_CONTINUE = 1; // skip the entry
    const DIR_NS       = 2; // entry is a namespace directory, descend into it
    const DIR_PAGE     = 3; // entry is a page file (*.txt)

    /** @var bool when true, results are reported as "page => missing link" */
    private $show_pages = false;

    /**
     * Register options and arguments on the given $options object
     *
     * @param DokuCLI_Options $options
     * @return void
     */
    protected function setup(DokuCLI_Options $options) {
        $options->setHelp(
            'Outputs a list of wanted pages (pages which have internal links but do not yet exist).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );
        $options->registerCommand(
            'show-pages',
            'Show wiki pages on which broken links (i.e. wanted pages) are found, listed as: wiki_page=>broken_link'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run
     *
     * @param DokuCLI_Options $options
     * @return void
     */
    protected function main(DokuCLI_Options $options) {
        global $argv;

        // wikiFN() of a dummy page id yields a file path; its directory is where the search starts
        if($options->args) {
            $startdir = dirname(wikiFN($options->args[0].':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        // 'show-pages' is accepted as first or second command line argument.
        // Slicing avoids reading offsets that do not exist when fewer arguments were passed.
        $leading = is_array($argv) ? array_slice($argv, 1, 2) : array();
        if(in_array('show-pages', $leading, true)) {
            $this->show_pages = true;
        }

        $this->info("searching $startdir");

        $wanted_pages = array();

        foreach($this->get_pages($startdir) as $page) {
            $wanted_pages = array_merge($wanted_pages, $this->internal_links($page));
        }
        $wanted_pages = array_unique($wanted_pages);
        sort($wanted_pages);

        foreach($wanted_pages as $page) {
            print $page."\n";
        }
    }

    /**
     * Determine directions of the search loop
     *
     * @param string $entry    name of the directory entry
     * @param string $basepath directory the entry was read from
     * @return int one of the DIR_* constants
     */
    protected function dir_filter($entry, $basepath) {
        if($entry == '.' || $entry == '..') {
            return self::DIR_CONTINUE;
        }
        if(is_dir($basepath.'/'.$entry)) {
            // directories whose name starts with an underscore are not searched
            if(strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if(preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * @param string $dir
     * @return array list of arrays with the keys 'id' and 'file'
     * @throws DokuCLI_Exception when the directory can not be read
     */
    protected function get_pages($dir) {
        static $trunclen = null;
        if(!$trunclen) {
            global $conf;
            // length of the data directory plus one extra character; this many
            // characters are cut off a file path before it is passed to pathID()
            $trunclen = strlen($conf['datadir'].':');
        }

        if(!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $dh = opendir($dir);
        if($dh === false) {
            // e.g. missing permissions; readdir() must not be called without a valid handle
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $pages = array();
        while(false !== ($entry = readdir($dh))) {
            $status = $this->dir_filter($entry, $dir);
            if($status === self::DIR_CONTINUE) {
                continue;
            }

            $path = $dir.'/'.$entry;
            if($status === self::DIR_NS) {
                $pages = array_merge($pages, $this->get_pages($path));
            } else {
                $pages[] = array(
                    'id'   => pathID(substr($path, $trunclen)),
                    'file' => $path,
                );
            }
        }
        closedir($dh);
        return $pages;
    }

    /**
     * Parse instructions and returns the non-existing links
     *
     * Depending on the show-pages setting each returned entry is either the
     * missing page id or a string of the form "page id => missing page id".
     *
     * @param array $page array with page id and file path
     * @return array
     */
    public function internal_links($page) {
        global $conf;

        $text = file_get_contents($page['file']);
        if($text === false) {
            // an unreadable page can not contribute any links
            return array();
        }

        $instructions = p_get_instructions($text);
        $links  = array();
        $cns    = getNS($page['id']);
        $exists = false;
        $pid    = $page['id'];
        foreach($instructions as $ins) {
            if($ins[0] === 'internallink' || ($conf['camelcase'] && $ins[0] === 'camelcaselink')) {
                $mid = $ins[1][0];
                // $mid and $exists are passed along and read back after the call
                resolve_pageid($cns, $mid, $exists);
                if(!$exists) {
                    list($mid) = explode('#', $mid); //record pages without hashs
                    if($this->show_pages) {
                        $links[] = "$pid => $mid";
                    } else {
                        $links[] = $mid;
                    }
                }
            }
        }
        return $links;
    }
}

// Main
$cli = new WantedPagesCLI();
$cli->run();