#!/usr/bin/php
<?php
if(!defined('DOKU_INC')) define('DOKU_INC', realpath(dirname(__FILE__) . '/../') . '/');
define('NOSESSION', 1);
require_once(DOKU_INC . 'inc/init.php');

/**
 * Command line tool that prints the wanted pages of a wiki: targets of
 * internal links whose page does not exist yet.
 */
class WantedPagesCLI extends DokuCLI {

    /** dir_filter() result: ignore this directory entry */
    const DIR_CONTINUE = 1;
    /** dir_filter() result: the entry is a directory that get_pages() descends into */
    const DIR_NS = 2;
    /** dir_filter() result: the entry is a .txt file that get_pages() records as a page */
    const DIR_PAGE = 3;

    /**
     * Register options and arguments on the given $options object
     *
     * @param DokuCLI_Options $options
     * @return void
     */
    protected function setup(DokuCLI_Options $options) {
        $options->setHelp(
            'Outputs a list of wanted pages (pages which have internal links but do not yet exist).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );
    }

    /**
     * Collect the missing link targets of every page below the start
     * directory and print them, de-duplicated and sorted, one per line.
     *
     * Arguments and options have been parsed when this is run
     *
     * @param DokuCLI_Options $options
     * @return void
     */
    protected function main(DokuCLI_Options $options) {

        // Resolve a dummy page id ('xxx') to a file name and take its
        // directory: that is the directory of the requested namespace.
        if($options->args) {
            $startdir = dirname(wikiFN($options->args[0] . ':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->info("searching $startdir");

        $wanted_pages = array();

        foreach($this->get_pages($startdir) as $page) {
            // append instead of array_merge(): merging inside the loop would
            // copy the whole accumulated list once per page
            foreach($this->internal_links($page) as $link) {
                $wanted_pages[] = $link;
            }
        }
        $wanted_pages = array_unique($wanted_pages);
        sort($wanted_pages);

        foreach($wanted_pages as $page) {
            print $page . "\n";
        }
    }

    /**
     * Classify a single directory entry
     *
     * @param string $entry    entry name as returned by readdir()
     * @param string $basepath directory that contains the entry
     * @return int one of the DIR_* constants
     */
    protected function dir_filter($entry, $basepath) {
        if($entry === '.' || $entry === '..') {
            return self::DIR_CONTINUE;
        }
        if(is_dir($basepath . '/' . $entry)) {
            // directories whose name starts with an underscore are skipped
            if(strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if(preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Recursively list all page files below a directory
     *
     * @param string $dir directory to scan
     * @return array list of arrays with the keys 'id' (page id derived from
     *               the path relative to $conf['datadir']) and 'file' (path
     *               of the page file)
     * @throws DokuCLI_Exception when $dir is not a directory or cannot be opened
     */
    protected function get_pages($dir) {
        static $trunclen = null;
        if($trunclen === null) {
            global $conf;
            // length of the data directory plus one separator character, so
            // substr() below yields the path relative to the data directory
            $trunclen = strlen($conf['datadir'] . ':');
        }

        if(!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $dh = opendir($dir);
        if($dh === false) {
            // opendir() fails e.g. on missing permissions; without this check
            // false would be handed to readdir() and closedir()
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        // Read all entries first and release the handle before descending, so
        // no handle stays open during the recursion or leaks when a
        // sub-directory throws.
        $entries = array();
        while(false !== ($entry = readdir($dh))) {
            $entries[] = $entry;
        }
        closedir($dh);

        $pages = array();
        foreach($entries as $entry) {
            $status = $this->dir_filter($entry, $dir);
            if($status === self::DIR_CONTINUE) {
                continue;
            }

            $path = $dir . '/' . $entry;
            if($status === self::DIR_NS) {
                foreach($this->get_pages($path) as $subpage) {
                    $pages[] = $subpage;
                }
            } else {
                $pages[] = array(
                    'id'   => pathID(substr($path, $trunclen)),
                    'file' => $path,
                );
            }
        }
        return $pages;
    }

    /**
     * Find the internal links of a page whose target does not exist
     *
     * @param array $page page entry with the keys 'id' and 'file', as
     *                    produced by get_pages()
     * @return array resolved ids of the missing link targets, without the
     *               '#section' part; may contain duplicates
     */
    public function internal_links($page) {
        global $conf;

        $text = file_get_contents($page['file']);
        if($text === false) {
            // unreadable file: nothing to parse, so there are no links to report
            return array();
        }

        $instructions = p_get_instructions($text);
        $links = array();
        $cns = getNS($page['id']);
        $exists = false;
        foreach($instructions as $ins) {
            if($ins[0] === 'internallink' || ($conf['camelcase'] && $ins[0] === 'camelcaselink')) {
                $mid = $ins[1][0];
                // $mid and $exists are updated by resolve_pageid()
                resolve_pageid($cns, $mid, $exists);
                if(!$exists) {
                    list($mid) = explode('#', $mid); //record pages without hashes
                    $links[] = $mid;
                }
            }
        }
        return $links;
    }
}

// Main
$cli = new WantedPagesCLI();
$cli->run();