xref: /dokuwiki/bin/wantedpages.php (revision 964efa9c1f6d52677a51ea52e2957f1ab1e63617)
#!/usr/bin/php
<?php
// Locate the DokuWiki root: the parent of the directory holding this script,
// with a trailing slash. A value defined earlier by the caller is respected.
if(!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(dirname(__FILE__).'/../').'/');
}
// Defined before init.php is loaded (presumably so that no session is
// started for this command line tool -- confirm against inc/init.php).
define('NOSESSION', 1);
require_once(DOKU_INC.'inc/init.php');
61caeb00aSHarry Fuecks
/**
 * Find wanted pages
 *
 * Command line tool that walks the page files below a namespace directory,
 * parses each page and reports the internal links whose target does not exist.
 */
class WantedPagesCLI extends DokuCLI {

    /** dir_filter() result: skip this directory entry */
    const DIR_CONTINUE = 1;
    /** dir_filter() result: entry is a directory that should be descended into */
    const DIR_NS       = 2;
    /** dir_filter() result: entry is a page file (name ends in .txt) */
    const DIR_PAGE     = 3;

    /** @var bool when true, results are listed as "wiki_page => broken_link" */
    private $show_pages = false;

    /**
     * Register options and arguments on the given $options object
     *
     * Registers the optional 'namespace' argument and the 'show-pages' command.
     *
     * @param DokuCLI_Options $options
     * @return void
     */
    protected function setup(DokuCLI_Options $options) {
        $options->setHelp(
            'Outputs a list of wanted pages (pages which have internal links but do not yet exist).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );
        $options->registerCommand(
            'show-pages',
            'Show wiki pages on which broken links (i.e. wanted pages) are found, listed as: wiki_page=>broken_link'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run. Collects the
     * broken links of all pages below the start directory, removes duplicates,
     * sorts them and prints one entry per line.
     *
     * @param DokuCLI_Options $options
     * @return void
     * @throws DokuCLI_Exception when a directory cannot be read
     */
    protected function main(DokuCLI_Options $options) {
        // wikiFN() is asked for a dummy page ('xxx') inside the requested
        // namespace; the directory part of that path is where the scan starts
        if($options->args) {
            $startdir = dirname(wikiFN($options->args[0].':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        if($options->getCmd() === 'show-pages') {
            $this->show_pages = true;
        }

        $this->info("searching $startdir");

        // append in place instead of re-copying the whole list for every page
        $wanted_pages = array();
        foreach($this->get_pages($startdir) as $page) {
            foreach($this->internal_links($page) as $link) {
                $wanted_pages[] = $link;
            }
        }
        $wanted_pages = array_unique($wanted_pages);
        sort($wanted_pages);

        foreach($wanted_pages as $page) {
            print $page."\n";
        }
    }

    /**
     * Determine directions of the search loop
     *
     * @param string $entry    name of the directory entry
     * @param string $basepath directory the entry was read from
     * @return int one of DIR_CONTINUE (skip), DIR_NS (descend) or DIR_PAGE (page file)
     */
    protected function dir_filter($entry, $basepath) {
        if($entry == '.' || $entry == '..') {
            return self::DIR_CONTINUE;
        }
        if(is_dir($basepath.'/'.$entry)) {
            // directories whose name starts with an underscore are not scanned
            if(strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if(preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * @param string $dir directory to scan
     * @return array list of arrays with the keys 'id' (page id) and 'file' (path)
     * @throws DokuCLI_Exception when $dir is not a directory or cannot be opened
     */
    protected function get_pages($dir) {
        // number of leading characters to cut from a file path before it is
        // turned into a page id: the data directory plus one separator
        // character; computed once and reused by the recursive calls
        static $trunclen = null;
        if(!$trunclen) {
            global $conf;
            $trunclen = strlen($conf['datadir'].':');
        }

        if(!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        // opendir() returns false on failure (e.g. missing permissions);
        // never hand that to readdir()/closedir()
        $dh = opendir($dir);
        if($dh === false) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $pages = array();
        while(false !== ($entry = readdir($dh))) {
            $status = $this->dir_filter($entry, $dir);
            if($status == self::DIR_CONTINUE) {
                continue;
            }

            $path = $dir.'/'.$entry;
            if($status == self::DIR_NS) {
                $pages = array_merge($pages, $this->get_pages($path));
            } else {
                $pages[] = array(
                    'id'   => pathID(substr($path, $trunclen)),
                    'file' => $path,
                );
            }
        }
        closedir($dh);
        return $pages;
    }

    /**
     * Parse instructions and returns the non-existing links
     *
     * Looks at 'internallink' instructions and, when $conf['camelcase'] is
     * set, at 'camelcaselink' instructions. Each link is resolved relative to
     * the namespace of the page; links flagged as not existing are collected.
     *
     * @param array $page array with page id and file path
     * @return array the link targets, or "page_id => target" strings when
     *               show-pages is active
     */
    public function internal_links($page) {
        global $conf;

        // an unreadable file contributes no links; false is not passed on to the parser
        $text = file_get_contents($page['file']);
        if($text === false) {
            return array();
        }

        $instructions = p_get_instructions($text);
        $links        = array();
        $cns          = getNS($page['id']);
        $exists       = false;
        $pid          = $page['id'];

        foreach($instructions as $ins) {
            $is_link = $ins[0] == 'internallink' || ($conf['camelcase'] && $ins[0] == 'camelcaselink');
            if(!$is_link) {
                continue;
            }

            // first argument of the instruction is the link target
            $mid = $ins[1][0];
            // $mid and $exists are read back after this call
            resolve_pageid($cns, $mid, $exists);
            if($exists) {
                continue;
            }

            list($mid) = explode('#', $mid); //record pages without hashs
            if($this->show_pages) {
                $links[] = "$pid => $mid";
            } else {
                $links[] = $mid;
            }
        }
        return $links;
    }
}
1641caeb00aSHarry Fuecks
// Main: create the command line tool and hand control to its run() method
$cli = new WantedPagesCLI();
$cli->run();