xref: /dokuwiki/bin/wantedpages.php (revision 964efa9c1f6d52677a51ea52e2957f1ab1e63617)
1#!/usr/bin/php
2<?php
// Point DOKU_INC at the directory one level above this script (with a
// trailing slash) unless it has already been defined.
if(!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(dirname(__FILE__).'/../').'/');
}
// NOTE(review): presumably tells inc/init.php not to start a session - confirm there.
define('NOSESSION', 1);
// Load the bootstrap file from the location defined above.
require_once(DOKU_INC.'inc/init.php');
6
7/**
8 * Find wanted pages
9 */
class WantedPagesCLI extends DokuCLI {

    /** dir_filter() verdict: ignore this directory entry */
    const DIR_CONTINUE = 1;
    /** dir_filter() verdict: entry is a directory that get_pages() descends into */
    const DIR_NS       = 2;
    /** dir_filter() verdict: entry is a page file */
    const DIR_PAGE     = 3;

    /**
     * @var bool when true, results are reported as "page id => wanted page id"
     *           instead of just the wanted page id
     */
    private $show_pages = false;

    /**
     * Register options and arguments on the given $options object
     *
     * Declares the optional 'namespace' argument and the 'show-pages' command.
     *
     * @param DokuCLI_Options $options
     * @return void
     */
    protected function setup(DokuCLI_Options $options) {
        $options->setHelp(
            'Outputs a list of wanted pages (pages which have internal links but do not yet exist).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );
        $options->registerCommand(
            'show-pages',
            'Show wiki pages on which broken links (i.e. wanted pages) are found, listed as: wiki_page=>broken_link'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run. Collects the
     * missing link targets of every page below the start directory and prints
     * them sorted and de-duplicated, one per line.
     *
     * @param DokuCLI_Options $options
     * @return void
     */
    protected function main(DokuCLI_Options $options) {
        // a dummy page name ('xxx') is resolved to a file so that dirname()
        // yields the directory to start searching in
        if($options->args) {
            $startdir = dirname(wikiFN($options->args[0].':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        if($options->getCmd() === 'show-pages') {
            $this->show_pages = true;
        }

        $this->info("searching $startdir");

        // append directly instead of array_merge() per page, which would copy
        // the whole accumulated list on every iteration
        $wanted_pages = array();
        foreach($this->get_pages($startdir) as $page) {
            foreach($this->internal_links($page) as $link) {
                $wanted_pages[] = $link;
            }
        }
        $wanted_pages = array_unique($wanted_pages);
        sort($wanted_pages);

        foreach($wanted_pages as $page) {
            print $page."\n";
        }
    }

    /**
     * Determine directions of the search loop
     *
     * Returns DIR_NS for directories (except those whose name starts with an
     * underscore), DIR_PAGE for entries ending in '.txt' and DIR_CONTINUE for
     * everything else, including '.' and '..'.
     *
     * @param string $entry    name of the directory entry
     * @param string $basepath directory the entry was read from
     * @return int one of the DIR_* constants
     */
    protected function dir_filter($entry, $basepath) {
        if($entry === '.' || $entry === '..') {
            return self::DIR_CONTINUE;
        }
        if(is_dir($basepath.'/'.$entry)) {
            // directories with a leading underscore are not searched
            if(strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if(preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * Each returned element is an array with the keys 'id' (page id derived
     * from the file path via pathID()) and 'file' (path of the page file).
     *
     * @param string $dir directory to scan
     * @return array list of page arrays
     * @throws DokuCLI_Exception when $dir is not a directory or cannot be opened
     */
    protected function get_pages($dir) {
        // computed once: number of leading characters to strip from a file
        // path (the data directory plus one separator character)
        static $trunclen = null;
        if($trunclen === null) {
            global $conf;
            $trunclen = strlen($conf['datadir'].':');
        }

        if(!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        // opendir() returns false on failure (e.g. missing permissions); do
        // not hand that on to readdir()/closedir()
        $dh = opendir($dir);
        if($dh === false) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $pages = array();
        while(false !== ($entry = readdir($dh))) {
            $status = $this->dir_filter($entry, $dir);
            if($status === self::DIR_CONTINUE) {
                continue;
            }

            $path = $dir.'/'.$entry;
            if($status === self::DIR_NS) {
                // descend into the sub directory and append what it contains
                foreach($this->get_pages($path) as $page) {
                    $pages[] = $page;
                }
            } else {
                $pages[] = array(
                    'id'   => pathID(substr($path, $trunclen)),
                    'file' => $path,
                );
            }
        }
        closedir($dh);
        return $pages;
    }

    /**
     * Parse instructions and returns the non-existing links
     *
     * Depending on the show-pages setting each entry is either the id of the
     * missing page or a string of the form "page id => missing page id".
     * Returns an empty list when the page file cannot be read.
     *
     * @param array $page array with page id and file path
     * @return array
     */
    public function internal_links($page) {
        global $conf;

        $text = file_get_contents($page['file']);
        if($text === false) {
            // file_get_contents() has already raised a warning; nothing to parse
            return array();
        }

        $instructions = p_get_instructions($text);
        $links        = array();
        $cns          = getNS($page['id']);
        $exists       = false;
        $pid          = $page['id'];
        foreach($instructions as $ins) {
            $is_link = $ins[0] === 'internallink'
                || ($conf['camelcase'] && $ins[0] === 'camelcaselink');
            if(!$is_link) {
                continue;
            }

            $mid = $ins[1][0];
            // resolve_pageid() is expected to update $mid and $exists in place
            resolve_pageid($cns, $mid, $exists);
            if($exists) {
                continue;
            }

            list($mid) = explode('#', $mid); //record pages without hashes
            if($this->show_pages) {
                $links[] = "$pid => $mid";
            } else {
                $links[] = $mid;
            }
        }
        return $links;
    }
}
164
// Entry point: create the command line tool and let it run
$cli = new WantedPagesCLI;
$cli->run();