xref: /dokuwiki/bin/wantedpages.php (revision fe15e2c063a38f65804c55e581c72b96ac36edf7)
1#!/usr/bin/env php
2<?php
3
4use dokuwiki\Utf8\Sort;
5use dokuwiki\File\PageResolver;
6use splitbrain\phpcli\CLI;
7use splitbrain\phpcli\Options;
8
// Fall back to the parent of this script's directory as the installation root
// when DOKU_INC has not been defined yet.
if (!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(__DIR__ . '/../') . '/');
}

// NOSESSION is defined before init.php is loaded
// NOTE(review): presumably this tells the bootstrap not to start a session - confirm in inc/init.php
define('NOSESSION', 1);

require_once(DOKU_INC . 'inc/init.php');
12
/**
 * Find wanted pages
 *
 * Command line tool that walks the page directory of a namespace, parses every
 * page found there and reports internal links whose target page does not exist.
 */
class WantedPagesCLI extends CLI
{
    /** dirFilter() verdict: ignore this directory entry */
    protected const DIR_CONTINUE = 1;
    /** dirFilter() verdict: the entry is a directory that should be descended into */
    protected const DIR_NS = 2;
    /** dirFilter() verdict: the entry is a page file */
    protected const DIR_PAGE = 3;

    /** @var bool when true, only the first column (the array keys of $result) is printed */
    private $skip = false;
    /** @var string 'origin' groups the output by linking page, anything else by wanted page */
    private $sort = 'wanted';

    /** @var array map of first column value => list of second column values */
    private $result = [];

    /**
     * Register options and arguments on the given $options object
     *
     * @param Options $options
     * @return void
     */
    protected function setup(Options $options)
    {
        $options->setHelp(
            'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
            '(the pages that are linking to these missing pages).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );

        $options->registerOption(
            'sort',
            'Sort by wanted or origin page',
            's',
            '(wanted|origin)'
        );

        $options->registerOption(
            'skip',
            'Do not show the second dimension',
            'k'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run. Collects all
     * pages below the start directory, records their links to missing pages
     * and prints the sorted result to standard output.
     *
     * @param Options $options
     * @return void
     */
    protected function main(Options $options)
    {
        // resolve a dummy page name and take its directory to get the start directory
        $args = $options->getArgs();
        if ($args) {
            $startdir = dirname(wikiFN($args[0] . ':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->skip = $options->getOpt('skip');
        // keep the documented default instead of overwriting it with false
        $this->sort = $options->getOpt('sort', 'wanted');

        $this->info("searching $startdir");

        foreach ($this->getPages($startdir) as $page) {
            $this->internalLinks($page);
        }

        Sort::ksort($this->result);
        foreach ($this->result as $main => $subs) {
            if ($this->skip) {
                echo "$main\n";
                continue;
            }

            // the same link may occur several times on one page, print it once
            $subs = array_unique($subs);
            Sort::sort($subs);
            foreach ($subs as $sub) {
                printf("%-40s %s\n", $main, $sub);
            }
        }
    }

    /**
     * Determine directions of the search loop
     *
     * The '.' and '..' entries, directories whose name starts with an
     * underscore and files not ending in '.txt' are skipped.
     *
     * @param string $entry name of the directory entry
     * @param string $basepath directory containing the entry
     * @return int one of the DIR_* constants
     */
    protected function dirFilter($entry, $basepath)
    {
        if ($entry === '.' || $entry === '..') {
            return self::DIR_CONTINUE;
        }
        if (is_dir($basepath . '/' . $entry)) {
            if (strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if (preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * @param string $dir directory to scan
     * @return array list of arrays with the keys 'id' (page id) and 'file' (path of the page file)
     * @throws DokuCLI_Exception when $dir is not a directory or cannot be opened
     */
    protected function getPages($dir)
    {
        // Number of leading characters cut off each file path before it is passed to
        // pathID(): the configured data directory plus one more character (the ':' only
        // stands in for that extra character).
        // NOTE(review): assumes $dir is located below $conf['datadir'] - confirm
        static $trunclen = null;
        if (!$trunclen) {
            global $conf;
            $trunclen = strlen($conf['datadir'] . ':');
        }

        // NOTE(review): DokuCLI_Exception is neither imported nor defined in this file;
        // verify that the class is still available at runtime.
        if (!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        // a directory can exist but still not be readable; readdir() must never see false
        $dh = opendir($dir);
        if ($dh === false) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $pages = [];
        try {
            while (false !== ($entry = readdir($dh))) {
                $status = $this->dirFilter($entry, $dir);
                if ($status === self::DIR_CONTINUE) {
                    continue;
                }

                $path = $dir . '/' . $entry;
                if ($status === self::DIR_NS) {
                    // append instead of array_merge() to avoid recopying the collected list
                    foreach ($this->getPages($path) as $subpage) {
                        $pages[] = $subpage;
                    }
                } else {
                    $pages[] = ['id' => pathID(substr($path, $trunclen)), 'file' => $path];
                }
            }
        } finally {
            // release the handle even when a nested directory could not be read
            closedir($dh);
        }

        return $pages;
    }

    /**
     * Parse instructions and add the non-existing links to the result array
     *
     * Depending on the sort mode the result is keyed by the linking page
     * ('origin') or by the missing page (any other value).
     *
     * @param array $page array with page id and file path
     */
    protected function internalLinks($page)
    {
        global $conf;

        $text = file_get_contents($page['file']);
        if ($text === false) {
            // do not feed a failed read into the parser, skip the page instead
            $this->warning("Unable to read file {$page['file']}");
            return;
        }

        $instructions = p_get_instructions($text);
        $resolver = new PageResolver($page['id']);
        $pid = $page['id'];

        foreach ($instructions as $ins) {
            // camel case links only count when the wiki has them enabled
            $isLink = $ins[0] === 'internallink' || ($conf['camelcase'] && $ins[0] === 'camelcaselink');
            if (!$isLink) {
                continue;
            }

            $mid = $resolver->resolveId($ins[1][0]);
            if (page_exists($mid)) {
                continue;
            }

            [$mid] = explode('#', $mid); //record pages without hashes

            if ($this->sort === 'origin') {
                $this->result[$pid][] = $mid;
            } else {
                $this->result[$mid][] = $pid;
            }
        }
    }
}
186
// Main: create the command line tool and hand control over to it
(new WantedPagesCLI())->run();
190