1#!/usr/bin/env php
2<?php
3
4use dokuwiki\Utf8\Sort;
5use dokuwiki\File\PageResolver;
6use splitbrain\phpcli\CLI;
7use splitbrain\phpcli\Options;
8
// Locate the DokuWiki installation root (one level above this script) unless the
// caller already defined it, disable session handling, then boot the wiki core.
if (!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(__DIR__ . '/../') . '/');
}
define('NOSESSION', 1);
require_once DOKU_INC . 'inc/init.php';
12
/**
 * Find wanted pages
 *
 * Walks the page files below a namespace directory, parses every page and collects
 * the internal links whose target page does not exist. The result is printed either
 * grouped by the missing (wanted) page or by the page containing the link (origin).
 */
class WantedPagesCLI extends CLI
{
    /** dirFilter() verdict: ignore this directory entry */
    protected const DIR_CONTINUE = 1;
    /** dirFilter() verdict: the entry is a namespace directory to descend into */
    protected const DIR_NS = 2;
    /** dirFilter() verdict: the entry is a page file */
    protected const DIR_PAGE = 3;

    /** @var bool|mixed value of the --skip option; when truthy only the first column is printed */
    private $skip = false;
    /** @var string|mixed value of the --sort option; 'origin' groups by linking page, anything else by wanted page */
    private $sort = 'wanted';

    /** @var array<string, string[]> collected links, keyed by wanted or origin page depending on $sort */
    private $result = [];

    /**
     * Register options and arguments on the given $options object
     *
     * @param Options $options
     * @return void
     */
    protected function setup(Options $options)
    {
        $options->setHelp(
            'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
            '(the pages that are linking to these missing pages).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );

        $options->registerOption(
            'sort',
            'Sort by wanted or origin page',
            's',
            '(wanted|origin)'
        );

        $options->registerOption(
            'skip',
            'Do not show the second dimension',
            'k'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run. Collects all pages below
     * the start directory, records their broken internal links and prints the sorted result.
     *
     * @param Options $options
     * @return void
     */
    protected function main(Options $options)
    {
        // Resolve a dummy page inside the requested namespace (or the root namespace) to
        // obtain the directory that holds the namespace's page files.
        $args = $options->getArgs();
        if ($args) {
            $startdir = dirname(wikiFN($args[0] . ':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->skip = $options->getOpt('skip');
        $this->sort = $options->getOpt('sort');

        $this->info("searching $startdir");

        foreach ($this->getPages($startdir) as $page) {
            $this->internalLinks($page);
        }

        Sort::ksort($this->result);
        foreach ($this->result as $main => $subs) {
            if ($this->skip) {
                // first dimension only
                echo "$main\n";
                continue;
            }

            // the same link may occur several times on a page, list each pair once
            $subs = array_unique($subs);
            Sort::sort($subs);
            foreach ($subs as $sub) {
                printf("%-40s %s\n", $main, $sub);
            }
        }
    }

    /**
     * Determine directions of the search loop
     *
     * Classifies a directory entry: '.' and '..', directories starting with an underscore
     * and files not ending in '.txt' are ignored; other directories are namespaces and
     * '.txt' files are pages.
     *
     * @param string $entry name of the directory entry
     * @param string $basepath directory containing the entry
     * @return int one of the DIR_* constants
     */
    protected function dirFilter($entry, $basepath)
    {
        if ($entry === '.' || $entry === '..') {
            return self::DIR_CONTINUE;
        }
        if (is_dir($basepath . '/' . $entry)) {
            if (strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if (preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * @param string $dir directory to scan
     * @return array list of arrays with the keys 'id' (page ID) and 'file' (path of the page file)
     * @throws \splitbrain\phpcli\Exception when $dir is not a directory or cannot be opened
     */
    protected function getPages($dir)
    {
        // Number of leading bytes to cut off a file path before handing it to pathID():
        // the data directory plus one separator character. Computed once per process.
        static $trunclen = null;
        if ($trunclen === null) {
            global $conf;
            $trunclen = strlen($conf['datadir'] . ':');
        }

        if (!is_dir($dir)) {
            throw new \splitbrain\phpcli\Exception("Unable to read directory $dir");
        }

        $dh = opendir($dir);
        if ($dh === false) {
            // e.g. missing permissions; readdir() must not be called on a failed handle
            throw new \splitbrain\phpcli\Exception("Unable to read directory $dir");
        }

        $pages = [];
        try {
            while (false !== ($entry = readdir($dh))) {
                $status = $this->dirFilter($entry, $dir);
                if ($status === self::DIR_CONTINUE) {
                    continue;
                }

                $path = $dir . '/' . $entry;
                if ($status === self::DIR_NS) {
                    $pages = array_merge($pages, $this->getPages($path));
                } else {
                    $pages[] = ['id' => pathID(substr($path, $trunclen)), 'file' => $path];
                }
            }
        } finally {
            // release the handle even when scanning a sub directory failed
            closedir($dh);
        }

        return $pages;
    }

    /**
     * Parse instructions and add the non-existing links to the result array
     *
     * Internal links (and camel case links when $conf['camelcase'] is enabled) are resolved
     * relative to the given page. Targets that do not exist are stored in $this->result,
     * keyed by the origin page when sorting by 'origin' and by the wanted page otherwise.
     * A page file that cannot be read is reported as a warning and skipped.
     *
     * @param array $page array with page id and file path
     */
    protected function internalLinks($page)
    {
        global $conf;

        $text = file_get_contents($page['file']);
        if ($text === false) {
            $this->warning("Unable to read page file {$page['file']}");
            return;
        }

        $pid = $page['id'];
        $resolver = new PageResolver($pid);

        foreach (p_get_instructions($text) as $ins) {
            $isLink = $ins[0] === 'internallink' || ($conf['camelcase'] && $ins[0] === 'camelcaselink');
            if (!$isLink) {
                continue;
            }

            $mid = $resolver->resolveId($ins[1][0]);
            if (page_exists($mid)) {
                continue;
            }

            [$mid] = explode('#', $mid); //record pages without hashes

            if ($this->sort == 'origin') {
                $this->result[$pid][] = $mid;
            } else {
                $this->result[$mid][] = $pid;
            }
        }
    }
}
185
// Main: instantiate the command line tool and hand control to it
(new WantedPagesCLI())->run();
189