xref: /dokuwiki/bin/wantedpages.php (revision 8c7c53b0321a3cd3116b8d3b2ad27863a38dece7)
1#!/usr/bin/env php
2<?php
3
4use dokuwiki\Utf8\Sort;
5use dokuwiki\File\PageResolver;
6use splitbrain\phpcli\CLI;
7use splitbrain\phpcli\Options;
8
// Point DOKU_INC at the directory one level above this script (with a trailing
// slash) unless something has already defined it.
if (!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(__DIR__ . '/../') . '/');
}

// NOSESSION is defined before init.php is loaded.
// NOTE(review): presumably this keeps init.php from starting a session - confirm there.
define('NOSESSION', 1);

require_once DOKU_INC . 'inc/init.php';
12
/**
 * Find wanted pages
 *
 * Command line tool that walks all page files (*.txt) below a namespace directory,
 * parses each of them and reports internal links whose target page does not exist.
 */
class WantedPagesCLI extends CLI
{

    /** dirFilter() verdict: ignore this directory entry */
    protected const DIR_CONTINUE = 1;
    /** dirFilter() verdict: the entry is a sub directory (namespace) to descend into */
    protected const DIR_NS = 2;
    /** dirFilter() verdict: the entry is a page file */
    protected const DIR_PAGE = 3;

    /** @var bool when true only the first column (the grouping key) is printed */
    private $skip = false;
    /** @var string 'origin' groups by linking page, anything else groups by wanted page */
    private $sort = 'wanted';

    /** @var array grouping key => list of related page ids (may contain duplicates) */
    private $result = [];

    /**
     * Register options and arguments on the given $options object
     *
     * @param Options $options
     * @return void
     */
    protected function setup(Options $options) {
        $options->setHelp(
            'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
            '(the pages that are linking to these missing pages).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );

        $options->registerOption(
            'sort',
            'Sort by wanted or origin page',
            's',
            '(wanted|origin)'
        );

        $options->registerOption(
            'skip',
            'Do not show the second dimension',
            'k'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run. Collects all pages
     * below the requested namespace, gathers their missing link targets and prints
     * the sorted result to STDOUT.
     *
     * @param Options $options
     * @return void
     */
    protected function main(Options $options) {
        $args = $options->getArgs();
        // wikiFN() is asked for a dummy page 'xxx' so that dirname() yields the
        // directory of the namespace itself
        if($args) {
            $startdir = dirname(wikiFN($args[0] . ':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->skip = (bool) $options->getOpt('skip');
        // an absent or empty --sort falls back to 'wanted' instead of clobbering the default
        $this->sort = $options->getOpt('sort') ?: 'wanted';

        $this->info("searching $startdir");

        foreach($this->getPages($startdir) as $page) {
            $this->internalLinks($page);
        }
        Sort::ksort($this->result);
        foreach($this->result as $main => $subs) {
            if($this->skip) {
                print "$main\n";
            } else {
                // the same link may occur several times on a page, list each pair once
                $subs = array_unique($subs);
                Sort::sort($subs);
                foreach($subs as $sub) {
                    printf("%-40s %s\n", $main, $sub);
                }
            }
        }
    }

    /**
     * Determine directions of the search loop
     *
     * '.' and '..', directories whose name starts with an underscore and files not
     * ending in '.txt' are skipped.
     *
     * @param string $entry name of the directory entry
     * @param string $basepath directory containing the entry
     * @return int one of the DIR_* constants
     */
    protected function dirFilter($entry, $basepath) {
        if($entry === '.' || $entry === '..') {
            return self::DIR_CONTINUE;
        }
        if(is_dir($basepath . '/' . $entry)) {
            if(strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if(preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * NOTE(review): DokuCLI_Exception is neither imported nor defined in this file -
     * confirm it is still provided by the files loaded through inc/init.php.
     *
     * @param string $dir
     * @return array list of ['id' => page id, 'file' => path of the page file]
     * @throws DokuCLI_Exception when $dir is not a directory or cannot be opened
     */
    protected function getPages($dir) {
        // length of the data directory plus one separator character; used to cut
        // that prefix off a file path before turning the rest into a page id
        static $trunclen = null;
        if(!$trunclen) {
            global $conf;
            $trunclen = strlen($conf['datadir'] . ':');
        }

        if(!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        // opendir() returns false on failure (e.g. missing permissions); passing
        // that on to readdir() must be avoided
        $dh = opendir($dir);
        if($dh === false) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $pages = [];
        try {
            while(false !== ($entry = readdir($dh))) {
                $status = $this->dirFilter($entry, $dir);
                if($status === self::DIR_CONTINUE) {
                    continue;
                } elseif($status === self::DIR_NS) {
                    $pages = array_merge($pages, $this->getPages($dir . '/' . $entry));
                } else {
                    $file = $dir . '/' . $entry;
                    $pages[] = ['id' => pathID(substr($file, $trunclen)), 'file' => $file];
                }
            }
        } finally {
            // release the handle even when a nested getPages() call throws
            closedir($dh);
        }
        return $pages;
    }

    /**
     * Parse instructions and add the non-existing links to the result array
     *
     * Depending on the sort mode the result is keyed by the linking page ('origin')
     * or by the missing page (any other value).
     *
     * @param array $page array with page id and file path
     */
    protected function internalLinks($page) {
        global $conf;
        $instructions = p_get_instructions(file_get_contents($page['file']));
        $resolver = new PageResolver($page['id']);
        $pid = $page['id'];
        foreach($instructions as $ins) {
            // CamelCase links only count when the camelcase setting is enabled
            if($ins[0] === 'internallink' || ($conf['camelcase'] && $ins[0] === 'camelcaselink')) {
                $mid = $resolver->resolveId($ins[1][0]);
                if(!page_exists($mid)) {
                    [$mid] = explode('#', $mid); //record pages without hashes

                    if($this->sort === 'origin') {
                        $this->result[$pid][] = $mid;
                    } else {
                        $this->result[$mid][] = $pid;
                    }
                }
            }
        }
    }
}
181
// Main: create the command line tool and start it. run() is not defined in
// WantedPagesCLI itself, so it is inherited from the CLI base class.
$cli = new WantedPagesCLI();
$cli->run();
185