1#!/usr/bin/env php
2<?php
3
4use dokuwiki\Utf8\Sort;
5use dokuwiki\File\PageResolver;
6use splitbrain\phpcli\CLI;
7use splitbrain\phpcli\Options;
8
// Unless something already defined it, point DOKU_INC at the directory one level above this script
// (with a trailing slash).
if (!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(__DIR__ . '/../') . '/');
}
// NOTE(review): presumably tells inc/init.php not to start a session for this CLI run - confirm there.
define('NOSESSION', 1);
require_once DOKU_INC . 'inc/init.php';
12
/**
 * Find wanted pages
 *
 * Walks all page files below a namespace directory, parses each page into instructions and
 * collects every internal link whose target page does not exist. The result is printed as a
 * two column list (wanted page / origin page, or the other way round).
 */
class WantedPagesCLI extends CLI {

    /** dirFilter() verdict: ignore this directory entry */
    const DIR_CONTINUE = 1;
    /** dirFilter() verdict: the entry is a sub directory that should be descended into */
    const DIR_NS = 2;
    /** dirFilter() verdict: the entry is a page file (*.txt) */
    const DIR_PAGE = 3;

    /** @var bool when truthy only the first dimension is printed */
    private $skip = false;
    /** @var string 'origin' groups the result by linking page, anything else by wanted page */
    private $sort = 'wanted';

    /** @var array first dimension ID => list of second dimension IDs */
    private $result = array();

    /**
     * Register options and arguments on the given $options object
     *
     * @param Options $options
     * @return void
     */
    protected function setup(Options $options) {
        $options->setHelp(
            'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
            '(the pages that are linking to these missing pages).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );

        $options->registerOption(
            'sort',
            'Sort by wanted or origin page',
            's',
            '(wanted|origin)'
        );

        $options->registerOption(
            'skip',
            'Do not show the second dimension',
            'k'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run. Collects all pages below the
     * start directory, records their missing link targets and prints the sorted result.
     *
     * @param Options $options
     * @return void
     */
    protected function main(Options $options) {
        $args = $options->getArgs();
        // wikiFN() of a dummy page inside the namespace yields a file path; its dirname is the
        // directory to start scanning in
        if($args) {
            $startdir = dirname(wikiFN($args[0] . ':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->skip = $options->getOpt('skip');
        // keep the 'wanted' default of the property when --sort was not given
        $sort = $options->getOpt('sort');
        if($sort) {
            $this->sort = $sort;
        }

        $this->info("searching $startdir");

        foreach($this->getPages($startdir) as $page) {
            $this->internalLinks($page);
        }
        Sort::ksort($this->result);
        foreach($this->result as $main => $subs) {
            if($this->skip) {
                print "$main\n";
            } else {
                // the same link may occur several times on a page, print each pair only once
                $subs = array_unique($subs);
                Sort::sort($subs);
                foreach($subs as $sub) {
                    printf("%-40s %s\n", $main, $sub);
                }
            }
        }
    }

    /**
     * Determine directions of the search loop
     *
     * @param string $entry name of the directory entry
     * @param string $basepath directory the entry was found in
     * @return int one of the DIR_* constants
     */
    protected function dirFilter($entry, $basepath) {
        if($entry == '.' || $entry == '..') {
            return self::DIR_CONTINUE;
        }
        if(is_dir($basepath . '/' . $entry)) {
            // directories starting with an underscore are not descended into
            if(strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if(preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * @param string $dir directory to scan
     * @return array list of arrays with the keys 'id' (page id) and 'file' (full path)
     * @throws \splitbrain\phpcli\Exception when the directory does not exist or cannot be opened
     */
    protected function getPages($dir) {
        // length of the data directory prefix plus one separator, computed once per process
        static $trunclen = null;
        if(!$trunclen) {
            global $conf;
            $trunclen = strlen($conf['datadir'] . ':');
        }

        if(!is_dir($dir)) {
            throw new \splitbrain\phpcli\Exception("Unable to read directory $dir");
        }

        $dh = opendir($dir);
        if($dh === false) {
            // e.g. missing permissions; readdir() must not be called without a valid handle
            throw new \splitbrain\phpcli\Exception("Unable to read directory $dir");
        }

        $pages = array();
        try {
            while(false !== ($entry = readdir($dh))) {
                $status = $this->dirFilter($entry, $dir);
                if($status == self::DIR_CONTINUE) {
                    continue;
                } else if($status == self::DIR_NS) {
                    $pages = array_merge($pages, $this->getPages($dir . '/' . $entry));
                } else {
                    $file = $dir . '/' . $entry;
                    $pages[] = array(
                        // path relative to the data directory, converted to a page id
                        'id' => pathID(substr($file, $trunclen)),
                        'file' => $file,
                    );
                }
            }
        } finally {
            // release the handle even when a sub directory could not be read
            closedir($dh);
        }
        return $pages;
    }

    /**
     * Parse instructions and add the non-existing links to the result array
     *
     * Depending on the sort mode the result is keyed by the linking page ('origin') or by the
     * missing page (default).
     *
     * @param array $page array with page id and file path
     */
    protected function internalLinks($page) {
        global $conf;
        $instructions = p_get_instructions(file_get_contents($page['file']));
        $resolver = new PageResolver($page['id']);
        $pid = $page['id'];
        foreach($instructions as $ins) {
            $isLink = $ins[0] == 'internallink' || ($conf['camelcase'] && $ins[0] == 'camelcaselink');
            if(!$isLink) {
                continue;
            }

            $mid = $resolver->resolveId($ins[1][0]);
            // The resolved id may still carry a '#section' anchor. Strip it before the existence
            // check so that links to a section of an existing page are not reported as wanted,
            // and so that pages are recorded without hashes.
            list($mid) = explode('#', $mid, 2);
            if(page_exists($mid)) {
                continue;
            }

            if($this->sort == 'origin') {
                $this->result[$pid][] = $mid;
            } else {
                $this->result[$mid][] = $pid;
            }
        }
    }
}
183
// Entry point: instantiate the command line tool and run it
(new WantedPagesCLI())->run();
187