1#!/usr/bin/env php
2<?php
3
4use splitbrain\phpcli\CLI;
5use splitbrain\phpcli\Options;
6use dokuwiki\Utf8\Sort;
7
// Resolve the wiki root (the parent directory of this script's directory)
// unless the including context has already defined it.
if (!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(__DIR__ . '/../') . '/');
}

// Must be defined before init.php is loaded.
// NOTE(review): presumably tells init.php not to start a session - confirm.
define('NOSESSION', 1);

require_once DOKU_INC . 'inc/init.php';
11
/**
 * Find wanted pages
 *
 * Command line tool that walks the page files below a namespace directory,
 * parses every page and reports internal links whose target page does not
 * exist, together with the pages that contain those links.
 */
class WantedPagesCLI extends CLI {

    // Return values of dirFilter(): what to do with a directory entry
    const DIR_CONTINUE = 1; // ignore the entry
    const DIR_NS = 2;       // entry is a namespace directory, recurse into it
    const DIR_PAGE = 3;     // entry is a page file

    /** @var bool|mixed value of the --skip option; when truthy only the first column is printed */
    private $skip = false;

    /** @var string 'origin' groups the output by linking page, anything else by wanted page */
    private $sort = 'wanted';

    /** @var array first dimension (wanted or origin page id) => list of second dimension page ids */
    private $result = array();

    /**
     * Register options and arguments on the given $options object
     *
     * @param Options $options
     * @return void
     */
    protected function setup(Options $options) {
        $options->setHelp(
            'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
            '(the pages that are linking to these missing pages).'
        );
        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );

        $options->registerOption(
            'sort',
            'Sort by wanted or origin page',
            's',
            '(wanted|origin)'
        );

        $options->registerOption(
            'skip',
            'Do not show the second dimension',
            'k'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run. Collects all
     * pages below the requested namespace, records their missing link targets
     * and prints the sorted result to standard output.
     *
     * @param Options $options
     * @return void
     */
    protected function main(Options $options) {
        $args = $options->getArgs();
        // wikiFN() is asked for a dummy page 'xxx' so that dirname() yields the
        // directory of the namespace itself
        if($args) {
            $startdir = dirname(wikiFN($args[0] . ':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->skip = $options->getOpt('skip');

        // keep the default sort mode when the option was not given instead of
        // overwriting it with the "not set" value returned by getOpt()
        $sort = $options->getOpt('sort');
        if($sort) {
            $this->sort = $sort;
        }

        $this->info("searching $startdir");

        foreach($this->getPages($startdir) as $page) {
            $this->internalLinks($page);
        }

        Sort::ksort($this->result);
        foreach($this->result as $main => $subs) {
            if($this->skip) {
                print "$main\n";
                continue;
            }

            // a page may link to the same target several times
            $subs = array_unique($subs);
            Sort::sort($subs);
            foreach($subs as $sub) {
                printf("%-40s %s\n", $main, $sub);
            }
        }
    }

    /**
     * Determine directions of the search loop
     *
     * @param string $entry    name of the directory entry
     * @param string $basepath directory containing the entry
     * @return int one of the DIR_* constants
     */
    protected function dirFilter($entry, $basepath) {
        if($entry === '.' || $entry === '..') {
            return self::DIR_CONTINUE;
        }
        if(is_dir($basepath . '/' . $entry)) {
            // directories starting with an underscore are not descended into
            if(strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }
        if(preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }
        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * NOTE(review): DokuCLI_Exception is neither defined nor imported in this
     * file - confirm that it is available at runtime.
     *
     * @param string $dir directory to scan
     * @return array list of arrays with the keys 'id' (page id) and 'file' (path of the page file)
     * @throws DokuCLI_Exception when $dir is not a directory or cannot be opened
     */
    protected function getPages($dir) {
        static $trunclen = null;
        if(!$trunclen) {
            global $conf;
            // the appended ':' only adds one character to the length
            // NOTE(review): presumably accounts for the '/' following datadir - confirm
            $trunclen = strlen($conf['datadir'] . ':');
        }

        if(!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $dh = opendir($dir);
        if($dh === false) {
            // e.g. missing permissions; without this check readdir() would be
            // called with false instead of a directory handle
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $pages = array();
        while(false !== ($entry = readdir($dh))) {
            $status = $this->dirFilter($entry, $dir);
            if($status === self::DIR_CONTINUE) {
                continue;
            }

            $path = $dir . '/' . $entry;
            if($status === self::DIR_NS) {
                $pages = array_merge($pages, $this->getPages($path));
            } else {
                $pages[] = array(
                    // strip the first $trunclen characters before converting the path to an id
                    'id' => pathID(substr($path, $trunclen)),
                    'file' => $path,
                );
            }
        }
        closedir($dh);
        return $pages;
    }

    /**
     * Parse instructions and add the non-existing links to the result array
     *
     * Pages whose file cannot be read are reported with a warning and skipped.
     *
     * @param array $page array with page id and file path
     */
    protected function internalLinks($page) {
        global $conf;

        $text = file_get_contents($page['file']);
        if($text === false) {
            $this->warning('Unable to read page file ' . $page['file']);
            return;
        }

        $instructions = p_get_instructions($text);
        $pid = $page['id'];
        $cns = getNS($pid);
        $exists = false;
        foreach($instructions as $ins) {
            $isLink = $ins[0] === 'internallink' || ($conf['camelcase'] && $ins[0] === 'camelcaselink');
            if(!$isLink) {
                continue;
            }

            $mid = $ins[1][0];
            // NOTE(review): relies on resolve_pageid() updating $mid and $exists by reference - confirm
            resolve_pageid($cns, $mid, $exists);
            if($exists) {
                continue;
            }

            list($mid) = explode('#', $mid); //record pages without hashes

            if($this->sort === 'origin') {
                $this->result[$pid][] = $mid;
            } else {
                $this->result[$mid][] = $pid;
            }
        }
    }
}
184
// Main: create the tool and start it through run(), which this class inherits
// from CLI. NOTE(review): run() presumably parses the command line and then
// calls setup()/main() - confirm against the php-cli library.
$cli = new WantedPagesCLI();
$cli->run();
188